harvestdor-indexer 1.0.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,47 +2,100 @@
2
2
  http_interactions:
3
3
  - request:
4
4
  method: get
5
- uri: http://127.0.0.1:3000/
5
+ uri: http://purl.stanford.edu/yg867hg1375.xml
6
6
  body:
7
7
  encoding: US-ASCII
8
8
  string: ''
9
9
  headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
10
12
  Accept:
11
- - '*/*'
13
+ - "*/*"
12
14
  User-Agent:
13
15
  - Ruby
14
16
  response:
15
17
  status:
16
18
  code: 200
17
- message: OK
19
+ message: ''
18
20
  headers:
19
- X-Frame-Options:
20
- - SAMEORIGIN
21
- X-Xss-Protection:
22
- - 1; mode=block
23
- X-Content-Type-Options:
24
- - nosniff
25
- Content-Type:
26
- - text/html; charset=utf-8
21
+ Date:
22
+ - Wed, 17 Dec 2014 19:39:37 GMT
23
+ Server:
24
+ - Apache/2.2.15 (Red Hat)
25
+ X-Powered-By:
26
+ - Phusion Passenger (mod_rails/mod_rack) 3.0.19
27
+ X-Ua-Compatible:
28
+ - IE=Edge,chrome=1
27
29
  Etag:
28
- - '"444bcb3a3fcf8389296c49467f27e1d6"'
30
+ - '"67aa6d1481ba1537ae63af5aaf493f84"'
29
31
  Cache-Control:
30
32
  - max-age=0, private, must-revalidate
31
- X-Meta-Request-Version:
32
- - 0.3.4
33
33
  X-Request-Id:
34
- - 4fef0b48-9ee9-4ef8-811e-78b8c0f643f0
34
+ - f2e753d56bf896cde6e941be0f51d05a
35
35
  X-Runtime:
36
- - '0.005477'
37
- Connection:
38
- - close
39
- Server:
40
- - thin 1.6.2 codename Doc Brown
36
+ - '0.007983'
37
+ X-Rack-Cache:
38
+ - miss
39
+ Status:
40
+ - '200'
41
+ Content-Length:
42
+ - '2180'
43
+ Content-Type:
44
+ - application/xml; charset=utf-8
41
45
  body:
42
- encoding: US-ASCII
43
- string: ok
44
- http_version:
45
- recorded_at: Wed, 12 Nov 2014 19:34:02 GMT
46
+ encoding: UTF-8
47
+ string: |
48
+ <publicObject id="druid:yg867hg1375" published="2013-11-11T15:34:32-08:00">
49
+ <identityMetadata>
50
+ <objectId>druid:yg867hg1375</objectId>
51
+ <objectCreator>DOR</objectCreator>
52
+ <objectLabel>Francis E. Stafford photographs, 1909-1933</objectLabel>
53
+ <objectType>collection</objectType>
54
+ <adminPolicy>druid:vb546ms7107</adminPolicy>
55
+ <otherId name="catkey">9615156</otherId>
56
+ <otherId name="uuid">8f1feb20-4b29-11e3-8e31-0050569b3c3c</otherId>
57
+ <tag>Remediated By : 3.25.3</tag>
58
+ </identityMetadata>
59
+ <xml/>
60
+ <rightsMetadata>
61
+ <access type="discover">
62
+ <machine>
63
+ <world/>
64
+ </machine>
65
+ </access>
66
+ <access type="read">
67
+ <machine>
68
+ <world/>
69
+ </machine>
70
+ </access>
71
+ <use>
72
+ <human type="useAndReproduction"/>
73
+ <human type="creativeCommons"/>
74
+ <machine type="creativeCommons"/>
75
+ </use>
76
+ <copyright>
77
+ <human/>
78
+ </copyright>
79
+ </rightsMetadata>
80
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#" xmlns:hydra="http://projecthydra.org/ns/relations#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
81
+ <rdf:Description rdf:about="info:fedora/druid:yg867hg1375">
82
+ </rdf:Description>
83
+ </rdf:RDF>
84
+ <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
85
+ <dc:title>Francis E. Stafford photographs, 1909-1933</dc:title>
86
+ <dc:contributor>Stafford, Francis E., 1884-1938</dc:contributor>
87
+ <dc:type>Collection</dc:type>
88
+ <dc:date>1909-1933</dc:date>
89
+ <dc:language>und</dc:language>
90
+ <dc:format>3 oversize boxes.</dc:format>
91
+ <dc:description>Photographs of scenes in China, mainly between 1909 and 1915.</dc:description>
92
+ <dc:rights>Closed. Digital use copies available.</dc:rights>
93
+ <dc:description type="biographical/historical">American missionary in China, 1909-1915 and 1932-1933.</dc:description>
94
+ <dc:coverage>China</dc:coverage>
95
+ </oai_dc:dc>
96
+ </publicObject>
97
+ http_version:
98
+ recorded_at: Wed, 17 Dec 2014 19:39:38 GMT
46
99
  - request:
47
100
  method: get
48
101
  uri: http://127.0.0.1:3000/collections/yg867hg1375
@@ -74,9 +127,9 @@ http_interactions:
74
127
  X-Meta-Request-Version:
75
128
  - 0.3.4
76
129
  X-Request-Id:
77
- - 26936654-94cb-4135-b867-53f9ba72d1b5
130
+ - 125a9964-6326-4114-9f59-fb533551d554
78
131
  X-Runtime:
79
- - '0.011706'
132
+ - '0.011086'
80
133
  Connection:
81
134
  - close
82
135
  Server:
@@ -95,5 +148,5 @@ http_interactions:
95
148
  B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
96
149
  social customs, and people."}],"counts":{"collections":1,"items":5,"total_count":6}}'
97
150
  http_version:
98
- recorded_at: Wed, 12 Nov 2014 19:34:02 GMT
151
+ recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
99
152
  recorded_with: VCR 2.9.3
@@ -4,7 +4,7 @@ Coveralls.wear!
4
4
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
5
  $LOAD_PATH.unshift(File.dirname(__FILE__))
6
6
 
7
- require 'harvestdor-indexer'
7
+ require 'harvestdor/indexer'
8
8
 
9
9
  require 'vcr'
10
10
 
@@ -0,0 +1,174 @@
1
+ require 'spec_helper'
2
+
3
+ describe Harvestdor::Indexer::Resource do
4
+
5
+ before(:all) do
6
+ VCR.use_cassette('before_all_call') do
7
+ @config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
8
+ require 'yaml'
9
+ @config = YAML.load_file(@config_yml_path)
10
+ @fake_druid = 'oo000oo0000'
11
+
12
+ @indexer = Harvestdor::Indexer.new(@config)
13
+ @hdor_client = @indexer.send(:harvestdor_client)
14
+ @whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
15
+ end
16
+ end
17
+
18
+ let :resource do
19
+ Harvestdor::Indexer::Resource.new(@indexer, @fake_druid)
20
+ end
21
+
22
+ context "smods_rec method" do
23
+ before(:all) do
24
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
25
+ @mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
26
+ @ng_mods_xml = Nokogiri::XML(@mods_xml)
27
+ end
28
+ it "should call mods method on harvestdor_client" do
29
+ expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
30
+ resource.smods_rec
31
+ end
32
+ it "should return Stanford::Mods::Record object" do
33
+ expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
34
+ expect(resource.smods_rec).to be_an_instance_of(Stanford::Mods::Record)
35
+ end
36
+ it "should raise exception if MODS xml for the druid is empty" do
37
+ allow(@hdor_client).to receive(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
38
+ expect { resource.smods_rec }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
39
+ end
40
+ it "should raise exception if there is no MODS xml for the druid" do
41
+ VCR.use_cassette('exception_no_MODS_call') do
42
+ expect { resource.smods_rec }.to raise_error(Harvestdor::Errors::MissingMods)
43
+ end
44
+ end
45
+ end
46
+
47
+ context "public_xml related methods" do
48
+ before(:all) do
49
+ @id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
50
+ @cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
51
+ @rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
52
+ @rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
53
+ @pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
54
+ @ng_pub_xml = Nokogiri::XML(@pub_xml)
55
+ end
56
+ context "#public_xml" do
57
+ it "should call public_xml method on harvestdor_client" do
58
+ expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
59
+ resource.public_xml
60
+ end
61
+ it "retrieves entire public xml as a Nokogiri::XML::Document" do
62
+ expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
63
+ px = resource.public_xml
64
+ expect(px).to be_kind_of(Nokogiri::XML::Document)
65
+ expect(px.root.name).to eq('publicObject')
66
+ expect(px.root.attributes['id'].text).to eq("druid:#{@fake_druid}")
67
+ end
68
+ it "raises exception if public xml for the druid is empty" do
69
+ expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
70
+ expect { resource.public_xml }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
71
+ end
72
+ it "raises error if there is no public_xml page for the druid" do
73
+ expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(nil)
74
+ expect { resource.public_xml }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
75
+ end
76
+ end
77
+ context "#content_metadata" do
78
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
79
+ allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
80
+ cm = resource.content_metadata
81
+ expect(cm).to be_kind_of(Nokogiri::XML::Document)
82
+ expect(cm.root).not_to eq(nil)
83
+ expect(cm.root.name).to eq('contentMetadata')
84
+ expect(cm.root.attributes['objectId'].text).to eq(@fake_druid)
85
+ expect(cm.root.text.strip).to eq('foo')
86
+ end
87
+ it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
88
+ expect(@hdor_client).to receive(:content_metadata).with(@fake_druid).and_return(nil)
89
+ expect { resource.content_metadata }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
90
+ end
91
+ end
92
+ context "#identity_metadata" do
93
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
94
+ allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
95
+ im = resource.identity_metadata
96
+ expect(im).to be_kind_of(Nokogiri::XML::Document)
97
+ expect(im.root).not_to eq(nil)
98
+ expect(im.root.name).to eq('identityMetadata')
99
+ expect(im.root.text.strip).to eq("druid:#{@fake_druid}")
100
+ end
101
+ it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
102
+ expect(@hdor_client).to receive(:identity_metadata).with(@fake_druid).and_return(nil)
103
+ expect { resource.identity_metadata }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
104
+ end
105
+ end
106
+ context "#rights_metadata" do
107
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
108
+ allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
109
+ im = resource.rights_metadata
110
+ expect(im).to be_kind_of(Nokogiri::XML::Document)
111
+ expect(im.root).not_to eq(nil)
112
+ expect(im.root.name).to eq('rightsMetadata')
113
+ expect(im.root.text.strip).to eq("bar")
114
+ end
115
+ it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
116
+ expect(@hdor_client).to receive(:rights_metadata).with(@fake_druid).and_return(nil)
117
+ expect { resource.rights_metadata }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
118
+ end
119
+ end
120
+ context "#rdf" do
121
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
122
+ allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
123
+ im = resource.rdf
124
+ expect(im).to be_kind_of(Nokogiri::XML::Document)
125
+ expect(im.root).not_to eq(nil)
126
+ expect(im.root.name).to eq('RDF')
127
+ expect(im.root.text.strip).to eq("relationship!")
128
+ end
129
+ it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
130
+ expect(@hdor_client).to receive(:rdf).with(@fake_druid).and_return(nil)
131
+ expect { resource.rdf }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
132
+ end
133
+ end
134
+
135
+ describe "#public_xml_or_druid" do
136
+ it "should return the public_xml, if the public_xml has been loaded" do
137
+ allow(resource).to receive(:public_xml?).and_return(true)
138
+ allow(resource).to receive(:public_xml).and_return(double)
139
+ expect(resource.public_xml_or_druid).to eq resource.public_xml
140
+ end
141
+ it "should return the druid, if the public_xml has not been loaded" do
142
+ allow(resource).to receive(:public_xml?).and_return(false)
143
+ expect(resource.public_xml_or_druid).to eq @fake_druid
144
+ end
145
+ end
146
+
147
+ describe "#identity_md_obj_label" do
148
+ it "should extract the objectLabel from the identity metadata" do
149
+ allow(resource).to receive(:identity_metadata).and_return(Nokogiri::XML("<identityMetadata><objectLabel>label</objectLabel></identityMetadata>"))
150
+ expect(resource.identity_md_obj_label).to eq "label"
151
+ end
152
+ end
153
+
154
+ describe "#collections" do
155
+ it "should extract the collection this resource is a member of and return Resource objects for those collections" do
156
+ allow(resource).to receive(:public_xml).and_return(Nokogiri::XML <<-EOF
157
+ <publicObject>
158
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#">
159
+ <rdf:Description>
160
+ <fedora:isMemberOfCollection rdf:resource="some:druid" />
161
+ </rdf:Description>
162
+ </rdf:RDF>
163
+ </publicObject>
164
+ EOF
165
+ )
166
+
167
+ expect(resource.collections.length).to eq 1
168
+ expect(resource.collections.first.druid).to eq "some:druid"
169
+ expect(resource.collections.first.indexer).to eq resource.indexer
170
+ end
171
+ end
172
+ end
173
+
174
+ end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
+ describe Harvestdor::Indexer::Solr do
4
+ let :indexer do
5
+ double(logger: Logger.new("/dev/null"))
6
+ end
7
+
8
+ let :solr do
9
+ Harvestdor::Indexer::Solr.new indexer
10
+ end
11
+
12
+ # The method that sends the solr document to solr
13
+ describe "#add" do
14
+ let(:doc_hash) do
15
+ {
16
+ :id => "whatever",
17
+ :modsxml => 'whatever',
18
+ :title_display => 'title',
19
+ :pub_year_tisim => 'some year',
20
+ :author_person_display => 'author',
21
+ :format => 'Image',
22
+ :language => 'English'
23
+ }
24
+ end
25
+
26
+ it "sends an add request to the solr_client" do
27
+ expect(solr.client).to receive(:add).with(doc_hash)
28
+ solr.add(doc_hash)
29
+ end
30
+ end
31
+
32
+ end
@@ -5,61 +5,37 @@ describe Harvestdor::Indexer do
5
5
  before(:all) do
6
6
  VCR.use_cassette('before_all_call') do
7
7
  @config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
8
- @client_config_path = File.join(File.dirname(__FILE__), "../..", "config", "dor-fetcher-client.yml")
9
- @indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
10
8
  require 'yaml'
11
- @yaml = YAML.load_file(@config_yml_path)
9
+ @config = YAML.load_file(@config_yml_path)
10
+
11
+ @indexer = Harvestdor::Indexer.new(@config) do |config|
12
+ config.whitelist = ["druid:yg867hg1375"]
13
+ end
12
14
  @hdor_client = @indexer.send(:harvestdor_client)
13
- @fake_druid = 'oo000oo0000'
14
- @blacklist_path = File.join(File.dirname(__FILE__), "../config/ap_blacklist.txt")
15
+ @fake_druid = 'druid:oo000oo0000'
15
16
  @whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
16
17
  end
17
18
  end
18
-
19
- # The method that sends the solr document to solr
20
- describe "#solr_add" do
21
- before(:each) do
22
- doc_hash = {
23
- :modsxml => 'whatever',
24
- :title_display => 'title',
25
- :pub_year_tisim => 'some year',
26
- :author_person_display => 'author',
27
- :format => 'Image',
28
- :language => 'English'
29
- }
30
- end
31
- it "sends an add request to the solr_client" do
32
- expect(@indexer.solr_client).to receive(:add)
33
- @indexer.solr_add(@doc_hash, "abc123")
34
- end
35
- end
36
-
19
+
37
20
  describe "access methods" do
38
21
  it "initializes success count" do
39
- @indexer.success_count.should == 0
22
+ expect(@indexer.metrics.success_count).to eq(0)
40
23
  end
41
24
  it "initializes error count" do
42
- @indexer.error_count.should == 0
43
- end
44
- it "initializes max_retries" do
45
- expect(@indexer.max_retries).to eql(10)
46
- end
47
- it "allows overriding of max_retries" do
48
- @indexer.max_retries=6
49
- @indexer.max_retries.should == 6
25
+ expect(@indexer.metrics.error_count).to eq(0)
50
26
  end
51
27
  end
52
28
 
53
29
  describe "logging" do
54
30
  it "should write the log file to the directory indicated by log_dir" do
55
31
  @indexer.logger.info("indexer_spec logging test message")
56
- File.exists?(File.join(@yaml['log_dir'], @yaml['log_name'])).should == true
32
+ expect(File.exists?(File.join(@config['harvestdor']['log_dir'], @config['harvestdor']['log_name']))).to eq(true)
57
33
  end
58
34
  end
59
35
 
60
36
  it "should initialize the harvestdor_client from the config" do
61
37
  expect(@hdor_client).to be_an_instance_of(Harvestdor::Client)
62
- expect(@hdor_client.config.default_set).to eq(@yaml['default_set'])
38
+ expect(@hdor_client.config.default_set).to eq(@config['harvestdor']['default_set'])
63
39
  end
64
40
 
65
41
  context "harvest_and_index" do
@@ -69,63 +45,34 @@ describe Harvestdor::Indexer do
69
45
  }
70
46
  end
71
47
  it "should call dor_fetcher_client.druid_array and then call :add on rsolr connection" do
72
- @indexer.should_receive(:druids).and_return([@fake_druid])
73
- @indexer.solr_client.should_receive(:add).with(@doc_hash)
74
- @indexer.solr_client.should_receive(:commit)
48
+ allow_any_instance_of(Harvestdor::Indexer::Resource).to receive(:collection?).and_return(false)
49
+ expect(@indexer).to receive(:druids).and_return([@fake_druid])
50
+ expect(@indexer.solr).to receive(:add).with(@doc_hash)
51
+ expect(@indexer.solr).to receive(:commit!)
75
52
  @indexer.harvest_and_index
76
53
  end
77
54
 
78
55
  it "should only call :commit on rsolr connection once" do
79
56
  VCR.use_cassette('single_rsolr_connection_call') do
80
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
81
- hdor_client = indexer.send(:harvestdor_client)
82
- indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
83
- indexer.solr_client.should_receive(:add).exactly(6).times
84
- indexer.solr_client.should_receive(:commit).once
85
- indexer.harvest_and_index
57
+ hdor_client = @indexer.send(:harvestdor_client)
58
+ expect(@indexer.dor_fetcher_client).to receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
59
+ expect(@indexer.solr).to receive(:add).exactly(6).times
60
+ expect(@indexer.solr).to receive(:commit!).once
61
+ @indexer.harvest_and_index
86
62
  end
87
63
  end
88
64
 
89
- it "should not process druids in blacklist" do
90
- VCR.use_cassette('ignore_druids_in_blacklist_call') do
91
- lambda{
92
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
93
- hdor_client = indexer.send(:harvestdor_client)
94
- indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
95
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
96
- indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
97
- indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:tc552kq0798'}))
98
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:th998nk0722'}))
99
- indexer.solr_client.should_receive(:commit)
100
- indexer.harvest_and_index
101
- }
102
- end
103
- end
104
- it "should not process druid if it is in both blacklist and whitelist" do
105
- VCR.use_cassette('ignore_druids_in_blacklist_and_whitelist_call') do
106
- lambda{
107
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path, :whitelist => @whitelist_path})
108
- hdor_client = indexer.send(:harvestdor_client)
109
- indexer.dor_fetcher_client.should_not_receive(:druid_array)
110
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
111
- indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
112
- indexer.solr_client.should_receive(:commit)
113
- indexer.harvest_and_index
114
- }
115
- end
116
- end
117
65
  it "should only process druids in whitelist if it exists" do
118
66
  VCR.use_cassette('process_druids_whitelist_call') do
119
- lambda{
120
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
121
- hdor_client = indexer.send(:harvestdor_client)
122
- indexer.dor_fetcher_client.should_not_receive(:druid_array)
123
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
124
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
125
- indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
126
- indexer.solr_client.should_receive(:commit)
127
- indexer.harvest_and_index
67
+ indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => @whitelist_path))
68
+ hdor_client = indexer.send(:harvestdor_client)
69
+ added = []
70
+ allow(indexer.solr).to receive(:add) { |hash|
71
+ added << hash[:id]
128
72
  }
73
+ expect(indexer.solr).to receive(:commit!)
74
+ indexer.harvest_and_index
75
+ expect(added).to match_array ["druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534", "druid:yg867hg1375", 'druid:jf275fd6276', 'druid:nz353cp1092']
129
76
  end
130
77
  end
131
78
 
@@ -137,227 +84,36 @@ describe Harvestdor::Indexer do
137
84
  expect(@indexer.dor_fetcher_client).to be_an_instance_of(DorFetcher::Client)
138
85
  end
139
86
 
140
- it "should strip off is_member_of_collection_ and is_governed_by_ and return only the druid" do
141
- expect(@indexer.strip_default_set_string()).to eq("yg867hg1375")
142
- end
143
-
144
87
  it "druids method should call druid_array and get_collection methods on fetcher_client" do
145
88
  VCR.use_cassette('get_collection_druids_call') do
146
- expect(@indexer.druids).to eq(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
89
+ expect(@indexer.resources.map(&:druid)).to match_array ["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"]
147
90
  end
148
91
  end
149
92
 
150
93
  it "should get the configuration of the dor-fetcher client from included yml file" do
151
- expect(@indexer.dor_fetcher_client.service_url).to eq(@indexer.client_config["dor_fetcher_service_url"])
94
+ expect(@indexer.dor_fetcher_client.service_url).to eq("http://127.0.0.1:3000")
152
95
  end
153
96
 
154
97
  end # ending replacing OAI context
155
-
156
- context "smods_rec method" do
157
- before(:all) do
158
- @fake_druid = 'oo000oo0000'
159
- @ns_decl = "xmlns='#{Mods::MODS_NS}'"
160
- @mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
161
- @ng_mods_xml = Nokogiri::XML(@mods_xml)
162
- end
163
- it "should call mods method on harvestdor_client" do
164
- @hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
165
- @indexer.smods_rec(@fake_druid)
166
- end
167
- it "should return Stanford::Mods::Record object" do
168
- @hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
169
- @indexer.smods_rec(@fake_druid).should be_an_instance_of(Stanford::Mods::Record)
170
- end
171
- it "should raise exception if MODS xml for the druid is empty" do
172
- @hdor_client.stub(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
173
- expect { @indexer.smods_rec(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
174
- end
175
- it "should raise exception if there is no MODS xml for the druid" do
176
- VCR.use_cassette('exception_no_MODS_call') do
177
- expect { @indexer.smods_rec(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingMods)
178
- end
179
- end
180
- end
181
-
182
- context "public_xml related methods" do
183
- before(:all) do
184
- @id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
185
- @cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
186
- @rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
187
- @rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
188
- @pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
189
- @ng_pub_xml = Nokogiri::XML(@pub_xml)
190
- end
191
- context "#public_xml" do
192
- it "should call public_xml method on harvestdor_client" do
193
- @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
194
- @indexer.public_xml @fake_druid
195
- end
196
- it "retrieves entire public xml as a Nokogiri::XML::Document" do
197
- @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
198
- px = @indexer.public_xml @fake_druid
199
- px.should be_kind_of(Nokogiri::XML::Document)
200
- px.root.name.should == 'publicObject'
201
- px.root.attributes['id'].text.should == "druid:#{@fake_druid}"
202
- end
203
- it "raises exception if public xml for the druid is empty" do
204
- @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
205
- expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
206
- end
207
- it "raises error if there is no public_xml page for the druid" do
208
- @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
209
- expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
210
- end
211
- end
212
- context "#content_metadata" do
213
- it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
214
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
215
- cm = @indexer.content_metadata(@fake_druid)
216
- cm.should be_kind_of(Nokogiri::XML::Document)
217
- cm.root.should_not == nil
218
- cm.root.name.should == 'contentMetadata'
219
- cm.root.attributes['objectId'].text.should == @fake_druid
220
- cm.root.text.strip.should == 'foo'
221
- end
222
- it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
223
- URI::HTTP.any_instance.should_not_receive(:open)
224
- @hdor_client.should_receive(:content_metadata).and_call_original
225
- cm = @indexer.content_metadata(@ng_pub_xml)
226
- cm.should be_kind_of(Nokogiri::XML::Document)
227
- cm.root.should_not == nil
228
- cm.root.name.should == 'contentMetadata'
229
- cm.root.attributes['objectId'].text.should == @fake_druid
230
- cm.root.text.strip.should == 'foo'
231
- end
232
- it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
233
- @hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
234
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
235
- end
236
- end
237
- context "#identity_metadata" do
238
- it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
239
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
240
- im = @indexer.identity_metadata(@fake_druid)
241
- im.should be_kind_of(Nokogiri::XML::Document)
242
- im.root.should_not == nil
243
- im.root.name.should == 'identityMetadata'
244
- im.root.text.strip.should == "druid:#{@fake_druid}"
245
- end
246
- it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
247
- URI::HTTP.any_instance.should_not_receive(:open)
248
- @hdor_client.should_receive(:identity_metadata).and_call_original
249
- im = @indexer.identity_metadata(@ng_pub_xml)
250
- im.should be_kind_of(Nokogiri::XML::Document)
251
- im.root.should_not == nil
252
- im.root.name.should == 'identityMetadata'
253
- im.root.text.strip.should == "druid:#{@fake_druid}"
254
- end
255
- it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
256
- @hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
257
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
258
- end
259
- end
260
- context "#rights_metadata" do
261
- it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
262
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
263
- im = @indexer.rights_metadata(@fake_druid)
264
- im.should be_kind_of(Nokogiri::XML::Document)
265
- im.root.should_not == nil
266
- im.root.name.should == 'rightsMetadata'
267
- im.root.text.strip.should == "bar"
268
- end
269
- it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
270
- @hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
271
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
272
- end
273
- end
274
- context "#rdf" do
275
- it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
276
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
277
- im = @indexer.rdf(@fake_druid)
278
- im.should be_kind_of(Nokogiri::XML::Document)
279
- im.root.should_not == nil
280
- im.root.name.should == 'RDF'
281
- im.root.text.strip.should == "relationship!"
282
- end
283
- it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
284
- @hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
285
- expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
286
- end
287
- end
288
- end
289
-
290
- context "blacklist" do
291
- it "should be an Array with an entry for each non-empty line in the file" do
292
- @indexer.send(:load_blacklist, @blacklist_path)
293
- @indexer.send(:blacklist).should be_an_instance_of(Array)
294
- @indexer.send(:blacklist).size.should == 2
295
- end
296
- it "should be empty Array if there was no blacklist config setting" do
297
- VCR.use_cassette('empty_array_no_blacklist_config_call') do
298
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
299
- expect(indexer.blacklist).to eq([])
300
- end
301
- end
302
- context "load_blacklist" do
303
- it "knows what is in the blacklist" do
304
- VCR.use_cassette('know_what_is_in_blacklist_call') do
305
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
306
- expect(indexer.blacklist).to eq(["druid:jf275fd6276", "druid:tc552kq0798"])
307
- end
308
- end
309
- it "should not be called if there was no blacklist config setting" do
310
- VCR.use_cassette('no_blacklist_config_call') do
311
- lambda{
312
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
313
-
314
- indexer.should_not_receive(:load_blacklist)
315
-
316
- hdor_client = indexer.send(:harvestdor_client)
317
- indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
318
- indexer.solr_client.should_receive(:add)
319
- indexer.solr_client.should_receive(:commit)
320
- indexer.harvest_and_index
321
- }
322
- end
323
- end
324
- it "should only try to load a blacklist once" do
325
- VCR.use_cassette('load_blacklist_once_call') do
326
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
327
- indexer.send(:blacklist)
328
- File.any_instance.should_not_receive(:open)
329
- indexer.send(:blacklist)
330
- end
331
- end
332
- it "should log an error message and throw RuntimeError if it can't find the indicated blacklist file" do
333
- VCR.use_cassette('no_blacklist_found_call') do
334
- exp_msg = 'Unable to find list of druids at bad_path'
335
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => 'bad_path'})
336
- indexer.logger.should_receive(:fatal).with(exp_msg)
337
- expect { indexer.send(:load_blacklist, 'bad_path') }.to raise_error(exp_msg)
338
- end
339
- end
340
- end
341
- end # blacklist
342
98
 
343
99
  context "whitelist" do
344
100
  it "knows what is in the whitelist" do
345
101
  VCR.use_cassette('know_what_is_in_whitelist_call') do
346
102
  lambda{
347
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
103
+ indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
348
104
  expect(indexer.whitelist).to eq(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092"])
349
105
  }
350
106
  end
351
107
  end
352
108
  it "should be an Array with an entry for each non-empty line in the file" do
353
109
  @indexer.send(:load_whitelist, @whitelist_path)
354
- @indexer.send(:whitelist).should be_an_instance_of(Array)
355
- @indexer.send(:whitelist).size.should == 3
110
+ expect(@indexer.send(:whitelist)).to be_an_instance_of(Array)
111
+ expect(@indexer.send(:whitelist).size).to eq(3)
356
112
  end
357
113
  it "should be empty Array if there was no whitelist config setting" do
358
114
  VCR.use_cassette('empty_array_no_whitelist_config_call') do
359
115
  lambda{
360
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
116
+ indexer = Harvestdor::Indexer.new()
361
117
  expect(indexer.whitelist).to eq([])
362
118
  }
363
119
  end
@@ -366,31 +122,31 @@ describe Harvestdor::Indexer do
366
122
  it "should not be called if there was no whitelist config setting" do
367
123
  VCR.use_cassette('no_whitelist_config_call') do
368
124
  lambda{
369
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
125
+ indexer = Harvestdor::Indexer.new()
370
126
 
371
- indexer.should_not_receive(:load_whitelist)
127
+ expect(indexer).not_to receive(:load_whitelist)
372
128
 
373
129
  hdor_client = indexer.send(:harvestdor_client)
374
- indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
375
- indexer.solr_client.should_receive(:add)
376
- indexer.solr_client.should_receive(:commit)
130
+ expect(indexer.dor_fetcher_client).to receive(:druid_array).and_return([@fake_druid])
131
+ expect(indexer.solr_client).to receive(:add)
132
+ expect(indexer.solr_client).to receive(:commit)
377
133
  indexer.harvest_and_index
378
134
  }
379
135
  end
380
136
  end
381
137
  it "should only try to load a whitelist once" do
382
138
  VCR.use_cassette('load_whitelist_once_call') do
383
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
139
+ indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
384
140
  indexer.send(:whitelist)
385
- File.any_instance.should_not_receive(:open)
141
+ expect_any_instance_of(File).not_to receive(:open)
386
142
  indexer.send(:whitelist)
387
143
  end
388
144
  end
389
145
  it "should log an error message and throw RuntimeError if it can't find the indicated whitelist file" do
390
146
  VCR.use_cassette('cant_find_whitelist_call') do
391
147
  exp_msg = 'Unable to find list of druids at bad_path'
392
- indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => 'bad_path'})
393
- indexer.logger.should_receive(:fatal).with(exp_msg)
148
+ indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => 'bad_path'))
149
+ expect(indexer.logger).to receive(:fatal).with(exp_msg)
394
150
  expect { indexer.send(:load_whitelist, 'bad_path') }.to raise_error(exp_msg)
395
151
  end
396
152
  end
@@ -399,15 +155,15 @@ describe Harvestdor::Indexer do
399
155
 
400
156
  it "solr_client should initialize the rsolr client using the options from the config" do
401
157
  VCR.use_cassette('rsolr_client_config_call') do
402
- indexer = Harvestdor::Indexer.new(nil, @client_config_path, Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
403
- RSolr.should_receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')
404
- indexer.solr_client
158
+ indexer = Harvestdor::Indexer.new(Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
159
+ expect(RSolr).to receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')
160
+ indexer.solr
405
161
  end
406
162
  end
407
163
 
408
164
  context "skip heartbeat" do
409
165
  it "allows me to use a fake url for dor-fetcher-client" do
410
- expect {Harvestdor::Indexer.new(@config_yml_path, @client_config_path)}.not_to raise_error
166
+ expect {Harvestdor::Indexer.new()}.not_to raise_error
411
167
  end
412
168
  end
413
169
  end