harvestdor-indexer 1.0.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/Gemfile +1 -1
- data/README.rdoc +1 -0
- data/harvestdor-indexer.gemspec +4 -2
- data/lib/harvestdor-indexer.rb +1 -317
- data/lib/harvestdor/indexer.rb +159 -0
- data/lib/harvestdor/indexer/metrics.rb +53 -0
- data/lib/harvestdor/indexer/resource.rb +174 -0
- data/lib/harvestdor/indexer/solr.rb +39 -0
- data/lib/{harvestdor-indexer → harvestdor/indexer}/version.rb +1 -1
- data/spec/config/ap.yml +32 -44
- data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +96 -0
- data/spec/fixtures/vcr_cassettes/process_druids_whitelist_call.yml +1494 -16
- data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +80 -27
- data/spec/spec_helper.rb +1 -1
- data/spec/unit/harvestdor-indexer-resource_spec.rb +174 -0
- data/spec/unit/harvestdor-indexer-solr_spec.rb +32 -0
- data/spec/unit/harvestdor-indexer_spec.rb +47 -291
- data/spec/unit/harvestdor/indexer/metrics_spec.rb +46 -0
- metadata +45 -10
- data/config/dor-fetcher-client.yml +0 -4
- data/spec/config/ap_blacklist.txt +0 -5
@@ -2,47 +2,100 @@
|
|
2
2
|
http_interactions:
|
3
3
|
- request:
|
4
4
|
method: get
|
5
|
-
uri: http://
|
5
|
+
uri: http://purl.stanford.edu/yg867hg1375.xml
|
6
6
|
body:
|
7
7
|
encoding: US-ASCII
|
8
8
|
string: ''
|
9
9
|
headers:
|
10
|
+
Accept-Encoding:
|
11
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
10
12
|
Accept:
|
11
|
-
-
|
13
|
+
- "*/*"
|
12
14
|
User-Agent:
|
13
15
|
- Ruby
|
14
16
|
response:
|
15
17
|
status:
|
16
18
|
code: 200
|
17
|
-
message:
|
19
|
+
message: ''
|
18
20
|
headers:
|
19
|
-
|
20
|
-
-
|
21
|
-
|
22
|
-
-
|
23
|
-
X-
|
24
|
-
-
|
25
|
-
|
26
|
-
-
|
21
|
+
Date:
|
22
|
+
- Wed, 17 Dec 2014 19:39:37 GMT
|
23
|
+
Server:
|
24
|
+
- Apache/2.2.15 (Red Hat)
|
25
|
+
X-Powered-By:
|
26
|
+
- Phusion Passenger (mod_rails/mod_rack) 3.0.19
|
27
|
+
X-Ua-Compatible:
|
28
|
+
- IE=Edge,chrome=1
|
27
29
|
Etag:
|
28
|
-
- '"
|
30
|
+
- '"67aa6d1481ba1537ae63af5aaf493f84"'
|
29
31
|
Cache-Control:
|
30
32
|
- max-age=0, private, must-revalidate
|
31
|
-
X-Meta-Request-Version:
|
32
|
-
- 0.3.4
|
33
33
|
X-Request-Id:
|
34
|
-
-
|
34
|
+
- f2e753d56bf896cde6e941be0f51d05a
|
35
35
|
X-Runtime:
|
36
|
-
- '0.
|
37
|
-
|
38
|
-
-
|
39
|
-
|
40
|
-
-
|
36
|
+
- '0.007983'
|
37
|
+
X-Rack-Cache:
|
38
|
+
- miss
|
39
|
+
Status:
|
40
|
+
- '200'
|
41
|
+
Content-Length:
|
42
|
+
- '2180'
|
43
|
+
Content-Type:
|
44
|
+
- application/xml; charset=utf-8
|
41
45
|
body:
|
42
|
-
encoding:
|
43
|
-
string:
|
44
|
-
|
45
|
-
|
46
|
+
encoding: UTF-8
|
47
|
+
string: |
|
48
|
+
<publicObject id="druid:yg867hg1375" published="2013-11-11T15:34:32-08:00">
|
49
|
+
<identityMetadata>
|
50
|
+
<objectId>druid:yg867hg1375</objectId>
|
51
|
+
<objectCreator>DOR</objectCreator>
|
52
|
+
<objectLabel>Francis E. Stafford photographs, 1909-1933</objectLabel>
|
53
|
+
<objectType>collection</objectType>
|
54
|
+
<adminPolicy>druid:vb546ms7107</adminPolicy>
|
55
|
+
<otherId name="catkey">9615156</otherId>
|
56
|
+
<otherId name="uuid">8f1feb20-4b29-11e3-8e31-0050569b3c3c</otherId>
|
57
|
+
<tag>Remediated By : 3.25.3</tag>
|
58
|
+
</identityMetadata>
|
59
|
+
<xml/>
|
60
|
+
<rightsMetadata>
|
61
|
+
<access type="discover">
|
62
|
+
<machine>
|
63
|
+
<world/>
|
64
|
+
</machine>
|
65
|
+
</access>
|
66
|
+
<access type="read">
|
67
|
+
<machine>
|
68
|
+
<world/>
|
69
|
+
</machine>
|
70
|
+
</access>
|
71
|
+
<use>
|
72
|
+
<human type="useAndReproduction"/>
|
73
|
+
<human type="creativeCommons"/>
|
74
|
+
<machine type="creativeCommons"/>
|
75
|
+
</use>
|
76
|
+
<copyright>
|
77
|
+
<human/>
|
78
|
+
</copyright>
|
79
|
+
</rightsMetadata>
|
80
|
+
<rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#" xmlns:hydra="http://projecthydra.org/ns/relations#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
81
|
+
<rdf:Description rdf:about="info:fedora/druid:yg867hg1375">
|
82
|
+
</rdf:Description>
|
83
|
+
</rdf:RDF>
|
84
|
+
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
|
85
|
+
<dc:title>Francis E. Stafford photographs, 1909-1933</dc:title>
|
86
|
+
<dc:contributor>Stafford, Francis E., 1884-1938</dc:contributor>
|
87
|
+
<dc:type>Collection</dc:type>
|
88
|
+
<dc:date>1909-1933</dc:date>
|
89
|
+
<dc:language>und</dc:language>
|
90
|
+
<dc:format>3 oversize boxes.</dc:format>
|
91
|
+
<dc:description>Photographs of scenes in China, mainly between 1909 and 1915.</dc:description>
|
92
|
+
<dc:rights>Closed. Digital use copies available.</dc:rights>
|
93
|
+
<dc:description type="biographical/historical">American missionary in China, 1909-1915 and 1932-1933.</dc:description>
|
94
|
+
<dc:coverage>China</dc:coverage>
|
95
|
+
</oai_dc:dc>
|
96
|
+
</publicObject>
|
97
|
+
http_version:
|
98
|
+
recorded_at: Wed, 17 Dec 2014 19:39:38 GMT
|
46
99
|
- request:
|
47
100
|
method: get
|
48
101
|
uri: http://127.0.0.1:3000/collections/yg867hg1375
|
@@ -74,9 +127,9 @@ http_interactions:
|
|
74
127
|
X-Meta-Request-Version:
|
75
128
|
- 0.3.4
|
76
129
|
X-Request-Id:
|
77
|
-
-
|
130
|
+
- 125a9964-6326-4114-9f59-fb533551d554
|
78
131
|
X-Runtime:
|
79
|
-
- '0.
|
132
|
+
- '0.011086'
|
80
133
|
Connection:
|
81
134
|
- close
|
82
135
|
Server:
|
@@ -95,5 +148,5 @@ http_interactions:
|
|
95
148
|
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
96
149
|
social customs, and people."}],"counts":{"collections":1,"items":5,"total_count":6}}'
|
97
150
|
http_version:
|
98
|
-
recorded_at: Wed, 12 Nov 2014 19:34:
|
151
|
+
recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
|
99
152
|
recorded_with: VCR 2.9.3
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,174 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::Resource do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
VCR.use_cassette('before_all_call') do
|
7
|
+
@config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
|
8
|
+
require 'yaml'
|
9
|
+
@config = YAML.load_file(@config_yml_path)
|
10
|
+
@fake_druid = 'oo000oo0000'
|
11
|
+
|
12
|
+
@indexer = Harvestdor::Indexer.new(@config)
|
13
|
+
@hdor_client = @indexer.send(:harvestdor_client)
|
14
|
+
@whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
let :resource do
|
19
|
+
Harvestdor::Indexer::Resource.new(@indexer, @fake_druid)
|
20
|
+
end
|
21
|
+
|
22
|
+
context "smods_rec method" do
|
23
|
+
before(:all) do
|
24
|
+
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
25
|
+
@mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
|
26
|
+
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
27
|
+
end
|
28
|
+
it "should call mods method on harvestdor_client" do
|
29
|
+
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
30
|
+
resource.smods_rec
|
31
|
+
end
|
32
|
+
it "should return Stanford::Mods::Record object" do
|
33
|
+
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
34
|
+
expect(resource.smods_rec).to be_an_instance_of(Stanford::Mods::Record)
|
35
|
+
end
|
36
|
+
it "should raise exception if MODS xml for the druid is empty" do
|
37
|
+
allow(@hdor_client).to receive(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
|
38
|
+
expect { resource.smods_rec }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
39
|
+
end
|
40
|
+
it "should raise exception if there is no MODS xml for the druid" do
|
41
|
+
VCR.use_cassette('exception_no_MODS_call') do
|
42
|
+
expect { resource.smods_rec }.to raise_error(Harvestdor::Errors::MissingMods)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context "public_xml related methods" do
|
48
|
+
before(:all) do
|
49
|
+
@id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
|
50
|
+
@cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
|
51
|
+
@rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
|
52
|
+
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
|
53
|
+
@pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
|
54
|
+
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
55
|
+
end
|
56
|
+
context "#public_xml" do
|
57
|
+
it "should call public_xml method on harvestdor_client" do
|
58
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
59
|
+
resource.public_xml
|
60
|
+
end
|
61
|
+
it "retrieves entire public xml as a Nokogiri::XML::Document" do
|
62
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
63
|
+
px = resource.public_xml
|
64
|
+
expect(px).to be_kind_of(Nokogiri::XML::Document)
|
65
|
+
expect(px.root.name).to eq('publicObject')
|
66
|
+
expect(px.root.attributes['id'].text).to eq("druid:#{@fake_druid}")
|
67
|
+
end
|
68
|
+
it "raises exception if public xml for the druid is empty" do
|
69
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
|
70
|
+
expect { resource.public_xml }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
71
|
+
end
|
72
|
+
it "raises error if there is no public_xml page for the druid" do
|
73
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(nil)
|
74
|
+
expect { resource.public_xml }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
context "#content_metadata" do
|
78
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
79
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
80
|
+
cm = resource.content_metadata
|
81
|
+
expect(cm).to be_kind_of(Nokogiri::XML::Document)
|
82
|
+
expect(cm.root).not_to eq(nil)
|
83
|
+
expect(cm.root.name).to eq('contentMetadata')
|
84
|
+
expect(cm.root.attributes['objectId'].text).to eq(@fake_druid)
|
85
|
+
expect(cm.root.text.strip).to eq('foo')
|
86
|
+
end
|
87
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
|
88
|
+
expect(@hdor_client).to receive(:content_metadata).with(@fake_druid).and_return(nil)
|
89
|
+
expect { resource.content_metadata }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
90
|
+
end
|
91
|
+
end
|
92
|
+
context "#identity_metadata" do
|
93
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
94
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
95
|
+
im = resource.identity_metadata
|
96
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
97
|
+
expect(im.root).not_to eq(nil)
|
98
|
+
expect(im.root.name).to eq('identityMetadata')
|
99
|
+
expect(im.root.text.strip).to eq("druid:#{@fake_druid}")
|
100
|
+
end
|
101
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
|
102
|
+
expect(@hdor_client).to receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
103
|
+
expect { resource.identity_metadata }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
context "#rights_metadata" do
|
107
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
108
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
109
|
+
im = resource.rights_metadata
|
110
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
111
|
+
expect(im.root).not_to eq(nil)
|
112
|
+
expect(im.root.name).to eq('rightsMetadata')
|
113
|
+
expect(im.root.text.strip).to eq("bar")
|
114
|
+
end
|
115
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
|
116
|
+
expect(@hdor_client).to receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
117
|
+
expect { resource.rights_metadata }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
118
|
+
end
|
119
|
+
end
|
120
|
+
context "#rdf" do
|
121
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
122
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
123
|
+
im = resource.rdf
|
124
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
125
|
+
expect(im.root).not_to eq(nil)
|
126
|
+
expect(im.root.name).to eq('RDF')
|
127
|
+
expect(im.root.text.strip).to eq("relationship!")
|
128
|
+
end
|
129
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
|
130
|
+
expect(@hdor_client).to receive(:rdf).with(@fake_druid).and_return(nil)
|
131
|
+
expect { resource.rdf }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "#public_xml_or_druid" do
|
136
|
+
it "should return the public_xml, if the public_xml has been loaded" do
|
137
|
+
allow(resource).to receive(:public_xml?).and_return(true)
|
138
|
+
allow(resource).to receive(:public_xml).and_return(double)
|
139
|
+
expect(resource.public_xml_or_druid).to eq resource.public_xml
|
140
|
+
end
|
141
|
+
it "should return the druid, if the public_xml has not been loaded" do
|
142
|
+
allow(resource).to receive(:public_xml?).and_return(false)
|
143
|
+
expect(resource.public_xml_or_druid).to eq @fake_druid
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "#identity_md_obj_label" do
|
148
|
+
it "should extract the objectLabel from the identity metadata" do
|
149
|
+
allow(resource).to receive(:identity_metadata).and_return(Nokogiri::XML("<identityMetadata><objectLabel>label</objectLabel></identityMetadata>"))
|
150
|
+
expect(resource.identity_md_obj_label).to eq "label"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
describe "#collections" do
|
155
|
+
it "should extract the collection this resource is a member of and return Resource objects for those collections" do
|
156
|
+
allow(resource).to receive(:public_xml).and_return(Nokogiri::XML <<-EOF
|
157
|
+
<publicObject>
|
158
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#">
|
159
|
+
<rdf:Description>
|
160
|
+
<fedora:isMemberOfCollection rdf:resource="some:druid" />
|
161
|
+
</rdf:Description>
|
162
|
+
</rdf:RDF>
|
163
|
+
</publicObject>
|
164
|
+
EOF
|
165
|
+
)
|
166
|
+
|
167
|
+
expect(resource.collections.length).to eq 1
|
168
|
+
expect(resource.collections.first.druid).to eq "some:druid"
|
169
|
+
expect(resource.collections.first.indexer).to eq resource.indexer
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::Solr do
|
4
|
+
let :indexer do
|
5
|
+
double(logger: Logger.new("/dev/null"))
|
6
|
+
end
|
7
|
+
|
8
|
+
let :solr do
|
9
|
+
Harvestdor::Indexer::Solr.new indexer
|
10
|
+
end
|
11
|
+
|
12
|
+
# The method that sends the solr document to solr
|
13
|
+
describe "#add" do
|
14
|
+
let(:doc_hash) do
|
15
|
+
{
|
16
|
+
:id => "whatever",
|
17
|
+
:modsxml => 'whatever',
|
18
|
+
:title_display => 'title',
|
19
|
+
:pub_year_tisim => 'some year',
|
20
|
+
:author_person_display => 'author',
|
21
|
+
:format => 'Image',
|
22
|
+
:language => 'English'
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
it "sends an add request to the solr_client" do
|
27
|
+
expect(solr.client).to receive(:add).with(doc_hash)
|
28
|
+
solr.add(doc_hash)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
@@ -5,61 +5,37 @@ describe Harvestdor::Indexer do
|
|
5
5
|
before(:all) do
|
6
6
|
VCR.use_cassette('before_all_call') do
|
7
7
|
@config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
|
8
|
-
@client_config_path = File.join(File.dirname(__FILE__), "../..", "config", "dor-fetcher-client.yml")
|
9
|
-
@indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
10
8
|
require 'yaml'
|
11
|
-
@
|
9
|
+
@config = YAML.load_file(@config_yml_path)
|
10
|
+
|
11
|
+
@indexer = Harvestdor::Indexer.new(@config) do |config|
|
12
|
+
config.whitelist = ["druid:yg867hg1375"]
|
13
|
+
end
|
12
14
|
@hdor_client = @indexer.send(:harvestdor_client)
|
13
|
-
@fake_druid = 'oo000oo0000'
|
14
|
-
@blacklist_path = File.join(File.dirname(__FILE__), "../config/ap_blacklist.txt")
|
15
|
+
@fake_druid = 'druid:oo000oo0000'
|
15
16
|
@whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
|
16
17
|
end
|
17
18
|
end
|
18
|
-
|
19
|
-
# The method that sends the solr document to solr
|
20
|
-
describe "#solr_add" do
|
21
|
-
before(:each) do
|
22
|
-
doc_hash = {
|
23
|
-
:modsxml => 'whatever',
|
24
|
-
:title_display => 'title',
|
25
|
-
:pub_year_tisim => 'some year',
|
26
|
-
:author_person_display => 'author',
|
27
|
-
:format => 'Image',
|
28
|
-
:language => 'English'
|
29
|
-
}
|
30
|
-
end
|
31
|
-
it "sends an add request to the solr_client" do
|
32
|
-
expect(@indexer.solr_client).to receive(:add)
|
33
|
-
@indexer.solr_add(@doc_hash, "abc123")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
19
|
+
|
37
20
|
describe "access methods" do
|
38
21
|
it "initializes success count" do
|
39
|
-
@indexer.success_count.
|
22
|
+
expect(@indexer.metrics.success_count).to eq(0)
|
40
23
|
end
|
41
24
|
it "initializes error count" do
|
42
|
-
@indexer.error_count.
|
43
|
-
end
|
44
|
-
it "initializes max_retries" do
|
45
|
-
expect(@indexer.max_retries).to eql(10)
|
46
|
-
end
|
47
|
-
it "allows overriding of max_retries" do
|
48
|
-
@indexer.max_retries=6
|
49
|
-
@indexer.max_retries.should == 6
|
25
|
+
expect(@indexer.metrics.error_count).to eq(0)
|
50
26
|
end
|
51
27
|
end
|
52
28
|
|
53
29
|
describe "logging" do
|
54
30
|
it "should write the log file to the directory indicated by log_dir" do
|
55
31
|
@indexer.logger.info("indexer_spec logging test message")
|
56
|
-
File.exists?(File.join(@
|
32
|
+
expect(File.exists?(File.join(@config['harvestdor']['log_dir'], @config['harvestdor']['log_name']))).to eq(true)
|
57
33
|
end
|
58
34
|
end
|
59
35
|
|
60
36
|
it "should initialize the harvestdor_client from the config" do
|
61
37
|
expect(@hdor_client).to be_an_instance_of(Harvestdor::Client)
|
62
|
-
expect(@hdor_client.config.default_set).to eq(@
|
38
|
+
expect(@hdor_client.config.default_set).to eq(@config['harvestdor']['default_set'])
|
63
39
|
end
|
64
40
|
|
65
41
|
context "harvest_and_index" do
|
@@ -69,63 +45,34 @@ describe Harvestdor::Indexer do
|
|
69
45
|
}
|
70
46
|
end
|
71
47
|
it "should call dor_fetcher_client.druid_array and then call :add on rsolr connection" do
|
72
|
-
|
73
|
-
@indexer.
|
74
|
-
@indexer.
|
48
|
+
allow_any_instance_of(Harvestdor::Indexer::Resource).to receive(:collection?).and_return(false)
|
49
|
+
expect(@indexer).to receive(:druids).and_return([@fake_druid])
|
50
|
+
expect(@indexer.solr).to receive(:add).with(@doc_hash)
|
51
|
+
expect(@indexer.solr).to receive(:commit!)
|
75
52
|
@indexer.harvest_and_index
|
76
53
|
end
|
77
54
|
|
78
55
|
it "should only call :commit on rsolr connection once" do
|
79
56
|
VCR.use_cassette('single_rsolr_connection_call') do
|
80
|
-
|
81
|
-
|
82
|
-
indexer.
|
83
|
-
indexer.
|
84
|
-
indexer.
|
85
|
-
indexer.harvest_and_index
|
57
|
+
hdor_client = @indexer.send(:harvestdor_client)
|
58
|
+
expect(@indexer.dor_fetcher_client).to receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
59
|
+
expect(@indexer.solr).to receive(:add).exactly(6).times
|
60
|
+
expect(@indexer.solr).to receive(:commit!).once
|
61
|
+
@indexer.harvest_and_index
|
86
62
|
end
|
87
63
|
end
|
88
64
|
|
89
|
-
it "should not process druids in blacklist" do
|
90
|
-
VCR.use_cassette('ignore_druids_in_blacklist_call') do
|
91
|
-
lambda{
|
92
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
93
|
-
hdor_client = indexer.send(:harvestdor_client)
|
94
|
-
indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
95
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
96
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
97
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:tc552kq0798'}))
|
98
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:th998nk0722'}))
|
99
|
-
indexer.solr_client.should_receive(:commit)
|
100
|
-
indexer.harvest_and_index
|
101
|
-
}
|
102
|
-
end
|
103
|
-
end
|
104
|
-
it "should not process druid if it is in both blacklist and whitelist" do
|
105
|
-
VCR.use_cassette('ignore_druids_in_blacklist_and_whitelist_call') do
|
106
|
-
lambda{
|
107
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path, :whitelist => @whitelist_path})
|
108
|
-
hdor_client = indexer.send(:harvestdor_client)
|
109
|
-
indexer.dor_fetcher_client.should_not_receive(:druid_array)
|
110
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
|
111
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
112
|
-
indexer.solr_client.should_receive(:commit)
|
113
|
-
indexer.harvest_and_index
|
114
|
-
}
|
115
|
-
end
|
116
|
-
end
|
117
65
|
it "should only process druids in whitelist if it exists" do
|
118
66
|
VCR.use_cassette('process_druids_whitelist_call') do
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
125
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
126
|
-
indexer.solr_client.should_receive(:commit)
|
127
|
-
indexer.harvest_and_index
|
67
|
+
indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => @whitelist_path))
|
68
|
+
hdor_client = indexer.send(:harvestdor_client)
|
69
|
+
added = []
|
70
|
+
allow(indexer.solr).to receive(:add) { |hash|
|
71
|
+
added << hash[:id]
|
128
72
|
}
|
73
|
+
expect(indexer.solr).to receive(:commit!)
|
74
|
+
indexer.harvest_and_index
|
75
|
+
expect(added).to match_array ["druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534", "druid:yg867hg1375", 'druid:jf275fd6276', 'druid:nz353cp1092']
|
129
76
|
end
|
130
77
|
end
|
131
78
|
|
@@ -137,227 +84,36 @@ describe Harvestdor::Indexer do
|
|
137
84
|
expect(@indexer.dor_fetcher_client).to be_an_instance_of(DorFetcher::Client)
|
138
85
|
end
|
139
86
|
|
140
|
-
it "should strip off is_member_of_collection_ and is_governed_by_ and return only the druid" do
|
141
|
-
expect(@indexer.strip_default_set_string()).to eq("yg867hg1375")
|
142
|
-
end
|
143
|
-
|
144
87
|
it "druids method should call druid_array and get_collection methods on fetcher_client" do
|
145
88
|
VCR.use_cassette('get_collection_druids_call') do
|
146
|
-
expect(@indexer.
|
89
|
+
expect(@indexer.resources.map(&:druid)).to match_array ["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"]
|
147
90
|
end
|
148
91
|
end
|
149
92
|
|
150
93
|
it "should get the configuration of the dor-fetcher client from included yml file" do
|
151
|
-
expect(@indexer.dor_fetcher_client.service_url).to eq(
|
94
|
+
expect(@indexer.dor_fetcher_client.service_url).to eq("http://127.0.0.1:3000")
|
152
95
|
end
|
153
96
|
|
154
97
|
end # ending replacing OAI context
|
155
|
-
|
156
|
-
context "smods_rec method" do
|
157
|
-
before(:all) do
|
158
|
-
@fake_druid = 'oo000oo0000'
|
159
|
-
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
160
|
-
@mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
|
161
|
-
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
162
|
-
end
|
163
|
-
it "should call mods method on harvestdor_client" do
|
164
|
-
@hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
165
|
-
@indexer.smods_rec(@fake_druid)
|
166
|
-
end
|
167
|
-
it "should return Stanford::Mods::Record object" do
|
168
|
-
@hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
169
|
-
@indexer.smods_rec(@fake_druid).should be_an_instance_of(Stanford::Mods::Record)
|
170
|
-
end
|
171
|
-
it "should raise exception if MODS xml for the druid is empty" do
|
172
|
-
@hdor_client.stub(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
|
173
|
-
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
174
|
-
end
|
175
|
-
it "should raise exception if there is no MODS xml for the druid" do
|
176
|
-
VCR.use_cassette('exception_no_MODS_call') do
|
177
|
-
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingMods)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
context "public_xml related methods" do
|
183
|
-
before(:all) do
|
184
|
-
@id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
|
185
|
-
@cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
|
186
|
-
@rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
|
187
|
-
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
|
188
|
-
@pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
|
189
|
-
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
190
|
-
end
|
191
|
-
context "#public_xml" do
|
192
|
-
it "should call public_xml method on harvestdor_client" do
|
193
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
194
|
-
@indexer.public_xml @fake_druid
|
195
|
-
end
|
196
|
-
it "retrieves entire public xml as a Nokogiri::XML::Document" do
|
197
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
198
|
-
px = @indexer.public_xml @fake_druid
|
199
|
-
px.should be_kind_of(Nokogiri::XML::Document)
|
200
|
-
px.root.name.should == 'publicObject'
|
201
|
-
px.root.attributes['id'].text.should == "druid:#{@fake_druid}"
|
202
|
-
end
|
203
|
-
it "raises exception if public xml for the druid is empty" do
|
204
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
|
205
|
-
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
206
|
-
end
|
207
|
-
it "raises error if there is no public_xml page for the druid" do
|
208
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
|
209
|
-
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
210
|
-
end
|
211
|
-
end
|
212
|
-
context "#content_metadata" do
|
213
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
214
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
215
|
-
cm = @indexer.content_metadata(@fake_druid)
|
216
|
-
cm.should be_kind_of(Nokogiri::XML::Document)
|
217
|
-
cm.root.should_not == nil
|
218
|
-
cm.root.name.should == 'contentMetadata'
|
219
|
-
cm.root.attributes['objectId'].text.should == @fake_druid
|
220
|
-
cm.root.text.strip.should == 'foo'
|
221
|
-
end
|
222
|
-
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
223
|
-
URI::HTTP.any_instance.should_not_receive(:open)
|
224
|
-
@hdor_client.should_receive(:content_metadata).and_call_original
|
225
|
-
cm = @indexer.content_metadata(@ng_pub_xml)
|
226
|
-
cm.should be_kind_of(Nokogiri::XML::Document)
|
227
|
-
cm.root.should_not == nil
|
228
|
-
cm.root.name.should == 'contentMetadata'
|
229
|
-
cm.root.attributes['objectId'].text.should == @fake_druid
|
230
|
-
cm.root.text.strip.should == 'foo'
|
231
|
-
end
|
232
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
|
233
|
-
@hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
|
234
|
-
expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
235
|
-
end
|
236
|
-
end
|
237
|
-
context "#identity_metadata" do
|
238
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
239
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
240
|
-
im = @indexer.identity_metadata(@fake_druid)
|
241
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
242
|
-
im.root.should_not == nil
|
243
|
-
im.root.name.should == 'identityMetadata'
|
244
|
-
im.root.text.strip.should == "druid:#{@fake_druid}"
|
245
|
-
end
|
246
|
-
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
247
|
-
URI::HTTP.any_instance.should_not_receive(:open)
|
248
|
-
@hdor_client.should_receive(:identity_metadata).and_call_original
|
249
|
-
im = @indexer.identity_metadata(@ng_pub_xml)
|
250
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
251
|
-
im.root.should_not == nil
|
252
|
-
im.root.name.should == 'identityMetadata'
|
253
|
-
im.root.text.strip.should == "druid:#{@fake_druid}"
|
254
|
-
end
|
255
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
|
256
|
-
@hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
257
|
-
expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
258
|
-
end
|
259
|
-
end
|
260
|
-
context "#rights_metadata" do
|
261
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
262
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
263
|
-
im = @indexer.rights_metadata(@fake_druid)
|
264
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
265
|
-
im.root.should_not == nil
|
266
|
-
im.root.name.should == 'rightsMetadata'
|
267
|
-
im.root.text.strip.should == "bar"
|
268
|
-
end
|
269
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
|
270
|
-
@hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
271
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
272
|
-
end
|
273
|
-
end
|
274
|
-
context "#rdf" do
|
275
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
276
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
277
|
-
im = @indexer.rdf(@fake_druid)
|
278
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
279
|
-
im.root.should_not == nil
|
280
|
-
im.root.name.should == 'RDF'
|
281
|
-
im.root.text.strip.should == "relationship!"
|
282
|
-
end
|
283
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
|
284
|
-
@hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
|
285
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
286
|
-
end
|
287
|
-
end
|
288
|
-
end
|
289
|
-
|
290
|
-
context "blacklist" do
|
291
|
-
it "should be an Array with an entry for each non-empty line in the file" do
|
292
|
-
@indexer.send(:load_blacklist, @blacklist_path)
|
293
|
-
@indexer.send(:blacklist).should be_an_instance_of(Array)
|
294
|
-
@indexer.send(:blacklist).size.should == 2
|
295
|
-
end
|
296
|
-
it "should be empty Array if there was no blacklist config setting" do
|
297
|
-
VCR.use_cassette('empty_array_no_blacklist_config_call') do
|
298
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
299
|
-
expect(indexer.blacklist).to eq([])
|
300
|
-
end
|
301
|
-
end
|
302
|
-
context "load_blacklist" do
|
303
|
-
it "knows what is in the blacklist" do
|
304
|
-
VCR.use_cassette('know_what_is_in_blacklist_call') do
|
305
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
306
|
-
expect(indexer.blacklist).to eq(["druid:jf275fd6276", "druid:tc552kq0798"])
|
307
|
-
end
|
308
|
-
end
|
309
|
-
it "should not be called if there was no blacklist config setting" do
|
310
|
-
VCR.use_cassette('no_blacklist_config_call') do
|
311
|
-
lambda{
|
312
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
313
|
-
|
314
|
-
indexer.should_not_receive(:load_blacklist)
|
315
|
-
|
316
|
-
hdor_client = indexer.send(:harvestdor_client)
|
317
|
-
indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
|
318
|
-
indexer.solr_client.should_receive(:add)
|
319
|
-
indexer.solr_client.should_receive(:commit)
|
320
|
-
indexer.harvest_and_index
|
321
|
-
}
|
322
|
-
end
|
323
|
-
end
|
324
|
-
it "should only try to load a blacklist once" do
|
325
|
-
VCR.use_cassette('load_blacklist_once_call') do
|
326
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
327
|
-
indexer.send(:blacklist)
|
328
|
-
File.any_instance.should_not_receive(:open)
|
329
|
-
indexer.send(:blacklist)
|
330
|
-
end
|
331
|
-
end
|
332
|
-
it "should log an error message and throw RuntimeError if it can't find the indicated blacklist file" do
|
333
|
-
VCR.use_cassette('no_blacklist_found_call') do
|
334
|
-
exp_msg = 'Unable to find list of druids at bad_path'
|
335
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => 'bad_path'})
|
336
|
-
indexer.logger.should_receive(:fatal).with(exp_msg)
|
337
|
-
expect { indexer.send(:load_blacklist, 'bad_path') }.to raise_error(exp_msg)
|
338
|
-
end
|
339
|
-
end
|
340
|
-
end
|
341
|
-
end # blacklist
|
342
98
|
|
343
99
|
context "whitelist" do
|
344
100
|
it "knows what is in the whitelist" do
|
345
101
|
VCR.use_cassette('know_what_is_in_whitelist_call') do
|
346
102
|
lambda{
|
347
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
|
103
|
+
indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
|
348
104
|
expect(indexer.whitelist).to eq(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092"])
|
349
105
|
}
|
350
106
|
end
|
351
107
|
end
|
352
108
|
it "should be an Array with an entry for each non-empty line in the file" do
|
353
109
|
@indexer.send(:load_whitelist, @whitelist_path)
|
354
|
-
@indexer.send(:whitelist).should be_an_instance_of(Array)
|
355
|
-
@indexer.send(:whitelist).size.should == 3
|
110
|
+
expect(@indexer.send(:whitelist)).to be_an_instance_of(Array)
|
111
|
+
expect(@indexer.send(:whitelist).size).to eq(3)
|
356
112
|
end
|
357
113
|
it "should be empty Array if there was no whitelist config setting" do
|
358
114
|
VCR.use_cassette('empty_array_no_whitelist_config_call') do
|
359
115
|
lambda{
|
360
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
116
|
+
indexer = Harvestdor::Indexer.new()
|
361
117
|
expect(indexer.whitelist).to eq([])
|
362
118
|
}
|
363
119
|
end
|
@@ -366,31 +122,31 @@ describe Harvestdor::Indexer do
|
|
366
122
|
it "should not be called if there was no whitelist config setting" do
|
367
123
|
VCR.use_cassette('no_whitelist_config_call') do
|
368
124
|
lambda{
|
369
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
125
|
+
indexer = Harvestdor::Indexer.new()
|
370
126
|
|
371
|
-
indexer.should_not_receive(:load_whitelist)
|
127
|
+
expect(indexer).not_to receive(:load_whitelist)
|
372
128
|
|
373
129
|
hdor_client = indexer.send(:harvestdor_client)
|
374
|
-
indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
|
375
|
-
indexer.solr_client.should_receive(:add)
|
376
|
-
indexer.solr_client.should_receive(:commit)
|
130
|
+
expect(indexer.dor_fetcher_client).to receive(:druid_array).and_return([@fake_druid])
|
131
|
+
expect(indexer.solr_client).to receive(:add)
|
132
|
+
expect(indexer.solr_client).to receive(:commit)
|
377
133
|
indexer.harvest_and_index
|
378
134
|
}
|
379
135
|
end
|
380
136
|
end
|
381
137
|
it "should only try to load a whitelist once" do
|
382
138
|
VCR.use_cassette('load_whitelist_once_call') do
|
383
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
|
139
|
+
indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
|
384
140
|
indexer.send(:whitelist)
|
385
|
-
File.any_instance.should_not_receive(:open)
|
141
|
+
expect_any_instance_of(File).not_to receive(:open)
|
386
142
|
indexer.send(:whitelist)
|
387
143
|
end
|
388
144
|
end
|
389
145
|
it "should log an error message and throw RuntimeError if it can't find the indicated whitelist file" do
|
390
146
|
VCR.use_cassette('cant_find_whitelist_call') do
|
391
147
|
exp_msg = 'Unable to find list of druids at bad_path'
|
392
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => 'bad_path'})
|
393
|
-
indexer.logger.should_receive(:fatal).with(exp_msg)
|
148
|
+
indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => 'bad_path'))
|
149
|
+
expect(indexer.logger).to receive(:fatal).with(exp_msg)
|
394
150
|
expect { indexer.send(:load_whitelist, 'bad_path') }.to raise_error(exp_msg)
|
395
151
|
end
|
396
152
|
end
|
@@ -399,15 +155,15 @@ describe Harvestdor::Indexer do
|
|
399
155
|
|
400
156
|
it "solr_client should initialize the rsolr client using the options from the config" do
|
401
157
|
VCR.use_cassette('rsolr_client_config_call') do
|
402
|
-
indexer = Harvestdor::Indexer.new(
|
403
|
-
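RSolr.should_receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')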
|
404
|
-
indexer.solr_client
|
158
|
+
indexer = Harvestdor::Indexer.new(Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
|
159
|
+
expect(RSolr).to receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')
|
160
|
+
indexer.solr
|
405
161
|
end
|
406
162
|
end
|
407
163
|
|
408
164
|
context "skip heartbeat" do
|
409
165
|
it "allows me to use a fake url for dor-fetcher-client" do
|
410
|
-
expect {Harvestdor::Indexer.new(
|
166
|
+
expect {Harvestdor::Indexer.new()}.not_to raise_error
|
411
167
|
end
|
412
168
|
end
|
413
169
|
end
|