harvestdor-indexer 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/Gemfile +1 -1
- data/README.rdoc +1 -0
- data/harvestdor-indexer.gemspec +4 -2
- data/lib/harvestdor-indexer.rb +1 -317
- data/lib/harvestdor/indexer.rb +159 -0
- data/lib/harvestdor/indexer/metrics.rb +53 -0
- data/lib/harvestdor/indexer/resource.rb +174 -0
- data/lib/harvestdor/indexer/solr.rb +39 -0
- data/lib/{harvestdor-indexer → harvestdor/indexer}/version.rb +1 -1
- data/spec/config/ap.yml +32 -44
- data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +96 -0
- data/spec/fixtures/vcr_cassettes/process_druids_whitelist_call.yml +1494 -16
- data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +80 -27
- data/spec/spec_helper.rb +1 -1
- data/spec/unit/harvestdor-indexer-resource_spec.rb +174 -0
- data/spec/unit/harvestdor-indexer-solr_spec.rb +32 -0
- data/spec/unit/harvestdor-indexer_spec.rb +47 -291
- data/spec/unit/harvestdor/indexer/metrics_spec.rb +46 -0
- metadata +45 -10
- data/config/dor-fetcher-client.yml +0 -4
- data/spec/config/ap_blacklist.txt +0 -5
@@ -2,47 +2,100 @@
|
|
2
2
|
http_interactions:
|
3
3
|
- request:
|
4
4
|
method: get
|
5
|
-
uri: http://
|
5
|
+
uri: http://purl.stanford.edu/yg867hg1375.xml
|
6
6
|
body:
|
7
7
|
encoding: US-ASCII
|
8
8
|
string: ''
|
9
9
|
headers:
|
10
|
+
Accept-Encoding:
|
11
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
10
12
|
Accept:
|
11
|
-
-
|
13
|
+
- "*/*"
|
12
14
|
User-Agent:
|
13
15
|
- Ruby
|
14
16
|
response:
|
15
17
|
status:
|
16
18
|
code: 200
|
17
|
-
message:
|
19
|
+
message: ''
|
18
20
|
headers:
|
19
|
-
|
20
|
-
-
|
21
|
-
|
22
|
-
-
|
23
|
-
X-
|
24
|
-
-
|
25
|
-
|
26
|
-
-
|
21
|
+
Date:
|
22
|
+
- Wed, 17 Dec 2014 19:39:37 GMT
|
23
|
+
Server:
|
24
|
+
- Apache/2.2.15 (Red Hat)
|
25
|
+
X-Powered-By:
|
26
|
+
- Phusion Passenger (mod_rails/mod_rack) 3.0.19
|
27
|
+
X-Ua-Compatible:
|
28
|
+
- IE=Edge,chrome=1
|
27
29
|
Etag:
|
28
|
-
- '"
|
30
|
+
- '"67aa6d1481ba1537ae63af5aaf493f84"'
|
29
31
|
Cache-Control:
|
30
32
|
- max-age=0, private, must-revalidate
|
31
|
-
X-Meta-Request-Version:
|
32
|
-
- 0.3.4
|
33
33
|
X-Request-Id:
|
34
|
-
-
|
34
|
+
- f2e753d56bf896cde6e941be0f51d05a
|
35
35
|
X-Runtime:
|
36
|
-
- '0.
|
37
|
-
|
38
|
-
-
|
39
|
-
|
40
|
-
-
|
36
|
+
- '0.007983'
|
37
|
+
X-Rack-Cache:
|
38
|
+
- miss
|
39
|
+
Status:
|
40
|
+
- '200'
|
41
|
+
Content-Length:
|
42
|
+
- '2180'
|
43
|
+
Content-Type:
|
44
|
+
- application/xml; charset=utf-8
|
41
45
|
body:
|
42
|
-
encoding:
|
43
|
-
string:
|
44
|
-
|
45
|
-
|
46
|
+
encoding: UTF-8
|
47
|
+
string: |
|
48
|
+
<publicObject id="druid:yg867hg1375" published="2013-11-11T15:34:32-08:00">
|
49
|
+
<identityMetadata>
|
50
|
+
<objectId>druid:yg867hg1375</objectId>
|
51
|
+
<objectCreator>DOR</objectCreator>
|
52
|
+
<objectLabel>Francis E. Stafford photographs, 1909-1933</objectLabel>
|
53
|
+
<objectType>collection</objectType>
|
54
|
+
<adminPolicy>druid:vb546ms7107</adminPolicy>
|
55
|
+
<otherId name="catkey">9615156</otherId>
|
56
|
+
<otherId name="uuid">8f1feb20-4b29-11e3-8e31-0050569b3c3c</otherId>
|
57
|
+
<tag>Remediated By : 3.25.3</tag>
|
58
|
+
</identityMetadata>
|
59
|
+
<xml/>
|
60
|
+
<rightsMetadata>
|
61
|
+
<access type="discover">
|
62
|
+
<machine>
|
63
|
+
<world/>
|
64
|
+
</machine>
|
65
|
+
</access>
|
66
|
+
<access type="read">
|
67
|
+
<machine>
|
68
|
+
<world/>
|
69
|
+
</machine>
|
70
|
+
</access>
|
71
|
+
<use>
|
72
|
+
<human type="useAndReproduction"/>
|
73
|
+
<human type="creativeCommons"/>
|
74
|
+
<machine type="creativeCommons"/>
|
75
|
+
</use>
|
76
|
+
<copyright>
|
77
|
+
<human/>
|
78
|
+
</copyright>
|
79
|
+
</rightsMetadata>
|
80
|
+
<rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#" xmlns:hydra="http://projecthydra.org/ns/relations#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
81
|
+
<rdf:Description rdf:about="info:fedora/druid:yg867hg1375">
|
82
|
+
</rdf:Description>
|
83
|
+
</rdf:RDF>
|
84
|
+
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:srw_dc="info:srw/schema/1/dc-schema" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
|
85
|
+
<dc:title>Francis E. Stafford photographs, 1909-1933</dc:title>
|
86
|
+
<dc:contributor>Stafford, Francis E., 1884-1938</dc:contributor>
|
87
|
+
<dc:type>Collection</dc:type>
|
88
|
+
<dc:date>1909-1933</dc:date>
|
89
|
+
<dc:language>und</dc:language>
|
90
|
+
<dc:format>3 oversize boxes.</dc:format>
|
91
|
+
<dc:description>Photographs of scenes in China, mainly between 1909 and 1915.</dc:description>
|
92
|
+
<dc:rights>Closed. Digital use copies available.</dc:rights>
|
93
|
+
<dc:description type="biographical/historical">American missionary in China, 1909-1915 and 1932-1933.</dc:description>
|
94
|
+
<dc:coverage>China</dc:coverage>
|
95
|
+
</oai_dc:dc>
|
96
|
+
</publicObject>
|
97
|
+
http_version:
|
98
|
+
recorded_at: Wed, 17 Dec 2014 19:39:38 GMT
|
46
99
|
- request:
|
47
100
|
method: get
|
48
101
|
uri: http://127.0.0.1:3000/collections/yg867hg1375
|
@@ -74,9 +127,9 @@ http_interactions:
|
|
74
127
|
X-Meta-Request-Version:
|
75
128
|
- 0.3.4
|
76
129
|
X-Request-Id:
|
77
|
-
-
|
130
|
+
- 125a9964-6326-4114-9f59-fb533551d554
|
78
131
|
X-Runtime:
|
79
|
-
- '0.
|
132
|
+
- '0.011086'
|
80
133
|
Connection:
|
81
134
|
- close
|
82
135
|
Server:
|
@@ -95,5 +148,5 @@ http_interactions:
|
|
95
148
|
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
96
149
|
social customs, and people."}],"counts":{"collections":1,"items":5,"total_count":6}}'
|
97
150
|
http_version:
|
98
|
-
recorded_at: Wed, 12 Nov 2014 19:34:
|
151
|
+
recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
|
99
152
|
recorded_with: VCR 2.9.3
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,174 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::Resource do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
VCR.use_cassette('before_all_call') do
|
7
|
+
@config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
|
8
|
+
require 'yaml'
|
9
|
+
@config = YAML.load_file(@config_yml_path)
|
10
|
+
@fake_druid = 'oo000oo0000'
|
11
|
+
|
12
|
+
@indexer = Harvestdor::Indexer.new(@config)
|
13
|
+
@hdor_client = @indexer.send(:harvestdor_client)
|
14
|
+
@whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
let :resource do
|
19
|
+
Harvestdor::Indexer::Resource.new(@indexer, @fake_druid)
|
20
|
+
end
|
21
|
+
|
22
|
+
context "smods_rec method" do
|
23
|
+
before(:all) do
|
24
|
+
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
25
|
+
@mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
|
26
|
+
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
27
|
+
end
|
28
|
+
it "should call mods method on harvestdor_client" do
|
29
|
+
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
30
|
+
resource.smods_rec
|
31
|
+
end
|
32
|
+
it "should return Stanford::Mods::Record object" do
|
33
|
+
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
34
|
+
expect(resource.smods_rec).to be_an_instance_of(Stanford::Mods::Record)
|
35
|
+
end
|
36
|
+
it "should raise exception if MODS xml for the druid is empty" do
|
37
|
+
allow(@hdor_client).to receive(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
|
38
|
+
expect { resource.smods_rec }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
39
|
+
end
|
40
|
+
it "should raise exception if there is no MODS xml for the druid" do
|
41
|
+
VCR.use_cassette('exception_no_MODS_call') do
|
42
|
+
expect { resource.smods_rec }.to raise_error(Harvestdor::Errors::MissingMods)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context "public_xml related methods" do
|
48
|
+
before(:all) do
|
49
|
+
@id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
|
50
|
+
@cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
|
51
|
+
@rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
|
52
|
+
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
|
53
|
+
@pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
|
54
|
+
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
55
|
+
end
|
56
|
+
context "#public_xml" do
|
57
|
+
it "should call public_xml method on harvestdor_client" do
|
58
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
59
|
+
resource.public_xml
|
60
|
+
end
|
61
|
+
it "retrieves entire public xml as a Nokogiri::XML::Document" do
|
62
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
63
|
+
px = resource.public_xml
|
64
|
+
expect(px).to be_kind_of(Nokogiri::XML::Document)
|
65
|
+
expect(px.root.name).to eq('publicObject')
|
66
|
+
expect(px.root.attributes['id'].text).to eq("druid:#{@fake_druid}")
|
67
|
+
end
|
68
|
+
it "raises exception if public xml for the druid is empty" do
|
69
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
|
70
|
+
expect { resource.public_xml }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
71
|
+
end
|
72
|
+
it "raises error if there is no public_xml page for the druid" do
|
73
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(nil)
|
74
|
+
expect { resource.public_xml }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
context "#content_metadata" do
|
78
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
79
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
80
|
+
cm = resource.content_metadata
|
81
|
+
expect(cm).to be_kind_of(Nokogiri::XML::Document)
|
82
|
+
expect(cm.root).not_to eq(nil)
|
83
|
+
expect(cm.root.name).to eq('contentMetadata')
|
84
|
+
expect(cm.root.attributes['objectId'].text).to eq(@fake_druid)
|
85
|
+
expect(cm.root.text.strip).to eq('foo')
|
86
|
+
end
|
87
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
|
88
|
+
expect(@hdor_client).to receive(:content_metadata).with(@fake_druid).and_return(nil)
|
89
|
+
expect { resource.content_metadata }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
90
|
+
end
|
91
|
+
end
|
92
|
+
context "#identity_metadata" do
|
93
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
94
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
95
|
+
im = resource.identity_metadata
|
96
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
97
|
+
expect(im.root).not_to eq(nil)
|
98
|
+
expect(im.root.name).to eq('identityMetadata')
|
99
|
+
expect(im.root.text.strip).to eq("druid:#{@fake_druid}")
|
100
|
+
end
|
101
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
|
102
|
+
expect(@hdor_client).to receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
103
|
+
expect { resource.identity_metadata }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
context "#rights_metadata" do
|
107
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
108
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
109
|
+
im = resource.rights_metadata
|
110
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
111
|
+
expect(im.root).not_to eq(nil)
|
112
|
+
expect(im.root.name).to eq('rightsMetadata')
|
113
|
+
expect(im.root.text.strip).to eq("bar")
|
114
|
+
end
|
115
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
|
116
|
+
expect(@hdor_client).to receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
117
|
+
expect { resource.rights_metadata }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
118
|
+
end
|
119
|
+
end
|
120
|
+
context "#rdf" do
|
121
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
122
|
+
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
123
|
+
im = resource.rdf
|
124
|
+
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
125
|
+
expect(im.root).not_to eq(nil)
|
126
|
+
expect(im.root.name).to eq('RDF')
|
127
|
+
expect(im.root.text.strip).to eq("relationship!")
|
128
|
+
end
|
129
|
+
it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
|
130
|
+
expect(@hdor_client).to receive(:rdf).with(@fake_druid).and_return(nil)
|
131
|
+
expect { resource.rdf }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "#public_xml_or_druid" do
|
136
|
+
it "should return the public_xml, if the public_xml has been loaded" do
|
137
|
+
allow(resource).to receive(:public_xml?).and_return(true)
|
138
|
+
allow(resource).to receive(:public_xml).and_return(double)
|
139
|
+
expect(resource.public_xml_or_druid).to eq resource.public_xml
|
140
|
+
end
|
141
|
+
it "should return the druid, if the public_xml has not been loaded" do
|
142
|
+
allow(resource).to receive(:public_xml?).and_return(false)
|
143
|
+
expect(resource.public_xml_or_druid).to eq @fake_druid
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "#identity_md_obj_label" do
|
148
|
+
it "should extract the objectLabel from the identity metadata" do
|
149
|
+
allow(resource).to receive(:identity_metadata).and_return(Nokogiri::XML("<identityMetadata><objectLabel>label</objectLabel></identityMetadata>"))
|
150
|
+
expect(resource.identity_md_obj_label).to eq "label"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
describe "#collections" do
|
155
|
+
it "should extract the collection this resource is a member of and return Resource objects for those collections" do
|
156
|
+
allow(resource).to receive(:public_xml).and_return(Nokogiri::XML <<-EOF
|
157
|
+
<publicObject>
|
158
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#">
|
159
|
+
<rdf:Description>
|
160
|
+
<fedora:isMemberOfCollection rdf:resource="some:druid" />
|
161
|
+
</rdf:Description>
|
162
|
+
</rdf:RDF>
|
163
|
+
</publicObject>
|
164
|
+
EOF
|
165
|
+
)
|
166
|
+
|
167
|
+
expect(resource.collections.length).to eq 1
|
168
|
+
expect(resource.collections.first.druid).to eq "some:druid"
|
169
|
+
expect(resource.collections.first.indexer).to eq resource.indexer
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::Solr do
|
4
|
+
let :indexer do
|
5
|
+
double(logger: Logger.new("/dev/null"))
|
6
|
+
end
|
7
|
+
|
8
|
+
let :solr do
|
9
|
+
Harvestdor::Indexer::Solr.new indexer
|
10
|
+
end
|
11
|
+
|
12
|
+
# The method that sends the solr document to solr
|
13
|
+
describe "#add" do
|
14
|
+
let(:doc_hash) do
|
15
|
+
{
|
16
|
+
:id => "whatever",
|
17
|
+
:modsxml => 'whatever',
|
18
|
+
:title_display => 'title',
|
19
|
+
:pub_year_tisim => 'some year',
|
20
|
+
:author_person_display => 'author',
|
21
|
+
:format => 'Image',
|
22
|
+
:language => 'English'
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
it "sends an add request to the solr_client" do
|
27
|
+
expect(solr.client).to receive(:add).with(doc_hash)
|
28
|
+
solr.add(doc_hash)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
@@ -5,61 +5,37 @@ describe Harvestdor::Indexer do
|
|
5
5
|
before(:all) do
|
6
6
|
VCR.use_cassette('before_all_call') do
|
7
7
|
@config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
|
8
|
-
@client_config_path = File.join(File.dirname(__FILE__), "../..", "config", "dor-fetcher-client.yml")
|
9
|
-
@indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
10
8
|
require 'yaml'
|
11
|
-
@
|
9
|
+
@config = YAML.load_file(@config_yml_path)
|
10
|
+
|
11
|
+
@indexer = Harvestdor::Indexer.new(@config) do |config|
|
12
|
+
config.whitelist = ["druid:yg867hg1375"]
|
13
|
+
end
|
12
14
|
@hdor_client = @indexer.send(:harvestdor_client)
|
13
|
-
@fake_druid = 'oo000oo0000'
|
14
|
-
@blacklist_path = File.join(File.dirname(__FILE__), "../config/ap_blacklist.txt")
|
15
|
+
@fake_druid = 'druid:oo000oo0000'
|
15
16
|
@whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
|
16
17
|
end
|
17
18
|
end
|
18
|
-
|
19
|
-
# The method that sends the solr document to solr
|
20
|
-
describe "#solr_add" do
|
21
|
-
before(:each) do
|
22
|
-
doc_hash = {
|
23
|
-
:modsxml => 'whatever',
|
24
|
-
:title_display => 'title',
|
25
|
-
:pub_year_tisim => 'some year',
|
26
|
-
:author_person_display => 'author',
|
27
|
-
:format => 'Image',
|
28
|
-
:language => 'English'
|
29
|
-
}
|
30
|
-
end
|
31
|
-
it "sends an add request to the solr_client" do
|
32
|
-
expect(@indexer.solr_client).to receive(:add)
|
33
|
-
@indexer.solr_add(@doc_hash, "abc123")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
19
|
+
|
37
20
|
describe "access methods" do
|
38
21
|
it "initializes success count" do
|
39
|
-
@indexer.success_count.
|
22
|
+
expect(@indexer.metrics.success_count).to eq(0)
|
40
23
|
end
|
41
24
|
it "initializes error count" do
|
42
|
-
@indexer.error_count.
|
43
|
-
end
|
44
|
-
it "initializes max_retries" do
|
45
|
-
expect(@indexer.max_retries).to eql(10)
|
46
|
-
end
|
47
|
-
it "allows overriding of max_retries" do
|
48
|
-
@indexer.max_retries=6
|
49
|
-
@indexer.max_retries.should == 6
|
25
|
+
expect(@indexer.metrics.error_count).to eq(0)
|
50
26
|
end
|
51
27
|
end
|
52
28
|
|
53
29
|
describe "logging" do
|
54
30
|
it "should write the log file to the directory indicated by log_dir" do
|
55
31
|
@indexer.logger.info("indexer_spec logging test message")
|
56
|
-
File.exists?(File.join(@
|
32
|
+
expect(File.exists?(File.join(@config['harvestdor']['log_dir'], @config['harvestdor']['log_name']))).to eq(true)
|
57
33
|
end
|
58
34
|
end
|
59
35
|
|
60
36
|
it "should initialize the harvestdor_client from the config" do
|
61
37
|
expect(@hdor_client).to be_an_instance_of(Harvestdor::Client)
|
62
|
-
expect(@hdor_client.config.default_set).to eq(@
|
38
|
+
expect(@hdor_client.config.default_set).to eq(@config['harvestdor']['default_set'])
|
63
39
|
end
|
64
40
|
|
65
41
|
context "harvest_and_index" do
|
@@ -69,63 +45,34 @@ describe Harvestdor::Indexer do
|
|
69
45
|
}
|
70
46
|
end
|
71
47
|
it "should call dor_fetcher_client.druid_array and then call :add on rsolr connection" do
|
72
|
-
|
73
|
-
@indexer.
|
74
|
-
@indexer.
|
48
|
+
allow_any_instance_of(Harvestdor::Indexer::Resource).to receive(:collection?).and_return(false)
|
49
|
+
expect(@indexer).to receive(:druids).and_return([@fake_druid])
|
50
|
+
expect(@indexer.solr).to receive(:add).with(@doc_hash)
|
51
|
+
expect(@indexer.solr).to receive(:commit!)
|
75
52
|
@indexer.harvest_and_index
|
76
53
|
end
|
77
54
|
|
78
55
|
it "should only call :commit on rsolr connection once" do
|
79
56
|
VCR.use_cassette('single_rsolr_connection_call') do
|
80
|
-
|
81
|
-
|
82
|
-
indexer.
|
83
|
-
indexer.
|
84
|
-
indexer.
|
85
|
-
indexer.harvest_and_index
|
57
|
+
hdor_client = @indexer.send(:harvestdor_client)
|
58
|
+
expect(@indexer.dor_fetcher_client).to receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
59
|
+
expect(@indexer.solr).to receive(:add).exactly(6).times
|
60
|
+
expect(@indexer.solr).to receive(:commit!).once
|
61
|
+
@indexer.harvest_and_index
|
86
62
|
end
|
87
63
|
end
|
88
64
|
|
89
|
-
it "should not process druids in blacklist" do
|
90
|
-
VCR.use_cassette('ignore_druids_in_blacklist_call') do
|
91
|
-
lambda{
|
92
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
93
|
-
hdor_client = indexer.send(:harvestdor_client)
|
94
|
-
indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
95
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
96
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
97
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:tc552kq0798'}))
|
98
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:th998nk0722'}))
|
99
|
-
indexer.solr_client.should_receive(:commit)
|
100
|
-
indexer.harvest_and_index
|
101
|
-
}
|
102
|
-
end
|
103
|
-
end
|
104
|
-
it "should not process druid if it is in both blacklist and whitelist" do
|
105
|
-
VCR.use_cassette('ignore_druids_in_blacklist_and_whitelist_call') do
|
106
|
-
lambda{
|
107
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path, :whitelist => @whitelist_path})
|
108
|
-
hdor_client = indexer.send(:harvestdor_client)
|
109
|
-
indexer.dor_fetcher_client.should_not_receive(:druid_array)
|
110
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
|
111
|
-
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
112
|
-
indexer.solr_client.should_receive(:commit)
|
113
|
-
indexer.harvest_and_index
|
114
|
-
}
|
115
|
-
end
|
116
|
-
end
|
117
65
|
it "should only process druids in whitelist if it exists" do
|
118
66
|
VCR.use_cassette('process_druids_whitelist_call') do
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
125
|
-
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
126
|
-
indexer.solr_client.should_receive(:commit)
|
127
|
-
indexer.harvest_and_index
|
67
|
+
indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => @whitelist_path))
|
68
|
+
hdor_client = indexer.send(:harvestdor_client)
|
69
|
+
added = []
|
70
|
+
allow(indexer.solr).to receive(:add) { |hash|
|
71
|
+
added << hash[:id]
|
128
72
|
}
|
73
|
+
expect(indexer.solr).to receive(:commit!)
|
74
|
+
indexer.harvest_and_index
|
75
|
+
expect(added).to match_array ["druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534", "druid:yg867hg1375", 'druid:jf275fd6276', 'druid:nz353cp1092']
|
129
76
|
end
|
130
77
|
end
|
131
78
|
|
@@ -137,227 +84,36 @@ describe Harvestdor::Indexer do
|
|
137
84
|
expect(@indexer.dor_fetcher_client).to be_an_instance_of(DorFetcher::Client)
|
138
85
|
end
|
139
86
|
|
140
|
-
it "should strip off is_member_of_collection_ and is_governed_by_ and return only the druid" do
|
141
|
-
expect(@indexer.strip_default_set_string()).to eq("yg867hg1375")
|
142
|
-
end
|
143
|
-
|
144
87
|
it "druids method should call druid_array and get_collection methods on fetcher_client" do
|
145
88
|
VCR.use_cassette('get_collection_druids_call') do
|
146
|
-
expect(@indexer.
|
89
|
+
expect(@indexer.resources.map(&:druid)).to match_array ["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"]
|
147
90
|
end
|
148
91
|
end
|
149
92
|
|
150
93
|
it "should get the configuration of the dor-fetcher client from included yml file" do
|
151
|
-
expect(@indexer.dor_fetcher_client.service_url).to eq(
|
94
|
+
expect(@indexer.dor_fetcher_client.service_url).to eq("http://127.0.0.1:3000")
|
152
95
|
end
|
153
96
|
|
154
97
|
end # ending replacing OAI context
|
155
|
-
|
156
|
-
context "smods_rec method" do
|
157
|
-
before(:all) do
|
158
|
-
@fake_druid = 'oo000oo0000'
|
159
|
-
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
160
|
-
@mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
|
161
|
-
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
162
|
-
end
|
163
|
-
it "should call mods method on harvestdor_client" do
|
164
|
-
@hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
165
|
-
@indexer.smods_rec(@fake_druid)
|
166
|
-
end
|
167
|
-
it "should return Stanford::Mods::Record object" do
|
168
|
-
@hdor_client.should_receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
169
|
-
@indexer.smods_rec(@fake_druid).should be_an_instance_of(Stanford::Mods::Record)
|
170
|
-
end
|
171
|
-
it "should raise exception if MODS xml for the druid is empty" do
|
172
|
-
@hdor_client.stub(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
|
173
|
-
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
174
|
-
end
|
175
|
-
it "should raise exception if there is no MODS xml for the druid" do
|
176
|
-
VCR.use_cassette('exception_no_MODS_call') do
|
177
|
-
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingMods)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
context "public_xml related methods" do
|
183
|
-
before(:all) do
|
184
|
-
@id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
|
185
|
-
@cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
|
186
|
-
@rights_md_xml = "<rightsMetadata><access type=\"discover\"><machine><world>bar</world></machine></access></rightsMetadata>"
|
187
|
-
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@fake_druid}\">relationship!</rdf:Description></rdf:RDF>"
|
188
|
-
@pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
|
189
|
-
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
190
|
-
end
|
191
|
-
context "#public_xml" do
|
192
|
-
it "should call public_xml method on harvestdor_client" do
|
193
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
194
|
-
@indexer.public_xml @fake_druid
|
195
|
-
end
|
196
|
-
it "retrieves entire public xml as a Nokogiri::XML::Document" do
|
197
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
198
|
-
px = @indexer.public_xml @fake_druid
|
199
|
-
px.should be_kind_of(Nokogiri::XML::Document)
|
200
|
-
px.root.name.should == 'publicObject'
|
201
|
-
px.root.attributes['id'].text.should == "druid:#{@fake_druid}"
|
202
|
-
end
|
203
|
-
it "raises exception if public xml for the druid is empty" do
|
204
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
|
205
|
-
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
206
|
-
end
|
207
|
-
it "raises error if there is no public_xml page for the druid" do
|
208
|
-
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
|
209
|
-
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
210
|
-
end
|
211
|
-
end
|
212
|
-
context "#content_metadata" do
|
213
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
214
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
215
|
-
cm = @indexer.content_metadata(@fake_druid)
|
216
|
-
cm.should be_kind_of(Nokogiri::XML::Document)
|
217
|
-
cm.root.should_not == nil
|
218
|
-
cm.root.name.should == 'contentMetadata'
|
219
|
-
cm.root.attributes['objectId'].text.should == @fake_druid
|
220
|
-
cm.root.text.strip.should == 'foo'
|
221
|
-
end
|
222
|
-
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
223
|
-
URI::HTTP.any_instance.should_not_receive(:open)
|
224
|
-
@hdor_client.should_receive(:content_metadata).and_call_original
|
225
|
-
cm = @indexer.content_metadata(@ng_pub_xml)
|
226
|
-
cm.should be_kind_of(Nokogiri::XML::Document)
|
227
|
-
cm.root.should_not == nil
|
228
|
-
cm.root.name.should == 'contentMetadata'
|
229
|
-
cm.root.attributes['objectId'].text.should == @fake_druid
|
230
|
-
cm.root.text.strip.should == 'foo'
|
231
|
-
end
|
232
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
|
233
|
-
@hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
|
234
|
-
expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
235
|
-
end
|
236
|
-
end
|
237
|
-
context "#identity_metadata" do
|
238
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
239
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
240
|
-
im = @indexer.identity_metadata(@fake_druid)
|
241
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
242
|
-
im.root.should_not == nil
|
243
|
-
im.root.name.should == 'identityMetadata'
|
244
|
-
im.root.text.strip.should == "druid:#{@fake_druid}"
|
245
|
-
end
|
246
|
-
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
247
|
-
URI::HTTP.any_instance.should_not_receive(:open)
|
248
|
-
@hdor_client.should_receive(:identity_metadata).and_call_original
|
249
|
-
im = @indexer.identity_metadata(@ng_pub_xml)
|
250
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
251
|
-
im.root.should_not == nil
|
252
|
-
im.root.name.should == 'identityMetadata'
|
253
|
-
im.root.text.strip.should == "druid:#{@fake_druid}"
|
254
|
-
end
|
255
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
|
256
|
-
@hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
257
|
-
expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
258
|
-
end
|
259
|
-
end
|
260
|
-
context "#rights_metadata" do
|
261
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
262
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
263
|
-
im = @indexer.rights_metadata(@fake_druid)
|
264
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
265
|
-
im.root.should_not == nil
|
266
|
-
im.root.name.should == 'rightsMetadata'
|
267
|
-
im.root.text.strip.should == "bar"
|
268
|
-
end
|
269
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
|
270
|
-
@hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
271
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
272
|
-
end
|
273
|
-
end
|
274
|
-
context "#rdf" do
|
275
|
-
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
276
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
277
|
-
im = @indexer.rdf(@fake_druid)
|
278
|
-
im.should be_kind_of(Nokogiri::XML::Document)
|
279
|
-
im.root.should_not == nil
|
280
|
-
im.root.name.should == 'RDF'
|
281
|
-
im.root.text.strip.should == "relationship!"
|
282
|
-
end
|
283
|
-
it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
|
284
|
-
@hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
|
285
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
286
|
-
end
|
287
|
-
end
|
288
|
-
end
|
289
|
-
|
290
|
-
context "blacklist" do
|
291
|
-
it "should be an Array with an entry for each non-empty line in the file" do
|
292
|
-
@indexer.send(:load_blacklist, @blacklist_path)
|
293
|
-
@indexer.send(:blacklist).should be_an_instance_of(Array)
|
294
|
-
@indexer.send(:blacklist).size.should == 2
|
295
|
-
end
|
296
|
-
it "should be empty Array if there was no blacklist config setting" do
|
297
|
-
VCR.use_cassette('empty_array_no_blacklist_config_call') do
|
298
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
299
|
-
expect(indexer.blacklist).to eq([])
|
300
|
-
end
|
301
|
-
end
|
302
|
-
context "load_blacklist" do
|
303
|
-
it "knows what is in the blacklist" do
|
304
|
-
VCR.use_cassette('know_what_is_in_blacklist_call') do
|
305
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
306
|
-
expect(indexer.blacklist).to eq(["druid:jf275fd6276", "druid:tc552kq0798"])
|
307
|
-
end
|
308
|
-
end
|
309
|
-
it "should not be called if there was no blacklist config setting" do
|
310
|
-
VCR.use_cassette('no_blacklist_config_call') do
|
311
|
-
lambda{
|
312
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
313
|
-
|
314
|
-
indexer.should_not_receive(:load_blacklist)
|
315
|
-
|
316
|
-
hdor_client = indexer.send(:harvestdor_client)
|
317
|
-
indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
|
318
|
-
indexer.solr_client.should_receive(:add)
|
319
|
-
indexer.solr_client.should_receive(:commit)
|
320
|
-
indexer.harvest_and_index
|
321
|
-
}
|
322
|
-
end
|
323
|
-
end
|
324
|
-
it "should only try to load a blacklist once" do
|
325
|
-
VCR.use_cassette('load_blacklist_once_call') do
|
326
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
327
|
-
indexer.send(:blacklist)
|
328
|
-
File.any_instance.should_not_receive(:open)
|
329
|
-
indexer.send(:blacklist)
|
330
|
-
end
|
331
|
-
end
|
332
|
-
it "should log an error message and throw RuntimeError if it can't find the indicated blacklist file" do
|
333
|
-
VCR.use_cassette('no_blacklist_found_call') do
|
334
|
-
exp_msg = 'Unable to find list of druids at bad_path'
|
335
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => 'bad_path'})
|
336
|
-
indexer.logger.should_receive(:fatal).with(exp_msg)
|
337
|
-
expect { indexer.send(:load_blacklist, 'bad_path') }.to raise_error(exp_msg)
|
338
|
-
end
|
339
|
-
end
|
340
|
-
end
|
341
|
-
end # blacklist
|
342
98
|
|
343
99
|
context "whitelist" do
|
344
100
|
it "knows what is in the whitelist" do
|
345
101
|
VCR.use_cassette('know_what_is_in_whitelist_call') do
|
346
102
|
lambda{
|
347
|
-
indexer = Harvestdor::Indexer.new(
|
103
|
+
indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
|
348
104
|
expect(indexer.whitelist).to eq(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092"])
|
349
105
|
}
|
350
106
|
end
|
351
107
|
end
|
352
108
|
it "should be an Array with an entry for each non-empty line in the file" do
|
353
109
|
@indexer.send(:load_whitelist, @whitelist_path)
|
354
|
-
@indexer.send(:whitelist).
|
355
|
-
@indexer.send(:whitelist).size.
|
110
|
+
expect(@indexer.send(:whitelist)).to be_an_instance_of(Array)
|
111
|
+
expect(@indexer.send(:whitelist).size).to eq(3)
|
356
112
|
end
|
357
113
|
it "should be empty Array if there was no whitelist config setting" do
|
358
114
|
VCR.use_cassette('empty_array_no_whitelist_config_call') do
|
359
115
|
lambda{
|
360
|
-
indexer = Harvestdor::Indexer.new(
|
116
|
+
indexer = Harvestdor::Indexer.new()
|
361
117
|
expect(indexer.whitelist).to eq([])
|
362
118
|
}
|
363
119
|
end
|
@@ -366,31 +122,31 @@ describe Harvestdor::Indexer do
|
|
366
122
|
it "should not be called if there was no whitelist config setting" do
|
367
123
|
VCR.use_cassette('no_whitelist_config_call') do
|
368
124
|
lambda{
|
369
|
-
indexer = Harvestdor::Indexer.new(
|
125
|
+
indexer = Harvestdor::Indexer.new()
|
370
126
|
|
371
|
-
indexer.
|
127
|
+
expect(indexer).not_to receive(:load_whitelist)
|
372
128
|
|
373
129
|
hdor_client = indexer.send(:harvestdor_client)
|
374
|
-
indexer.dor_fetcher_client.
|
375
|
-
indexer.solr_client.
|
376
|
-
indexer.solr_client.
|
130
|
+
expect(indexer.dor_fetcher_client).to receive(:druid_array).and_return([@fake_druid])
|
131
|
+
expect(indexer.solr_client).to receive(:add)
|
132
|
+
expect(indexer.solr_client).to receive(:commit)
|
377
133
|
indexer.harvest_and_index
|
378
134
|
}
|
379
135
|
end
|
380
136
|
end
|
381
137
|
it "should only try to load a whitelist once" do
|
382
138
|
VCR.use_cassette('load_whitelist_once_call') do
|
383
|
-
indexer = Harvestdor::Indexer.new(
|
139
|
+
indexer = Harvestdor::Indexer.new({:whitelist => @whitelist_path})
|
384
140
|
indexer.send(:whitelist)
|
385
|
-
File.
|
141
|
+
expect_any_instance_of(File).not_to receive(:open)
|
386
142
|
indexer.send(:whitelist)
|
387
143
|
end
|
388
144
|
end
|
389
145
|
it "should log an error message and throw RuntimeError if it can't find the indicated whitelist file" do
|
390
146
|
VCR.use_cassette('cant_find_whitelist_call') do
|
391
147
|
exp_msg = 'Unable to find list of druids at bad_path'
|
392
|
-
indexer = Harvestdor::Indexer.new(@
|
393
|
-
indexer.logger.
|
148
|
+
indexer = Harvestdor::Indexer.new(@config.merge(:whitelist => 'bad_path'))
|
149
|
+
expect(indexer.logger).to receive(:fatal).with(exp_msg)
|
394
150
|
expect { indexer.send(:load_whitelist, 'bad_path') }.to raise_error(exp_msg)
|
395
151
|
end
|
396
152
|
end
|
@@ -399,15 +155,15 @@ describe Harvestdor::Indexer do
|
|
399
155
|
|
400
156
|
it "solr_client should initialize the rsolr client using the options from the config" do
|
401
157
|
VCR.use_cassette('rsolr_client_config_call') do
|
402
|
-
indexer = Harvestdor::Indexer.new(
|
403
|
-
RSolr.
|
404
|
-
indexer.
|
158
|
+
indexer = Harvestdor::Indexer.new(Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
|
159
|
+
expect(RSolr).to receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')
|
160
|
+
indexer.solr
|
405
161
|
end
|
406
162
|
end
|
407
163
|
|
408
164
|
context "skip heartbeat" do
|
409
165
|
it "allows me to use a fake url for dor-fetcher-client" do
|
410
|
-
expect {Harvestdor::Indexer.new(
|
166
|
+
expect {Harvestdor::Indexer.new()}.not_to raise_error
|
411
167
|
end
|
412
168
|
end
|
413
169
|
end
|