harvestdor-indexer 2.1.1 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/.rubocop_todo.yml +2 -221
- data/README.rdoc +12 -40
- data/Rakefile +12 -12
- data/harvestdor-indexer.gemspec +16 -16
- data/lib/harvestdor/indexer/metrics.rb +11 -13
- data/lib/harvestdor/indexer/resource.rb +25 -19
- data/lib/harvestdor/indexer/solr.rb +1 -1
- data/lib/harvestdor/indexer/version.rb +1 -1
- data/lib/harvestdor/indexer.rb +38 -24
- data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +112 -3
- data/spec/fixtures/vcr_cassettes/process_druids_whitelist_call.yml +113 -4
- data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +112 -3
- data/spec/unit/harvestdor/indexer/metrics_spec.rb +13 -13
- data/spec/unit/harvestdor-indexer-resource_spec.rb +75 -47
- data/spec/unit/harvestdor-indexer-solr_spec.rb +11 -11
- data/spec/unit/harvestdor-indexer_spec.rb +67 -53
- metadata +2 -2
@@ -1,50 +1,78 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Harvestdor::Indexer::Resource do
|
4
|
-
|
4
|
+
|
5
5
|
before(:all) do
|
6
6
|
VCR.use_cassette('before_all_call') do
|
7
|
-
@config_yml_path = File.join(File.dirname(__FILE__),
|
7
|
+
@config_yml_path = File.join(File.dirname(__FILE__), '..', 'config', 'ap.yml')
|
8
8
|
require 'yaml'
|
9
9
|
@config = YAML.load_file(@config_yml_path)
|
10
10
|
@fake_druid = 'oo000oo0000'
|
11
|
-
|
11
|
+
|
12
12
|
@indexer = Harvestdor::Indexer.new(@config)
|
13
13
|
@hdor_client = @indexer.send(:harvestdor_client)
|
14
|
-
@whitelist_path = File.join(File.dirname(__FILE__),
|
14
|
+
@whitelist_path = File.join(File.dirname(__FILE__), '../config/ap_whitelist.txt')
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
18
|
let :resource do
|
19
|
-
|
19
|
+
described_class.new(@indexer, @fake_druid)
|
20
20
|
end
|
21
|
-
|
22
|
-
|
21
|
+
|
22
|
+
subject { resource }
|
23
|
+
|
24
|
+
describe '#items' do
|
25
|
+
context 'for a regular item' do
|
26
|
+
before do
|
27
|
+
allow(subject).to receive(:collection?).and_return(false)
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'is empty if it is not a collection' do
|
31
|
+
expect(subject.items).to be_empty
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
context 'for a collection' do
|
36
|
+
before do
|
37
|
+
allow(subject).to receive(:collection?).and_return(true)
|
38
|
+
allow(subject).to receive(:items_druids).and_return %w(oo000oo0001 oo000oo0002)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'enumerates the items in the collection' do
|
42
|
+
expect(subject.items.count).to eq 2
|
43
|
+
|
44
|
+
child = subject.items.first
|
45
|
+
expect(child.druid).to eq 'oo000oo0001'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context 'smods_rec method' do
|
23
51
|
before(:all) do
|
24
52
|
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
25
53
|
@mods_xml = "<mods #{@ns_decl}><note>hi</note></mods>"
|
26
|
-
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
54
|
+
@ng_mods_xml = Nokogiri::XML(@mods_xml)
|
27
55
|
end
|
28
|
-
it
|
56
|
+
it 'calls mods method on harvestdor_client' do
|
29
57
|
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
30
58
|
resource.smods_rec
|
31
59
|
end
|
32
|
-
it
|
60
|
+
it 'returns Stanford::Mods::Record object' do
|
33
61
|
expect(@hdor_client).to receive(:mods).with(@fake_druid).and_return(@ng_mods_xml)
|
34
62
|
expect(resource.smods_rec).to be_an_instance_of(Stanford::Mods::Record)
|
35
63
|
end
|
36
|
-
it
|
64
|
+
it 'raises exception if MODS xml for the druid is empty' do
|
37
65
|
allow(@hdor_client).to receive(:mods).with(@fake_druid).and_return(Nokogiri::XML("<mods #{@ns_decl}/>"))
|
38
66
|
expect { resource.smods_rec }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
39
67
|
end
|
40
|
-
it
|
68
|
+
it 'raises exception if there is no MODS xml for the druid' do
|
41
69
|
VCR.use_cassette('exception_no_MODS_call') do
|
42
70
|
expect { resource.smods_rec }.to raise_error(Harvestdor::Errors::MissingMods)
|
43
71
|
end
|
44
72
|
end
|
45
73
|
end
|
46
|
-
|
47
|
-
context
|
74
|
+
|
75
|
+
context 'public_xml related methods' do
|
48
76
|
before(:all) do
|
49
77
|
@id_md_xml = "<identityMetadata><objectId>druid:#{@fake_druid}</objectId></identityMetadata>"
|
50
78
|
@cntnt_md_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'>foo</contentMetadata>"
|
@@ -53,29 +81,29 @@ describe Harvestdor::Indexer::Resource do
|
|
53
81
|
@pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}</publicObject>"
|
54
82
|
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
55
83
|
end
|
56
|
-
context
|
57
|
-
it
|
84
|
+
context '#public_xml' do
|
85
|
+
it 'calls public_xml method on harvestdor_client' do
|
58
86
|
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
59
87
|
resource.public_xml
|
60
88
|
end
|
61
|
-
it
|
89
|
+
it 'retrieves entire public xml as a Nokogiri::XML::Document' do
|
62
90
|
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(@ng_pub_xml)
|
63
91
|
px = resource.public_xml
|
64
92
|
expect(px).to be_kind_of(Nokogiri::XML::Document)
|
65
93
|
expect(px.root.name).to eq('publicObject')
|
66
94
|
expect(px.root.attributes['id'].text).to eq("druid:#{@fake_druid}")
|
67
95
|
end
|
68
|
-
it
|
69
|
-
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML(
|
96
|
+
it 'raises exception if public xml for the druid is empty' do
|
97
|
+
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML('<publicObject/>'))
|
70
98
|
expect { resource.public_xml }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
71
99
|
end
|
72
|
-
it
|
100
|
+
it 'raises error if there is no public_xml page for the druid' do
|
73
101
|
expect(@hdor_client).to receive(:public_xml).with(@fake_druid).and_return(nil)
|
74
102
|
expect { resource.public_xml }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
75
103
|
end
|
76
104
|
end
|
77
|
-
context
|
78
|
-
it
|
105
|
+
context '#content_metadata' do
|
106
|
+
it 'returns a Nokogiri::XML::Document derived from the public xml if a druid is passed' do
|
79
107
|
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
80
108
|
cm = resource.content_metadata
|
81
109
|
expect(cm).to be_kind_of(Nokogiri::XML::Document)
|
@@ -84,13 +112,13 @@ describe Harvestdor::Indexer::Resource do
|
|
84
112
|
expect(cm.root.attributes['objectId'].text).to eq(@fake_druid)
|
85
113
|
expect(cm.root.text.strip).to eq('foo')
|
86
114
|
end
|
87
|
-
it
|
115
|
+
it 'raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid' do
|
88
116
|
expect(@hdor_client).to receive(:content_metadata).with(@fake_druid).and_return(nil)
|
89
117
|
expect { resource.content_metadata }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
90
118
|
end
|
91
119
|
end
|
92
|
-
context
|
93
|
-
it
|
120
|
+
context '#identity_metadata' do
|
121
|
+
it 'returns a Nokogiri::XML::Document derived from the public xml if a druid is passed' do
|
94
122
|
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
95
123
|
im = resource.identity_metadata
|
96
124
|
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
@@ -98,61 +126,61 @@ describe Harvestdor::Indexer::Resource do
|
|
98
126
|
expect(im.root.name).to eq('identityMetadata')
|
99
127
|
expect(im.root.text.strip).to eq("druid:#{@fake_druid}")
|
100
128
|
end
|
101
|
-
it
|
129
|
+
it 'raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid' do
|
102
130
|
expect(@hdor_client).to receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
103
131
|
expect { resource.identity_metadata }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
104
132
|
end
|
105
133
|
end
|
106
|
-
context
|
107
|
-
it
|
134
|
+
context '#rights_metadata' do
|
135
|
+
it 'returns a Nokogiri::XML::Document derived from the public xml if a druid is passed' do
|
108
136
|
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
109
137
|
im = resource.rights_metadata
|
110
138
|
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
111
139
|
expect(im.root).not_to eq(nil)
|
112
140
|
expect(im.root.name).to eq('rightsMetadata')
|
113
|
-
expect(im.root.text.strip).to eq(
|
141
|
+
expect(im.root.text.strip).to eq('bar')
|
114
142
|
end
|
115
|
-
it
|
143
|
+
it 'raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid' do
|
116
144
|
expect(@hdor_client).to receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
117
145
|
expect { resource.rights_metadata }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
118
146
|
end
|
119
147
|
end
|
120
|
-
context
|
121
|
-
it
|
148
|
+
context '#rdf' do
|
149
|
+
it 'returns a Nokogiri::XML::Document derived from the public xml if a druid is passed' do
|
122
150
|
allow(Harvestdor).to receive(:public_xml).with(@fake_druid, @indexer.config.harvestdor.purl).and_return(@ng_pub_xml)
|
123
151
|
im = resource.rdf
|
124
152
|
expect(im).to be_kind_of(Nokogiri::XML::Document)
|
125
153
|
expect(im.root).not_to eq(nil)
|
126
154
|
expect(im.root.name).to eq('RDF')
|
127
|
-
expect(im.root.text.strip).to eq(
|
155
|
+
expect(im.root.text.strip).to eq('relationship!')
|
128
156
|
end
|
129
|
-
it
|
157
|
+
it 'raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid' do
|
130
158
|
expect(@hdor_client).to receive(:rdf).with(@fake_druid).and_return(nil)
|
131
159
|
expect { resource.rdf }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
132
160
|
end
|
133
161
|
end
|
134
|
-
|
135
|
-
describe
|
136
|
-
it
|
162
|
+
|
163
|
+
describe '#public_xml_or_druid' do
|
164
|
+
it 'returns the public_xml, if the public_xml has been loaded' do
|
137
165
|
allow(resource).to receive(:public_xml?).and_return(true)
|
138
166
|
allow(resource).to receive(:public_xml).and_return(double)
|
139
167
|
expect(resource.public_xml_or_druid).to eq resource.public_xml
|
140
168
|
end
|
141
|
-
it
|
169
|
+
it 'returns the druid, if the public_xml has not been loaded' do
|
142
170
|
allow(resource).to receive(:public_xml?).and_return(false)
|
143
171
|
expect(resource.public_xml_or_druid).to eq @fake_druid
|
144
172
|
end
|
145
173
|
end
|
146
174
|
|
147
|
-
describe
|
148
|
-
it
|
149
|
-
allow(resource).to receive(:identity_metadata).and_return(Nokogiri::XML(
|
150
|
-
expect(resource.identity_md_obj_label).to eq
|
175
|
+
describe '#identity_md_obj_label' do
|
176
|
+
it 'extracts the objectLabel from the identity metadata' do
|
177
|
+
allow(resource).to receive(:identity_metadata).and_return(Nokogiri::XML('<identityMetadata><objectLabel>label</objectLabel></identityMetadata>'))
|
178
|
+
expect(resource.identity_md_obj_label).to eq 'label'
|
151
179
|
end
|
152
180
|
end
|
153
181
|
|
154
|
-
describe
|
155
|
-
it
|
182
|
+
describe '#collections' do
|
183
|
+
it 'extracts the collection this resource is a member of and return Resource objects for those collections' do
|
156
184
|
allow(resource).to receive(:public_xml).and_return(Nokogiri::XML <<-EOF
|
157
185
|
<publicObject>
|
158
186
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#">
|
@@ -162,13 +190,13 @@ describe Harvestdor::Indexer::Resource do
|
|
162
190
|
</rdf:RDF>
|
163
191
|
</publicObject>
|
164
192
|
EOF
|
165
|
-
|
193
|
+
)
|
166
194
|
|
167
195
|
expect(resource.collections.length).to eq 1
|
168
|
-
expect(resource.collections.first.druid).to eq
|
196
|
+
expect(resource.collections.first.druid).to eq 'some:druid'
|
169
197
|
expect(resource.collections.first.indexer).to eq resource.indexer
|
170
198
|
end
|
171
199
|
end
|
172
200
|
end
|
173
|
-
|
201
|
+
|
174
202
|
end
|
@@ -2,28 +2,28 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Harvestdor::Indexer::Solr do
|
4
4
|
let :indexer do
|
5
|
-
double(logger: Logger.new(
|
5
|
+
double(logger: Logger.new('/dev/null'))
|
6
6
|
end
|
7
7
|
|
8
8
|
let :solr do
|
9
|
-
|
9
|
+
described_class.new indexer
|
10
10
|
end
|
11
11
|
|
12
12
|
# The method that sends the solr document to solr
|
13
|
-
describe
|
13
|
+
describe '#add' do
|
14
14
|
let(:doc_hash) do
|
15
15
|
{
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
16
|
+
id: 'whatever',
|
17
|
+
modsxml: 'whatever',
|
18
|
+
title_display: 'title',
|
19
|
+
pub_year_tisim: 'some year',
|
20
|
+
author_person_display: 'author',
|
21
|
+
format: 'Image',
|
22
|
+
language: 'English'
|
23
23
|
}
|
24
24
|
end
|
25
25
|
|
26
|
-
it
|
26
|
+
it 'sends an add request to the solr_client' do
|
27
27
|
expect(solr.client).to receive(:add).with(doc_hash)
|
28
28
|
solr.add(doc_hash)
|
29
29
|
end
|
@@ -4,48 +4,62 @@ describe Harvestdor::Indexer do
|
|
4
4
|
|
5
5
|
before(:all) do
|
6
6
|
VCR.use_cassette('before_all_call') do
|
7
|
-
@config_yml_path = File.join(File.dirname(__FILE__),
|
7
|
+
@config_yml_path = File.join(File.dirname(__FILE__), '..', 'config', 'ap.yml')
|
8
8
|
require 'yaml'
|
9
9
|
@config = YAML.load_file(@config_yml_path)
|
10
10
|
|
11
|
-
@indexer =
|
12
|
-
config.whitelist = [
|
11
|
+
@indexer = described_class.new(@config) do |config|
|
12
|
+
config.whitelist = ['druid:yg867hg1375']
|
13
13
|
end
|
14
14
|
@hdor_client = @indexer.send(:harvestdor_client)
|
15
15
|
@fake_druid = 'druid:oo000oo0000'
|
16
|
-
@whitelist_path = File.join(File.dirname(__FILE__),
|
16
|
+
@whitelist_path = File.join(File.dirname(__FILE__), '../config/ap_whitelist.txt')
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
describe
|
21
|
-
it
|
20
|
+
describe 'access methods' do
|
21
|
+
it 'initializes success count' do
|
22
22
|
expect(@indexer.metrics.success_count).to eq(0)
|
23
23
|
end
|
24
|
-
it
|
24
|
+
it 'initializes error count' do
|
25
25
|
expect(@indexer.metrics.error_count).to eq(0)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
describe
|
30
|
-
it
|
31
|
-
@indexer.logger.info(
|
32
|
-
expect(File.
|
29
|
+
describe 'logging' do
|
30
|
+
it 'writes the log file to the directory indicated by log_dir' do
|
31
|
+
@indexer.logger.info('indexer_spec logging test message')
|
32
|
+
expect(File.exist?(File.join(@config['harvestdor']['log_dir'], @config['harvestdor']['log_name']))).to eq(true)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
it
|
36
|
+
it 'initializes the harvestdor_client from the config' do
|
37
37
|
expect(@hdor_client).to be_an_instance_of(Harvestdor::Client)
|
38
38
|
expect(@hdor_client.config.default_set).to eq(@config['harvestdor']['default_set'])
|
39
39
|
end
|
40
40
|
|
41
|
-
|
41
|
+
describe '#resources' do
|
42
|
+
it 'does not persist resources across calls' do
|
43
|
+
VCR.use_cassette('single_rsolr_connection_call') do
|
44
|
+
hdor_client = @indexer.send(:harvestdor_client)
|
45
|
+
allow(@indexer.dor_fetcher_client).to receive(:druid_array).and_return(['druid:yg867hg1375', 'druid:jf275fd6276', 'druid:nz353cp1092', 'druid:tc552kq0798', 'druid:th998nk0722', 'druid:ww689vs6534'])
|
46
|
+
|
47
|
+
a = @indexer.resources.first
|
48
|
+
b = @indexer.resources.first
|
49
|
+
|
50
|
+
expect(a).to_not eq b
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context 'harvest_and_index' do
|
42
56
|
before(:all) do
|
43
57
|
@doc_hash = {
|
44
|
-
:
|
58
|
+
id: @fake_druid
|
45
59
|
}
|
46
60
|
end
|
47
61
|
|
48
|
-
it
|
62
|
+
it 'calls dor_fetcher_client.druid_array and then call :add on rsolr connection' do
|
49
63
|
allow_any_instance_of(Harvestdor::Indexer::Resource).to receive(:collection?).and_return(false)
|
50
64
|
expect(@indexer).to receive(:druids).and_return([@fake_druid])
|
51
65
|
expect(@indexer.solr).to receive(:add).with(@doc_hash)
|
@@ -53,19 +67,19 @@ describe Harvestdor::Indexer do
|
|
53
67
|
@indexer.harvest_and_index
|
54
68
|
end
|
55
69
|
|
56
|
-
it
|
70
|
+
it 'only calls :commit on rsolr connection once' do
|
57
71
|
VCR.use_cassette('single_rsolr_connection_call') do
|
58
72
|
hdor_client = @indexer.send(:harvestdor_client)
|
59
|
-
expect(@indexer.dor_fetcher_client).to receive(:druid_array).and_return([
|
60
|
-
expect(@indexer.solr).to receive(:add).
|
73
|
+
expect(@indexer.dor_fetcher_client).to receive(:druid_array).and_return(['druid:yg867hg1375', 'druid:jf275fd6276', 'druid:nz353cp1092', 'druid:tc552kq0798', 'druid:th998nk0722', 'druid:ww689vs6534'])
|
74
|
+
expect(@indexer.solr).to receive(:add).at_least(6).times
|
61
75
|
expect(@indexer.solr).to receive(:commit!).once
|
62
76
|
@indexer.harvest_and_index
|
63
77
|
end
|
64
78
|
end
|
65
79
|
|
66
|
-
it
|
80
|
+
it 'only processes druids in whitelist if it exists' do
|
67
81
|
VCR.use_cassette('process_druids_whitelist_call') do
|
68
|
-
indexer =
|
82
|
+
indexer = described_class.new(@config.merge(whitelist: @whitelist_path))
|
69
83
|
hdor_client = indexer.send(:harvestdor_client)
|
70
84
|
added = []
|
71
85
|
allow(indexer.solr).to receive(:add) { |hash|
|
@@ -73,56 +87,56 @@ describe Harvestdor::Indexer do
|
|
73
87
|
}
|
74
88
|
expect(indexer.solr).to receive(:commit!)
|
75
89
|
indexer.harvest_and_index
|
76
|
-
expect(added).to
|
90
|
+
expect(added).to include 'druid:tc552kq0798', 'druid:th998nk0722', 'druid:ww689vs6534', 'druid:yg867hg1375', 'druid:jf275fd6276', 'druid:nz353cp1092'
|
77
91
|
end
|
78
92
|
end
|
79
93
|
end # harvest_and_index
|
80
94
|
|
81
95
|
# Check for replacement of oai harvesting with dor-fetcher
|
82
|
-
context
|
83
|
-
|
84
|
-
|
85
|
-
|
96
|
+
context 'replacing OAI harvesting with dor-fetcher' do
|
97
|
+
it 'has a dor-fetcher client' do
|
98
|
+
expect(@indexer.dor_fetcher_client).to be_an_instance_of(DorFetcher::Client)
|
99
|
+
end
|
86
100
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
101
|
+
it 'druids method calls druid_array and get_collection methods on fetcher_client' do
|
102
|
+
VCR.use_cassette('get_collection_druids_call') do
|
103
|
+
expect(@indexer.resources.map(&:druid)).to include 'druid:yg867hg1375', 'druid:jf275fd6276', 'druid:nz353cp1092', 'druid:tc552kq0798', 'druid:th998nk0722', 'druid:ww689vs6534'
|
91
104
|
end
|
105
|
+
end
|
92
106
|
|
93
|
-
|
94
|
-
|
95
|
-
|
107
|
+
it 'gets the configuration of the dor-fetcher client from included yml file' do
|
108
|
+
expect(@indexer.dor_fetcher_client.service_url).to eq('http://127.0.0.1:3000')
|
109
|
+
end
|
96
110
|
|
97
111
|
end # ending replacing OAI context
|
98
112
|
|
99
|
-
context
|
100
|
-
it
|
113
|
+
context 'whitelist' do
|
114
|
+
it 'knows what is in the whitelist' do
|
101
115
|
VCR.use_cassette('know_what_is_in_whitelist_call') do
|
102
|
-
lambda{
|
103
|
-
indexer =
|
104
|
-
expect(indexer.whitelist).to eq([
|
116
|
+
lambda {
|
117
|
+
indexer = described_class.new({ whitelist: @whitelist_path })
|
118
|
+
expect(indexer.whitelist).to eq(['druid:yg867hg1375', 'druid:jf275fd6276', 'druid:nz353cp1092'])
|
105
119
|
}
|
106
120
|
end
|
107
121
|
end
|
108
|
-
it
|
122
|
+
it 'is an Array with an entry for each non-empty line in the file' do
|
109
123
|
@indexer.send(:load_whitelist, @whitelist_path)
|
110
124
|
expect(@indexer.send(:whitelist)).to be_an_instance_of(Array)
|
111
125
|
expect(@indexer.send(:whitelist).size).to eq(3)
|
112
126
|
end
|
113
|
-
it
|
127
|
+
it 'is empty Array if there was no whitelist config setting' do
|
114
128
|
VCR.use_cassette('empty_array_no_whitelist_config_call') do
|
115
|
-
lambda{
|
116
|
-
indexer =
|
129
|
+
lambda {
|
130
|
+
indexer = described_class.new
|
117
131
|
expect(indexer.whitelist).to eq([])
|
118
132
|
}
|
119
133
|
end
|
120
134
|
end
|
121
|
-
context
|
122
|
-
it
|
135
|
+
context 'load_whitelist' do
|
136
|
+
it 'is not called if there was no whitelist config setting' do
|
123
137
|
VCR.use_cassette('no_whitelist_config_call') do
|
124
|
-
lambda{
|
125
|
-
indexer =
|
138
|
+
lambda {
|
139
|
+
indexer = described_class.new
|
126
140
|
|
127
141
|
expect(indexer).not_to receive(:load_whitelist)
|
128
142
|
|
@@ -134,9 +148,9 @@ describe Harvestdor::Indexer do
|
|
134
148
|
}
|
135
149
|
end
|
136
150
|
end
|
137
|
-
it
|
151
|
+
it 'only try to load a whitelist once' do
|
138
152
|
VCR.use_cassette('load_whitelist_once_call') do
|
139
|
-
indexer =
|
153
|
+
indexer = described_class.new({ whitelist: @whitelist_path })
|
140
154
|
indexer.send(:whitelist)
|
141
155
|
expect_any_instance_of(File).not_to receive(:open)
|
142
156
|
indexer.send(:whitelist)
|
@@ -145,7 +159,7 @@ describe Harvestdor::Indexer do
|
|
145
159
|
it "log an error message and throw RuntimeError if it can't find the indicated whitelist file" do
|
146
160
|
VCR.use_cassette('cant_find_whitelist_call') do
|
147
161
|
exp_msg = 'Unable to find list of druids at bad_path'
|
148
|
-
indexer =
|
162
|
+
indexer = described_class.new(@config.merge(whitelist: 'bad_path'))
|
149
163
|
expect(indexer.logger).to receive(:fatal).with(exp_msg)
|
150
164
|
expect { indexer.send(:load_whitelist, 'bad_path') }.to raise_error(exp_msg)
|
151
165
|
end
|
@@ -153,17 +167,17 @@ describe Harvestdor::Indexer do
|
|
153
167
|
end
|
154
168
|
end # whitelist
|
155
169
|
|
156
|
-
it
|
170
|
+
it 'solr_client initializes the rsolr client using the options from the config' do
|
157
171
|
VCR.use_cassette('rsolr_client_config_call') do
|
158
|
-
indexer =
|
172
|
+
indexer = described_class.new(Confstruct::Configuration.new(solr: { url: 'http://localhost:2345', a: 1 }))
|
159
173
|
expect(RSolr).to receive(:connect).with(hash_including(a: 1, url: 'http://localhost:2345'))
|
160
174
|
indexer.solr
|
161
175
|
end
|
162
176
|
end
|
163
177
|
|
164
|
-
context
|
165
|
-
it
|
166
|
-
expect {
|
178
|
+
context 'dor fetcher' do
|
179
|
+
it 'skip_heartbeat allows me to use a fake url for dor-fetcher-client' do
|
180
|
+
expect { described_class.new }.not_to raise_error
|
167
181
|
end
|
168
182
|
end
|
169
183
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2015-10-
|
13
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rsolr
|