harvestdor-indexer 0.0.13 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.rdoc +14 -13
- data/config/dor-fetcher-client.yml +4 -0
- data/harvestdor-indexer.gemspec +5 -2
- data/lib/harvestdor-indexer/version.rb +1 -1
- data/lib/harvestdor-indexer.rb +18 -7
- data/spec/config/ap.yml +5 -12
- data/spec/config/ap_blacklist.txt +2 -2
- data/spec/config/ap_whitelist.txt +3 -2
- data/spec/fixtures/vcr_cassettes/exception_no_MODS_call.yml +114 -0
- data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +58 -0
- data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml +58 -0
- data/spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml +58 -0
- data/spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml +58 -0
- data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +58 -0
- data/spec/spec_helper.rb +6 -2
- data/spec/unit/harvestdor-indexer_spec.rb +98 -61
- metadata +120 -63
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45d033dffd56d8c3abd90ccafd0fa4666b4379da
|
4
|
+
data.tar.gz: f579c955fb390f2d7ed497aa237dbe763361d60b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7e9213e27145df273eaee8384b690e9ae720aeffd7f781f64dec9bc13948c6716b942499305037c7210c59ae9b5f07747f5ff67e4fe315577a78bce7d530a81
|
7
|
+
data.tar.gz: 896d1c26f157087e2bd6f935e0106024e9a7840cdb5c5db05e6a348ff1fe0579410bc70895b26b3db60a6461883bfd5bfe4a8ee43483d0f8821bf73cad4c2b1c
|
data/README.rdoc
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
= Harvestdor::Indexer
|
2
2
|
{<img src="https://travis-ci.org/sul-dlss/harvestdor-indexer.svg" alt="Build Status" />}[https://travis-ci.org/sul-dlss/harvestdor-indexer]
|
3
|
-
{<img src="https://coveralls.io/repos/sul-dlss/harvestdor-indexer/badge.png" alt="Coverage Status" />}[https://coveralls.io/r/sul-dlss/harvestdor-indexer]
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
{<img src="https://coveralls.io/repos/sul-dlss/harvestdor-indexer/badge.png" alt="Coverage Status" />}[https://coveralls.io/r/sul-dlss/harvestdor-indexer]
|
4
|
+
{<img src="https://gemnasium.com/sul-dlss/harvestdor-indexer.svg" alt="Dependency Status" />}[https://gemnasium.com/sul-dlss/harvestdor-indexer]
|
5
|
+
{<img src="https://badge.fury.io/rb/harvestdor-indexer.svg" alt="Gem Version" />}[http://badge.fury.io/rb/harvestdor-indexer]
|
7
6
|
|
8
7
|
A Gem to harvest meta/data from DOR and the skeleton code to index it and write to Solr.
|
9
8
|
|
@@ -42,13 +41,10 @@ Note: Because of an update to underlying HTTP libraries, versions of this gem >
|
|
42
41
|
See spec/config/ap.yml for an example.
|
43
42
|
You will want to copy that file and change the following settings:
|
44
43
|
1. log_name
|
45
|
-
2. default_set
|
46
|
-
3.
|
47
|
-
4. blacklist or whitelist if you are using them
|
44
|
+
2. default_set
|
45
|
+
3. blacklist or whitelist if you are using them
|
48
46
|
|
49
|
-
|
50
|
-
|
51
|
-
indexer = Harvestdor::Indexer.new({:oai_repository_url => 'http://my_oai.org, :default_from_date => '2012-12-01'})
|
47
|
+
Update the dor-fetcher-client.yml file in the config directory with the URL of the dor-fetcher-service provider. The default value points to port 3000 on localhost - dor_fetcher_service_url: http://127.0.0.1:3000
|
52
48
|
|
53
49
|
=== Override the Harvestdor::Indexer.index method
|
54
50
|
|
@@ -94,17 +90,21 @@ I suggest you write a script to run the code. Your script might look like this:
|
|
94
90
|
end
|
95
91
|
config_yml_path = ARGV.pop
|
96
92
|
if config_yml_path.nil?
|
97
|
-
puts "** You must provide the full path to a config yml file **"
|
93
|
+
puts "** You must provide the full path to a collection config yml file **"
|
94
|
+
exit
|
95
|
+
end
|
96
|
+
if client_config_path.nil?
|
97
|
+
puts "** You must provide the full path to dor-fetcher-client config yml file **"
|
98
98
|
exit
|
99
99
|
end
|
100
|
-
indexer = Harvestdor::Indexer.new(config_yml_path, opts)
|
100
|
+
indexer = Harvestdor::Indexer.new(config_yml_path, client_config_path, opts)
|
101
101
|
indexer.harvest_and_index
|
102
102
|
|
103
103
|
Then you run the script like so:
|
104
104
|
|
105
105
|
./bin/indexer config/(your coll).yml
|
106
106
|
|
107
|
-
I suggest you run your code on harvestdor-dev, as it is already set up to be able to harvest from the
|
107
|
+
I suggest you run your code on harvestdor-dev, as it is already set up to be able to harvest from the DorFetcher
|
108
108
|
|
109
109
|
|
110
110
|
== Contributing
|
@@ -118,6 +118,7 @@ I suggest you run your code on harvestdor-dev, as it is already set up to be abl
|
|
118
118
|
|
119
119
|
== Releases
|
120
120
|
|
121
|
+
* <b>1.0.0</b> Replaced OAI harvesting mechanism with dor-fetcher
|
121
122
|
* <b>0.0.13</b> Upgrade to latest faraday HTTP client syntax; Use retries gem (https://github.com/ooyala/retries) to make retrying of index process more robust
|
122
123
|
* <b>0.0.12</b> fix total_object nil error
|
123
124
|
* <b>0.0.11</b> fix error_count and success_count, allow setting of max-tries (retry solr add if error)
|
data/harvestdor-indexer.gemspec
CHANGED
@@ -6,8 +6,8 @@ require 'harvestdor-indexer/version'
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
7
|
gem.name = "harvestdor-indexer"
|
8
8
|
gem.version = Harvestdor::Indexer::VERSION
|
9
|
-
gem.authors = ["Naomi Dushay"]
|
10
|
-
gem.email = ["ndushay@stanford.edu"]
|
9
|
+
gem.authors = ["Naomi Dushay", "Bess Sadler", "Laney McGlohon"]
|
10
|
+
gem.email = ["ndushay@stanford.edu", "bess@stanford.edu", "laneymcg@stanford.edu"]
|
11
11
|
gem.description = %q{Harvest DOR object metadata via a relationship (e.g. hydra:isGovernedBy rdf:resource="info:fedora/druid:hy787xj5878") and dates, plus code framework to write Solr docs to index}
|
12
12
|
gem.summary = %q{Harvest DOR object metadata and index it to Solr}
|
13
13
|
gem.homepage = "https://consul.stanford.edu/display/chimera/Chimera+project"
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_dependency 'retries'
|
22
22
|
gem.add_dependency 'harvestdor', '>=0.0.14'
|
23
23
|
gem.add_dependency 'stanford-mods'
|
24
|
+
gem.add_dependency 'dor-fetcher', '>=1.0.0'
|
24
25
|
|
25
26
|
# Runtime dependencies
|
26
27
|
gem.add_runtime_dependency 'confstruct'
|
@@ -36,5 +37,7 @@ Gem::Specification.new do |gem|
|
|
36
37
|
gem.add_development_dependency 'rspec'
|
37
38
|
gem.add_development_dependency 'coveralls'
|
38
39
|
# gem.add_development_dependency 'ruby-debug19'
|
40
|
+
gem.add_development_dependency 'vcr'
|
41
|
+
gem.add_development_dependency 'webmock'
|
39
42
|
|
40
43
|
end
|
data/lib/harvestdor-indexer.rb
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
require 'confstruct'
|
3
3
|
require 'rsolr'
|
4
4
|
require 'retries'
|
5
|
+
require 'json'
|
5
6
|
|
6
7
|
# sul-dlss gems
|
7
8
|
require 'harvestdor'
|
8
9
|
require 'stanford-mods'
|
10
|
+
require 'dor-fetcher'
|
9
11
|
|
10
12
|
# stdlib
|
11
13
|
require 'logger'
|
@@ -18,8 +20,9 @@ module Harvestdor
|
|
18
20
|
|
19
21
|
attr_accessor :error_count, :success_count, :max_retries
|
20
22
|
attr_accessor :total_time_to_parse,:total_time_to_solr
|
23
|
+
attr_accessor :dor_fetcher_client, :client_config
|
21
24
|
|
22
|
-
def initialize yml_path, options = {}
|
25
|
+
def initialize yml_path, client_config_path, options = {}
|
23
26
|
@success_count=0 # the number of objects successfully indexed
|
24
27
|
@error_count=0 # the number of objects that failed
|
25
28
|
@max_retries=10 # the number of times to retry an object
|
@@ -27,8 +30,10 @@ module Harvestdor
|
|
27
30
|
@total_time_to_parse=0
|
28
31
|
@yml_path = yml_path
|
29
32
|
config.configure(YAML.load_file(yml_path)) if yml_path
|
30
|
-
config.configure options
|
33
|
+
config.configure options
|
31
34
|
yield(config) if block_given?
|
35
|
+
@client_config = YAML.load_file(client_config_path) if client_config_path && File.exists?(client_config_path)
|
36
|
+
@dor_fetcher_client=DorFetcher::Client.new({:service_url => client_config["dor_fetcher_service_url"]})
|
32
37
|
end
|
33
38
|
|
34
39
|
def config
|
@@ -40,7 +45,7 @@ module Harvestdor
|
|
40
45
|
end
|
41
46
|
|
42
47
|
# per this Indexer's config options
|
43
|
-
# harvest the druids via
|
48
|
+
# harvest the druids via DorFetcher
|
44
49
|
# create a Solr profiling document for each druid
|
45
50
|
# write the result to the Solr index
|
46
51
|
def harvest_and_index
|
@@ -67,14 +72,14 @@ module Harvestdor
|
|
67
72
|
logger.info("Total records processed: #{total_objects}")
|
68
73
|
end
|
69
74
|
|
70
|
-
# return Array of druids contained in the
|
75
|
+
# return Array of druids pulled via DorFetcher for the collection indicated by the DorFetcher params
|
71
76
|
# @return [Array<String>] or enumeration over it, if block is given. (strings are druids, e.g. ab123cd1234)
|
72
77
|
def druids
|
73
78
|
if @druids.nil?
|
74
79
|
start_time=Time.now
|
75
|
-
logger.info("Starting
|
76
|
-
@druids =
|
77
|
-
logger.info("Completed
|
80
|
+
logger.info("Starting DorFetcher pulling of druids at #{start_time}.")
|
81
|
+
@druids = @dor_fetcher_client.druid_array(@dor_fetcher_client.get_collection(strip_default_set_string(), {}))
|
82
|
+
logger.info("Completed DorFetcher pulling of druids at #{Time.now}. Found #{@druids.size} druids. Total elapsed time for DorFetcher pulling = #{elapsed_time(start_time,:minutes)} minutes")
|
78
83
|
end
|
79
84
|
return @druids
|
80
85
|
end
|
@@ -224,6 +229,12 @@ module Harvestdor
|
|
224
229
|
@whitelist ||= []
|
225
230
|
end
|
226
231
|
|
232
|
+
# Get only the druid from the end of the default_set string
|
233
|
+
# from the yml file
|
234
|
+
def strip_default_set_string()
|
235
|
+
@config.default_set.split('_').last
|
236
|
+
end
|
237
|
+
|
227
238
|
protected #---------------------------------------------------------------------
|
228
239
|
|
229
240
|
def harvestdor_client
|
data/spec/config/ap.yml
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# You will want to copy this file and change the following settings:
|
2
2
|
# 1. log_dir, log_name
|
3
|
-
# 2. default_set
|
4
|
-
# 2a. other OAI harvesting params
|
3
|
+
# 2. default_set
|
5
4
|
# 3. blacklist or whitelist if you are using them
|
6
5
|
# 4. Solr baseurl
|
7
6
|
|
@@ -16,8 +15,9 @@ purl: http://purl.stanford.edu
|
|
16
15
|
|
17
16
|
# ---------- White and Black list parameters -----
|
18
17
|
|
19
|
-
# name of file containing druids that will NOT be processed even if they are harvested
|
20
|
-
# either give absolute path or path relative to where the command will
|
18
|
+
# name of file containing druids that will NOT be processed even if they are harvested
|
19
|
+
# via DorFetcher.  Either give absolute path or path relative to where the command will
|
20
|
+
# be executed
|
21
21
|
#blacklist: config/ap_blacklist.txt
|
22
22
|
|
23
23
|
# name of file containing druids that WILL be processed (all others will be ignored)
|
@@ -32,14 +32,9 @@ solr:
|
|
32
32
|
read_timeout: 60
|
33
33
|
open_timeout: 60
|
34
34
|
|
35
|
-
# ---------- OAI harvesting parameters -----------
|
36
|
-
|
37
|
-
# oai_repository_url: URL of the OAI data provider
|
38
|
-
oai_repository_url: https://dor-oaiprovider-prod.stanford.edu/oai
|
39
|
-
|
40
35
|
# default_set: default set for harvest (default: nil)
|
41
36
|
# can be overridden on calls to harvest_ids and harvest_records
|
42
|
-
default_set:
|
37
|
+
default_set: is_governed_by_yg867hg1375
|
43
38
|
|
44
39
|
# default_metadata_prefix: default metadata prefix to be used for harvesting (default: mods)
|
45
40
|
# can be overridden on calls to harvest_ids and harvest_records
|
@@ -50,8 +45,6 @@ default_set: is_governed_by_hy787xj5878
|
|
50
45
|
# default_until_date: default until date for harvest (default: nil)
|
51
46
|
# can be overridden on calls to harvest_ids and harvest_records
|
52
47
|
|
53
|
-
# oai_client_debug: true for OAI::Client debug mode (default: false)
|
54
|
-
|
55
48
|
# Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
|
56
49
|
http_options:
|
57
50
|
ssl:
|
@@ -0,0 +1,114 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://purl.stanford.edu/oo000oo0000.mods
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 404
|
17
|
+
message: ''
|
18
|
+
headers:
|
19
|
+
Date:
|
20
|
+
- Wed, 22 Oct 2014 20:26:30 GMT
|
21
|
+
Server:
|
22
|
+
- Apache/2.2.15 (Red Hat)
|
23
|
+
X-Powered-By:
|
24
|
+
- Phusion Passenger (mod_rails/mod_rack) 3.0.19
|
25
|
+
X-Ua-Compatible:
|
26
|
+
- IE=Edge,chrome=1
|
27
|
+
Cache-Control:
|
28
|
+
- no-cache
|
29
|
+
X-Request-Id:
|
30
|
+
- eb7854ee5cc96cbf20bfdafb0e8ea1c2
|
31
|
+
X-Runtime:
|
32
|
+
- '0.011781'
|
33
|
+
X-Rack-Cache:
|
34
|
+
- miss
|
35
|
+
Status:
|
36
|
+
- '404'
|
37
|
+
Content-Length:
|
38
|
+
- '3015'
|
39
|
+
Content-Type:
|
40
|
+
- text/html; charset=utf-8
|
41
|
+
body:
|
42
|
+
encoding: US-ASCII
|
43
|
+
string: |
|
44
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
45
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
46
|
+
<head>
|
47
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
48
|
+
<title>Stanford Digital Repository</title>
|
49
|
+
<link rel="shortcut icon" href="favicon.ico" />
|
50
|
+
<link href="/assets/site.css" media="screen" rel="stylesheet" type="text/css" />
|
51
|
+
</head>
|
52
|
+
<body>
|
53
|
+
<div id="container">
|
54
|
+
<div id="banner"><h1>Stanford Digital Repository</h1></div>
|
55
|
+
<div id="contents">
|
56
|
+
<div class="dialog">
|
57
|
+
<h2>The item you requested is not available.</h2>
|
58
|
+
<p>The item you requested is not yet available. It will be available at this URL when Library processing is completed.</p>
|
59
|
+
</div>
|
60
|
+
|
61
|
+
</div>
|
62
|
+
</div>
|
63
|
+
|
64
|
+
<div id="footer">
|
65
|
+
<div class="footer-contents">
|
66
|
+
<div class="footer-sul">
|
67
|
+
<div class="footer-logo">
|
68
|
+
<a href="http://library.stanford.edu" target="_blank"><img src="/images/footer-sul-logo.png"></a>
|
69
|
+
</div>
|
70
|
+
<div class="footer-links">
|
71
|
+
<a href="http://library.stanford.edu" target="_blank">Stanford University Libraries</a>
|
72
|
+
<a href="http://searchworks.stanford.edu" target="_blank">SearchWorks</a>
|
73
|
+
<a href="http://library.stanford.edu/ejournals" target="_blank">eJournals</a>
|
74
|
+
<a href="hhttp://library.stanford.edu/myawrap.html" target="_blank">My Account</a>
|
75
|
+
<a href="http://library.stanford.edu/ask" target="_blank">Ask Us</a>
|
76
|
+
</div>
|
77
|
+
</div>
|
78
|
+
<div class="footer-su">
|
79
|
+
<div class="footer-logo">
|
80
|
+
<a href="http://www.stanford.edu" target="_blank"><img src="/images/footer-stanford-logo.png"></a>
|
81
|
+
</div>
|
82
|
+
<div class="footer-links">
|
83
|
+
<a href="http://www.stanford.edu" target="_blank">SU Home</a>
|
84
|
+
<a href="http://visit.stanford.edu/plan/maps.html" target="_blank">Maps & Directions</a>
|
85
|
+
<a href="http://www.stanford.edu/search/" target="_blank">Search Stanford</a>
|
86
|
+
<a href="http://www.stanford.edu/site/terms.html" target="_blank">Terms of Use</a>
|
87
|
+
<a href="http://www.stanford.edu/site/copyright.html" target="_blank">Copyright Complaints</a>
|
88
|
+
</br>© Stanford University, Stanford, California 94305
|
89
|
+
</div>
|
90
|
+
</div>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
<script src="/assets/jquery.js" type="text/javascript"></script>
|
94
|
+
<script src="/assets/jquery.truncator.js" type="text/javascript"></script>
|
95
|
+
<script src="/assets/application.js" type="text/javascript"></script>
|
96
|
+
|
97
|
+
<script type="text/javascript">
|
98
|
+
|
99
|
+
var _gaq = _gaq || [];
|
100
|
+
_gaq.push(['_setAccount', 'UA-7219229-11']);
|
101
|
+
_gaq.push(['_trackPageview']);
|
102
|
+
|
103
|
+
(function() {
|
104
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
105
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
106
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
107
|
+
})();
|
108
|
+
|
109
|
+
</script>
|
110
|
+
</body>
|
111
|
+
</html>
|
112
|
+
http_version:
|
113
|
+
recorded_at: Wed, 22 Oct 2014 20:26:30 GMT
|
114
|
+
recorded_with: VCR 2.9.3
|
@@ -0,0 +1,58 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://127.0.0.1:3000/collection/yg867hg1375
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 200
|
17
|
+
message: 'OK '
|
18
|
+
headers:
|
19
|
+
X-Frame-Options:
|
20
|
+
- SAMEORIGIN
|
21
|
+
X-Xss-Protection:
|
22
|
+
- 1; mode=block
|
23
|
+
X-Content-Type-Options:
|
24
|
+
- nosniff
|
25
|
+
Content-Type:
|
26
|
+
- application/json; charset=utf-8
|
27
|
+
Etag:
|
28
|
+
- '"682afec57f678e4d153a5841b21395dd"'
|
29
|
+
Cache-Control:
|
30
|
+
- max-age=0, private, must-revalidate
|
31
|
+
X-Request-Id:
|
32
|
+
- 0954c447-9cb9-4eeb-8020-d87f13098f07
|
33
|
+
X-Runtime:
|
34
|
+
- '0.006736'
|
35
|
+
Server:
|
36
|
+
- WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
|
37
|
+
Date:
|
38
|
+
- Wed, 22 Oct 2014 18:42:32 GMT
|
39
|
+
Content-Length:
|
40
|
+
- '1121'
|
41
|
+
Connection:
|
42
|
+
- Keep-Alive
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
|
46
|
+
E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
47
|
+
A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
48
|
+
social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
49
|
+
E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
50
|
+
D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
51
|
+
social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
52
|
+
C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
|
53
|
+
Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
54
|
+
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
55
|
+
social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
|
56
|
+
http_version:
|
57
|
+
recorded_at: Wed, 22 Oct 2014 18:42:32 GMT
|
58
|
+
recorded_with: VCR 2.9.3
|
@@ -0,0 +1,58 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://127.0.0.1:3000/collection/yg867hg1375
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 200
|
17
|
+
message: 'OK '
|
18
|
+
headers:
|
19
|
+
X-Frame-Options:
|
20
|
+
- SAMEORIGIN
|
21
|
+
X-Xss-Protection:
|
22
|
+
- 1; mode=block
|
23
|
+
X-Content-Type-Options:
|
24
|
+
- nosniff
|
25
|
+
Content-Type:
|
26
|
+
- application/json; charset=utf-8
|
27
|
+
Etag:
|
28
|
+
- '"682afec57f678e4d153a5841b21395dd"'
|
29
|
+
Cache-Control:
|
30
|
+
- max-age=0, private, must-revalidate
|
31
|
+
X-Request-Id:
|
32
|
+
- 1e0232c6-fc39-49bf-b874-89567e225d00
|
33
|
+
X-Runtime:
|
34
|
+
- '0.006851'
|
35
|
+
Server:
|
36
|
+
- WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
|
37
|
+
Date:
|
38
|
+
- Wed, 22 Oct 2014 18:53:15 GMT
|
39
|
+
Content-Length:
|
40
|
+
- '1121'
|
41
|
+
Connection:
|
42
|
+
- Keep-Alive
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
|
46
|
+
E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
47
|
+
A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
48
|
+
social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
49
|
+
E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
50
|
+
D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
51
|
+
social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
52
|
+
C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
|
53
|
+
Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
54
|
+
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
55
|
+
social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
|
56
|
+
http_version:
|
57
|
+
recorded_at: Wed, 22 Oct 2014 18:53:15 GMT
|
58
|
+
recorded_with: VCR 2.9.3
|
@@ -0,0 +1,58 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://127.0.0.1:3000/collection/yg867hg1375
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 200
|
17
|
+
message: 'OK '
|
18
|
+
headers:
|
19
|
+
X-Frame-Options:
|
20
|
+
- SAMEORIGIN
|
21
|
+
X-Xss-Protection:
|
22
|
+
- 1; mode=block
|
23
|
+
X-Content-Type-Options:
|
24
|
+
- nosniff
|
25
|
+
Content-Type:
|
26
|
+
- application/json; charset=utf-8
|
27
|
+
Etag:
|
28
|
+
- '"682afec57f678e4d153a5841b21395dd"'
|
29
|
+
Cache-Control:
|
30
|
+
- max-age=0, private, must-revalidate
|
31
|
+
X-Request-Id:
|
32
|
+
- d35b0793-e841-496b-bce1-720bfbf2ad04
|
33
|
+
X-Runtime:
|
34
|
+
- '0.006751'
|
35
|
+
Server:
|
36
|
+
- WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
|
37
|
+
Date:
|
38
|
+
- Wed, 22 Oct 2014 20:32:36 GMT
|
39
|
+
Content-Length:
|
40
|
+
- '1121'
|
41
|
+
Connection:
|
42
|
+
- Keep-Alive
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
|
46
|
+
E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
47
|
+
A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
48
|
+
social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
49
|
+
E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
50
|
+
D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
51
|
+
social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
52
|
+
C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
|
53
|
+
Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
54
|
+
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
55
|
+
social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
|
56
|
+
http_version:
|
57
|
+
recorded_at: Wed, 22 Oct 2014 20:32:36 GMT
|
58
|
+
recorded_with: VCR 2.9.3
|
@@ -0,0 +1,58 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://127.0.0.1:3000/collection/yg867hg1375
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 200
|
17
|
+
message: 'OK '
|
18
|
+
headers:
|
19
|
+
X-Frame-Options:
|
20
|
+
- SAMEORIGIN
|
21
|
+
X-Xss-Protection:
|
22
|
+
- 1; mode=block
|
23
|
+
X-Content-Type-Options:
|
24
|
+
- nosniff
|
25
|
+
Content-Type:
|
26
|
+
- application/json; charset=utf-8
|
27
|
+
Etag:
|
28
|
+
- '"682afec57f678e4d153a5841b21395dd"'
|
29
|
+
Cache-Control:
|
30
|
+
- max-age=0, private, must-revalidate
|
31
|
+
X-Request-Id:
|
32
|
+
- a631e18e-8396-4699-b7a9-fd05fd115e02
|
33
|
+
X-Runtime:
|
34
|
+
- '0.006491'
|
35
|
+
Server:
|
36
|
+
- WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
|
37
|
+
Date:
|
38
|
+
- Wed, 22 Oct 2014 20:34:01 GMT
|
39
|
+
Content-Length:
|
40
|
+
- '1121'
|
41
|
+
Connection:
|
42
|
+
- Keep-Alive
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
|
46
|
+
E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
47
|
+
A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
48
|
+
social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
49
|
+
E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
50
|
+
D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
51
|
+
social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
52
|
+
C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
|
53
|
+
Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
54
|
+
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
55
|
+
social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
|
56
|
+
http_version:
|
57
|
+
recorded_at: Wed, 22 Oct 2014 20:34:01 GMT
|
58
|
+
recorded_with: VCR 2.9.3
|
@@ -0,0 +1,58 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://127.0.0.1:3000/collection/yg867hg1375
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept:
|
11
|
+
- '*/*'
|
12
|
+
User-Agent:
|
13
|
+
- Ruby
|
14
|
+
response:
|
15
|
+
status:
|
16
|
+
code: 200
|
17
|
+
message: 'OK '
|
18
|
+
headers:
|
19
|
+
X-Frame-Options:
|
20
|
+
- SAMEORIGIN
|
21
|
+
X-Xss-Protection:
|
22
|
+
- 1; mode=block
|
23
|
+
X-Content-Type-Options:
|
24
|
+
- nosniff
|
25
|
+
Content-Type:
|
26
|
+
- application/json; charset=utf-8
|
27
|
+
Etag:
|
28
|
+
- '"682afec57f678e4d153a5841b21395dd"'
|
29
|
+
Cache-Control:
|
30
|
+
- max-age=0, private, must-revalidate
|
31
|
+
X-Request-Id:
|
32
|
+
- 37413f0c-a104-4df1-8a80-1873389200f4
|
33
|
+
X-Runtime:
|
34
|
+
- '0.006754'
|
35
|
+
Server:
|
36
|
+
- WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
|
37
|
+
Date:
|
38
|
+
- Wed, 22 Oct 2014 18:42:32 GMT
|
39
|
+
Content-Length:
|
40
|
+
- '1121'
|
41
|
+
Connection:
|
42
|
+
- Keep-Alive
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
|
46
|
+
E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
47
|
+
A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
48
|
+
social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
49
|
+
E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
50
|
+
D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
51
|
+
social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
52
|
+
C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
|
53
|
+
Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
|
54
|
+
B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
|
55
|
+
social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
|
56
|
+
http_version:
|
57
|
+
recorded_at: Wed, 22 Oct 2014 18:42:32 GMT
|
58
|
+
recorded_with: VCR 2.9.3
|
data/spec/spec_helper.rb
CHANGED
@@ -4,7 +4,8 @@ describe Harvestdor::Indexer do
|
|
4
4
|
|
5
5
|
before(:all) do
|
6
6
|
@config_yml_path = File.join(File.dirname(__FILE__), "..", "config", "ap.yml")
|
7
|
-
@
|
7
|
+
@client_config_path = File.join(File.dirname(__FILE__), "../..", "config", "dor-fetcher-client.yml")
|
8
|
+
@indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
8
9
|
require 'yaml'
|
9
10
|
@yaml = YAML.load_file(@config_yml_path)
|
10
11
|
@hdor_client = @indexer.send(:harvestdor_client)
|
@@ -65,55 +66,85 @@ describe Harvestdor::Indexer do
|
|
65
66
|
:id => @fake_druid
|
66
67
|
}
|
67
68
|
end
|
68
|
-
it "should call
|
69
|
+
it "should call dor_fetcher_client.druid_array and then call :add on rsolr connection" do
|
69
70
|
@indexer.should_receive(:druids).and_return([@fake_druid])
|
70
71
|
@indexer.solr_client.should_receive(:add).with(@doc_hash)
|
71
72
|
@indexer.solr_client.should_receive(:commit)
|
72
73
|
@indexer.harvest_and_index
|
73
74
|
end
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
75
|
+
|
76
|
+
it "should only call :commit on rsolr connection once" do
|
77
|
+
VCR.use_cassette('single_rsolr_connection_call') do
|
78
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
79
|
+
hdor_client = indexer.send(:harvestdor_client)
|
80
|
+
indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
81
|
+
indexer.solr_client.should_receive(:add).exactly(6).times
|
82
|
+
indexer.solr_client.should_receive(:commit).once
|
83
|
+
indexer.harvest_and_index
|
84
|
+
end
|
84
85
|
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
86
|
+
|
87
|
+
it "should not process druids in blacklist" do
|
88
|
+
VCR.use_cassette('ignore_druids_in_blacklist_call') do
|
89
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
90
|
+
hdor_client = indexer.send(:harvestdor_client)
|
91
|
+
indexer.dor_fetcher_client.should_receive(:druid_array).and_return(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
92
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
93
|
+
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
94
|
+
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:tc552kq0798'}))
|
95
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:th998nk0722'}))
|
96
|
+
indexer.solr_client.should_receive(:commit)
|
97
|
+
indexer.harvest_and_index
|
98
|
+
end
|
93
99
|
end
|
94
100
|
it "should not process druid if it is in both blacklist and whitelist" do
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
+
VCR.use_cassette('ignore_druids_in_blacklist_and_whitelist_call') do
|
102
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path, :whitelist => @whitelist_path})
|
103
|
+
hdor_client = indexer.send(:harvestdor_client)
|
104
|
+
indexer.dor_fetcher_client.should_not_receive(:druid_array)
|
105
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
|
106
|
+
indexer.solr_client.should_not_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
107
|
+
indexer.solr_client.should_receive(:commit)
|
108
|
+
indexer.harvest_and_index
|
109
|
+
end
|
101
110
|
end
|
102
|
-
it "should only
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
111
|
+
it "should only process druids in whitelist if it exists" do
|
112
|
+
VCR.use_cassette('process_druids_whitelist_call') do
|
113
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
|
114
|
+
hdor_client = indexer.send(:harvestdor_client)
|
115
|
+
indexer.dor_fetcher_client.should_not_receive(:druid_array)
|
116
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:yg867hg1375'}))
|
117
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:jf275fd6276'}))
|
118
|
+
indexer.solr_client.should_receive(:add).with(hash_including({:id => 'druid:nz353cp1092'}))
|
119
|
+
indexer.solr_client.should_receive(:commit)
|
120
|
+
indexer.harvest_and_index
|
121
|
+
end
|
109
122
|
end
|
123
|
+
|
110
124
|
end
|
111
125
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
126
|
+
# Check for replacement of oai harvesting with dor-fetcher
|
127
|
+
context "replacing OAI harvesting with dor-fetcher" do
|
128
|
+
it "has a dor-fetcher client" do
|
129
|
+
expect(@indexer.dor_fetcher_client).to be_an_instance_of(DorFetcher::Client)
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should strip off is_member_of_collection_ and is_governed_by_ and return only the druid" do
|
133
|
+
expect(@indexer.strip_default_set_string()).to eq("yg867hg1375")
|
134
|
+
end
|
135
|
+
|
136
|
+
it "druids method should call druid_array and get_collection methods on fetcher_client" do
|
137
|
+
VCR.use_cassette('get_collection_druids_call') do
|
138
|
+
expect(@indexer.druids).to eq(["druid:yg867hg1375", "druid:jf275fd6276", "druid:nz353cp1092", "druid:tc552kq0798", "druid:th998nk0722", "druid:ww689vs6534"])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
it "should get the configuration of the dor-fetcher client from included yml file" do
|
143
|
+
expect(@indexer.dor_fetcher_client.service_url).to eq(@indexer.client_config["dor_fetcher_service_url"])
|
144
|
+
end
|
145
|
+
|
146
|
+
end # ending replacing OAI context
|
147
|
+
|
117
148
|
context "smods_rec method" do
|
118
149
|
before(:all) do
|
119
150
|
@fake_druid = 'oo000oo0000'
|
@@ -134,7 +165,9 @@ describe Harvestdor::Indexer do
|
|
134
165
|
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty MODS metadata for #{@fake_druid}: <"))
|
135
166
|
end
|
136
167
|
it "should raise exception if there is no MODS xml for the druid" do
|
137
|
-
|
168
|
+
VCR.use_cassette('exception_no_MODS_call') do
|
169
|
+
expect { @indexer.smods_rec(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingMods)
|
170
|
+
end
|
138
171
|
end
|
139
172
|
end
|
140
173
|
|
@@ -253,30 +286,32 @@ describe Harvestdor::Indexer do
|
|
253
286
|
@indexer.send(:blacklist).size.should == 2
|
254
287
|
end
|
255
288
|
it "should be empty Array if there was no blacklist config setting" do
|
256
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path)
|
289
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
257
290
|
indexer.send(:blacklist).should == []
|
258
291
|
end
|
259
292
|
context "load_blacklist" do
|
260
293
|
it "should not be called if there was no blacklist config setting" do
|
261
|
-
|
294
|
+
VCR.use_cassette('no_blacklist_config_call') do
|
295
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
262
296
|
|
263
|
-
|
297
|
+
indexer.should_not_receive(:load_blacklist)
|
264
298
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
299
|
+
hdor_client = indexer.send(:harvestdor_client)
|
300
|
+
indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
|
301
|
+
indexer.solr_client.should_receive(:add)
|
302
|
+
indexer.solr_client.should_receive(:commit)
|
303
|
+
indexer.harvest_and_index
|
304
|
+
end
|
270
305
|
end
|
271
306
|
it "should only try to load a blacklist once" do
|
272
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, {:blacklist => @blacklist_path})
|
307
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => @blacklist_path})
|
273
308
|
indexer.send(:blacklist)
|
274
309
|
File.any_instance.should_not_receive(:open)
|
275
310
|
indexer.send(:blacklist)
|
276
311
|
end
|
277
312
|
it "should log an error message and throw RuntimeError if it can't find the indicated blacklist file" do
|
278
313
|
exp_msg = 'Unable to find list of druids at bad_path'
|
279
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, {:blacklist => 'bad_path'})
|
314
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:blacklist => 'bad_path'})
|
280
315
|
indexer.logger.should_receive(:fatal).with(exp_msg)
|
281
316
|
expect { indexer.send(:load_blacklist, 'bad_path') }.to raise_error(exp_msg)
|
282
317
|
end
|
@@ -287,33 +322,35 @@ describe Harvestdor::Indexer do
|
|
287
322
|
it "should be an Array with an entry for each non-empty line in the file" do
|
288
323
|
@indexer.send(:load_whitelist, @whitelist_path)
|
289
324
|
@indexer.send(:whitelist).should be_an_instance_of(Array)
|
290
|
-
@indexer.send(:whitelist).size.should ==
|
325
|
+
@indexer.send(:whitelist).size.should == 3
|
291
326
|
end
|
292
327
|
it "should be empty Array if there was no whitelist config setting" do
|
293
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path)
|
328
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
294
329
|
indexer.send(:whitelist).should == []
|
295
330
|
end
|
296
331
|
context "load_whitelist" do
|
297
332
|
it "should not be called if there was no whitelist config setting" do
|
298
|
-
|
333
|
+
VCR.use_cassette('no_whitelist_config_call') do
|
334
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path)
|
299
335
|
|
300
|
-
|
336
|
+
indexer.should_not_receive(:load_whitelist)
|
301
337
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
338
|
+
hdor_client = indexer.send(:harvestdor_client)
|
339
|
+
indexer.dor_fetcher_client.should_receive(:druid_array).and_return([@fake_druid])
|
340
|
+
indexer.solr_client.should_receive(:add)
|
341
|
+
indexer.solr_client.should_receive(:commit)
|
342
|
+
indexer.harvest_and_index
|
343
|
+
end
|
307
344
|
end
|
308
345
|
it "should only try to load a whitelist once" do
|
309
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, {:whitelist => @whitelist_path})
|
346
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => @whitelist_path})
|
310
347
|
indexer.send(:whitelist)
|
311
348
|
File.any_instance.should_not_receive(:open)
|
312
349
|
indexer.send(:whitelist)
|
313
350
|
end
|
314
351
|
it "should log an error message and throw RuntimeError if it can't find the indicated whitelist file" do
|
315
352
|
exp_msg = 'Unable to find list of druids at bad_path'
|
316
|
-
indexer = Harvestdor::Indexer.new(@config_yml_path, {:whitelist => 'bad_path'})
|
353
|
+
indexer = Harvestdor::Indexer.new(@config_yml_path, @client_config_path, {:whitelist => 'bad_path'})
|
317
354
|
indexer.logger.should_receive(:fatal).with(exp_msg)
|
318
355
|
expect { indexer.send(:load_whitelist, 'bad_path') }.to raise_error(exp_msg)
|
319
356
|
end
|
@@ -321,7 +358,7 @@ describe Harvestdor::Indexer do
|
|
321
358
|
end # whitelist
|
322
359
|
|
323
360
|
it "solr_client should initialize the rsolr client using the options from the config" do
|
324
|
-
indexer = Harvestdor::Indexer.new(nil, Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
|
361
|
+
indexer = Harvestdor::Indexer.new(nil, @client_config_path, Confstruct::Configuration.new(:solr => { :url => 'http://localhost:2345', :a => 1 }) )
|
325
362
|
RSolr.should_receive(:connect).with(hash_including(:a => 1, :url => 'http://localhost:2345')).and_return('foo')
|
326
363
|
indexer.solr_client
|
327
364
|
end
|
metadata
CHANGED
@@ -1,206 +1,263 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
|
-
|
8
|
+
- Bess Sadler
|
9
|
+
- Laney McGlohon
|
10
|
+
autorequire:
|
9
11
|
bindir: bin
|
10
12
|
cert_chain: []
|
11
|
-
date: 2014-
|
13
|
+
date: 2014-10-24 00:00:00.000000000 Z
|
12
14
|
dependencies:
|
13
15
|
- !ruby/object:Gem::Dependency
|
14
16
|
name: rsolr
|
17
|
+
version_requirements: !ruby/object:Gem::Requirement
|
18
|
+
requirements:
|
19
|
+
- - '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
15
22
|
requirement: !ruby/object:Gem::Requirement
|
16
23
|
requirements:
|
17
|
-
- -
|
24
|
+
- - '>='
|
18
25
|
- !ruby/object:Gem::Version
|
19
26
|
version: '0'
|
20
|
-
type: :runtime
|
21
27
|
prerelease: false
|
28
|
+
type: :runtime
|
29
|
+
- !ruby/object:Gem::Dependency
|
30
|
+
name: retries
|
22
31
|
version_requirements: !ruby/object:Gem::Requirement
|
23
32
|
requirements:
|
24
|
-
- -
|
33
|
+
- - '>='
|
25
34
|
- !ruby/object:Gem::Version
|
26
35
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: retries
|
29
36
|
requirement: !ruby/object:Gem::Requirement
|
30
37
|
requirements:
|
31
|
-
- -
|
38
|
+
- - '>='
|
32
39
|
- !ruby/object:Gem::Version
|
33
40
|
version: '0'
|
34
|
-
type: :runtime
|
35
41
|
prerelease: false
|
42
|
+
type: :runtime
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: harvestdor
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
|
-
- -
|
47
|
+
- - '>='
|
39
48
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: harvestdor
|
49
|
+
version: 0.0.14
|
43
50
|
requirement: !ruby/object:Gem::Requirement
|
44
51
|
requirements:
|
45
|
-
- -
|
52
|
+
- - '>='
|
46
53
|
- !ruby/object:Gem::Version
|
47
54
|
version: 0.0.14
|
48
|
-
type: :runtime
|
49
55
|
prerelease: false
|
56
|
+
type: :runtime
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: stanford-mods
|
50
59
|
version_requirements: !ruby/object:Gem::Requirement
|
51
60
|
requirements:
|
52
|
-
- -
|
61
|
+
- - '>='
|
53
62
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: stanford-mods
|
63
|
+
version: '0'
|
57
64
|
requirement: !ruby/object:Gem::Requirement
|
58
65
|
requirements:
|
59
|
-
- -
|
66
|
+
- - '>='
|
60
67
|
- !ruby/object:Gem::Version
|
61
68
|
version: '0'
|
62
|
-
type: :runtime
|
63
69
|
prerelease: false
|
70
|
+
type: :runtime
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: dor-fetcher
|
64
73
|
version_requirements: !ruby/object:Gem::Requirement
|
65
74
|
requirements:
|
66
|
-
- -
|
75
|
+
- - '>='
|
67
76
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
77
|
+
version: 1.0.0
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.0.0
|
83
|
+
prerelease: false
|
84
|
+
type: :runtime
|
69
85
|
- !ruby/object:Gem::Dependency
|
70
86
|
name: confstruct
|
87
|
+
version_requirements: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
71
92
|
requirement: !ruby/object:Gem::Requirement
|
72
93
|
requirements:
|
73
|
-
- -
|
94
|
+
- - '>='
|
74
95
|
- !ruby/object:Gem::Version
|
75
96
|
version: '0'
|
76
|
-
type: :runtime
|
77
97
|
prerelease: false
|
98
|
+
type: :runtime
|
99
|
+
- !ruby/object:Gem::Dependency
|
100
|
+
name: rake
|
78
101
|
version_requirements: !ruby/object:Gem::Requirement
|
79
102
|
requirements:
|
80
|
-
- -
|
103
|
+
- - '>='
|
81
104
|
- !ruby/object:Gem::Version
|
82
105
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rake
|
85
106
|
requirement: !ruby/object:Gem::Requirement
|
86
107
|
requirements:
|
87
|
-
- -
|
108
|
+
- - '>='
|
88
109
|
- !ruby/object:Gem::Version
|
89
110
|
version: '0'
|
90
|
-
type: :development
|
91
111
|
prerelease: false
|
112
|
+
type: :development
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: rdoc
|
92
115
|
version_requirements: !ruby/object:Gem::Requirement
|
93
116
|
requirements:
|
94
|
-
- -
|
117
|
+
- - '>='
|
95
118
|
- !ruby/object:Gem::Version
|
96
119
|
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: rdoc
|
99
120
|
requirement: !ruby/object:Gem::Requirement
|
100
121
|
requirements:
|
101
|
-
- -
|
122
|
+
- - '>='
|
102
123
|
- !ruby/object:Gem::Version
|
103
124
|
version: '0'
|
104
|
-
type: :development
|
105
125
|
prerelease: false
|
126
|
+
type: :development
|
127
|
+
- !ruby/object:Gem::Dependency
|
128
|
+
name: yard
|
106
129
|
version_requirements: !ruby/object:Gem::Requirement
|
107
130
|
requirements:
|
108
|
-
- -
|
131
|
+
- - '>='
|
109
132
|
- !ruby/object:Gem::Version
|
110
133
|
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: yard
|
113
134
|
requirement: !ruby/object:Gem::Requirement
|
114
135
|
requirements:
|
115
|
-
- -
|
136
|
+
- - '>='
|
116
137
|
- !ruby/object:Gem::Version
|
117
138
|
version: '0'
|
118
|
-
type: :development
|
119
139
|
prerelease: false
|
140
|
+
type: :development
|
141
|
+
- !ruby/object:Gem::Dependency
|
142
|
+
name: rspec
|
120
143
|
version_requirements: !ruby/object:Gem::Requirement
|
121
144
|
requirements:
|
122
|
-
- -
|
145
|
+
- - '>='
|
123
146
|
- !ruby/object:Gem::Version
|
124
147
|
version: '0'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: rspec
|
127
148
|
requirement: !ruby/object:Gem::Requirement
|
128
149
|
requirements:
|
129
|
-
- -
|
150
|
+
- - '>='
|
130
151
|
- !ruby/object:Gem::Version
|
131
152
|
version: '0'
|
132
|
-
type: :development
|
133
153
|
prerelease: false
|
154
|
+
type: :development
|
155
|
+
- !ruby/object:Gem::Dependency
|
156
|
+
name: coveralls
|
134
157
|
version_requirements: !ruby/object:Gem::Requirement
|
135
158
|
requirements:
|
136
|
-
- -
|
159
|
+
- - '>='
|
137
160
|
- !ruby/object:Gem::Version
|
138
161
|
version: '0'
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: coveralls
|
141
162
|
requirement: !ruby/object:Gem::Requirement
|
142
163
|
requirements:
|
143
|
-
- -
|
164
|
+
- - '>='
|
144
165
|
- !ruby/object:Gem::Version
|
145
166
|
version: '0'
|
167
|
+
prerelease: false
|
146
168
|
type: :development
|
169
|
+
- !ruby/object:Gem::Dependency
|
170
|
+
name: vcr
|
171
|
+
version_requirements: !ruby/object:Gem::Requirement
|
172
|
+
requirements:
|
173
|
+
- - '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - '>='
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
147
181
|
prerelease: false
|
182
|
+
type: :development
|
183
|
+
- !ruby/object:Gem::Dependency
|
184
|
+
name: webmock
|
148
185
|
version_requirements: !ruby/object:Gem::Requirement
|
149
186
|
requirements:
|
150
|
-
- -
|
187
|
+
- - '>='
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: '0'
|
190
|
+
requirement: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - '>='
|
151
193
|
- !ruby/object:Gem::Version
|
152
194
|
version: '0'
|
153
|
-
|
154
|
-
|
155
|
-
|
195
|
+
prerelease: false
|
196
|
+
type: :development
|
197
|
+
description: Harvest DOR object metadata via a relationship (e.g. hydra:isGovernedBy rdf:resource="info:fedora/druid:hy787xj5878") and dates, plus code framework to write Solr docs to index
|
156
198
|
email:
|
157
199
|
- ndushay@stanford.edu
|
200
|
+
- bess@stanford.edu
|
201
|
+
- laneymcg@stanford.edu
|
158
202
|
executables: []
|
159
203
|
extensions: []
|
160
204
|
extra_rdoc_files: []
|
161
205
|
files:
|
162
|
-
-
|
163
|
-
-
|
164
|
-
-
|
206
|
+
- .gitignore
|
207
|
+
- .travis.yml
|
208
|
+
- .yardopts
|
165
209
|
- Gemfile
|
166
210
|
- LICENSE.txt
|
167
211
|
- README.rdoc
|
168
212
|
- Rakefile
|
213
|
+
- config/dor-fetcher-client.yml
|
169
214
|
- harvestdor-indexer.gemspec
|
170
215
|
- lib/harvestdor-indexer.rb
|
171
216
|
- lib/harvestdor-indexer/version.rb
|
172
217
|
- spec/config/ap.yml
|
173
218
|
- spec/config/ap_blacklist.txt
|
174
219
|
- spec/config/ap_whitelist.txt
|
220
|
+
- spec/fixtures/vcr_cassettes/exception_no_MODS_call.yml
|
221
|
+
- spec/fixtures/vcr_cassettes/get_collection_druids_call.yml
|
222
|
+
- spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml
|
223
|
+
- spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml
|
224
|
+
- spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml
|
225
|
+
- spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml
|
175
226
|
- spec/spec_helper.rb
|
176
227
|
- spec/unit/harvestdor-indexer_spec.rb
|
177
228
|
homepage: https://consul.stanford.edu/display/chimera/Chimera+project
|
178
229
|
licenses: []
|
179
230
|
metadata: {}
|
180
|
-
post_install_message:
|
231
|
+
post_install_message:
|
181
232
|
rdoc_options: []
|
182
233
|
require_paths:
|
183
234
|
- lib
|
184
235
|
required_ruby_version: !ruby/object:Gem::Requirement
|
185
236
|
requirements:
|
186
|
-
- -
|
237
|
+
- - '>='
|
187
238
|
- !ruby/object:Gem::Version
|
188
239
|
version: '0'
|
189
240
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
190
241
|
requirements:
|
191
|
-
- -
|
242
|
+
- - '>='
|
192
243
|
- !ruby/object:Gem::Version
|
193
244
|
version: '0'
|
194
245
|
requirements: []
|
195
|
-
rubyforge_project:
|
246
|
+
rubyforge_project:
|
196
247
|
rubygems_version: 2.2.2
|
197
|
-
signing_key:
|
248
|
+
signing_key:
|
198
249
|
specification_version: 4
|
199
250
|
summary: Harvest DOR object metadata and index it to Solr
|
200
251
|
test_files:
|
201
252
|
- spec/config/ap.yml
|
202
253
|
- spec/config/ap_blacklist.txt
|
203
254
|
- spec/config/ap_whitelist.txt
|
255
|
+
- spec/fixtures/vcr_cassettes/exception_no_MODS_call.yml
|
256
|
+
- spec/fixtures/vcr_cassettes/get_collection_druids_call.yml
|
257
|
+
- spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml
|
258
|
+
- spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml
|
259
|
+
- spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml
|
260
|
+
- spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml
|
204
261
|
- spec/spec_helper.rb
|
205
262
|
- spec/unit/harvestdor-indexer_spec.rb
|
206
|
-
has_rdoc:
|
263
|
+
has_rdoc:
|