harvestdor-indexer 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. checksums.yaml +4 -4
  2. data/README.rdoc +1 -0
  3. data/harvestdor-indexer.gemspec +1 -1
  4. data/lib/harvestdor-indexer.rb +12 -2
  5. data/lib/harvestdor-indexer/version.rb +1 -1
  6. data/spec/fixtures/vcr_cassettes/before_all_call.yml +48 -0
  7. data/spec/fixtures/vcr_cassettes/cant_find_whitelist_call.yml +48 -0
  8. data/spec/fixtures/vcr_cassettes/empty_array_no_blacklist_config_call.yml +48 -0
  9. data/spec/fixtures/vcr_cassettes/empty_array_no_whitelist_config_call.yml +48 -0
  10. data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +18 -20
  11. data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_and_whitelist_call.yml +48 -0
  12. data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml +61 -20
  13. data/spec/fixtures/vcr_cassettes/know_what_is_in_blacklist_call.yml +46 -0
  14. data/spec/fixtures/vcr_cassettes/know_what_is_in_whitelist_call.yml +46 -0
  15. data/spec/fixtures/vcr_cassettes/load_blacklist_once_call.yml +48 -0
  16. data/spec/fixtures/vcr_cassettes/load_whitelist_once_call.yml +48 -0
  17. data/spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml +61 -20
  18. data/spec/fixtures/vcr_cassettes/no_blacklist_found_call.yml +48 -0
  19. data/spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml +61 -20
  20. data/spec/fixtures/vcr_cassettes/process_druids_whitelist_call.yml +48 -0
  21. data/spec/fixtures/vcr_cassettes/rsolr_client_config_call.yml +48 -0
  22. data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +61 -20
  23. data/spec/unit/harvestdor-indexer_spec.rb +114 -72
  24. metadata +30 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 45d033dffd56d8c3abd90ccafd0fa4666b4379da
4
- data.tar.gz: f579c955fb390f2d7ed497aa237dbe763361d60b
3
+ metadata.gz: 8eb186d19cd3f8c6051dce74feaffa9d8bafcbe8
4
+ data.tar.gz: f8be614289308a8752c615b66a286139f45a684b
5
5
  SHA512:
6
- metadata.gz: a7e9213e27145df273eaee8384b690e9ae720aeffd7f781f64dec9bc13948c6716b942499305037c7210c59ae9b5f07747f5ff67e4fe315577a78bce7d530a81
7
- data.tar.gz: 896d1c26f157087e2bd6f935e0106024e9a7840cdb5c5db05e6a348ff1fe0579410bc70895b26b3db60a6461883bfd5bfe4a8ee43483d0f8821bf73cad4c2b1c
6
+ metadata.gz: 9949e15b1ebe035ee7317cbc9832e2b8fb3b8feaff155f5143f366a7d47829f8402f991fd78d5167d2a94c0f38994ff366eaff1f2b1d9bfa652b9ee2f8801d73
7
+ data.tar.gz: a382a7f2698a1a3001a4cb82a17fc5683d5a7d5305ba212717b1b92c758f4edc0c6564000f4ade2872ee8f0acc1dc7b552f9ccdf02f6bdc3749c2bcdc5deae79
data/README.rdoc CHANGED
@@ -118,6 +118,7 @@ I suggest you run your code on harvestdor-dev, as it is already set up to be abl
118
118
 
119
119
  == Releases
120
120
 
121
+ * <b>1.0.3</b> Implemented class level config so anything that inherits from Harvestdor::Indexer can share configuration settings
121
122
  * <b>1.0.0</b> Replaced OAI harvesting mechanism with dor-fetcher
122
123
  * <b>0.0.13</b> Upgrade to latest faraday HTTP client syntax; Use retries gem (https://github.com/ooyala/retries) to make retrying of index process more robust
123
124
  * <b>0.0.12</b> fix total_object nil error
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_dependency 'retries'
22
22
  gem.add_dependency 'harvestdor', '>=0.0.14'
23
23
  gem.add_dependency 'stanford-mods'
24
- gem.add_dependency 'dor-fetcher', '>=1.0.0'
24
+ gem.add_dependency 'dor-fetcher', '=1.0.5'
25
25
 
26
26
  # Runtime dependencies
27
27
  gem.add_runtime_dependency 'confstruct'
@@ -22,6 +22,10 @@ module Harvestdor
22
22
  attr_accessor :total_time_to_parse,:total_time_to_solr
23
23
  attr_accessor :dor_fetcher_client, :client_config
24
24
 
25
+
26
+ # Class level config variable
27
+ @@config ||= Confstruct::Configuration.new()
28
+
25
29
  def initialize yml_path, client_config_path, options = {}
26
30
  @success_count=0 # the number of objects successfully indexed
27
31
  @error_count=0 # the number of objects that failed
@@ -36,8 +40,14 @@ module Harvestdor
36
40
  @dor_fetcher_client=DorFetcher::Client.new({:service_url => client_config["dor_fetcher_service_url"]})
37
41
  end
38
42
 
43
+ # to allow class level access to config variables for record_merger and solr_doc_builder
44
+ # (rather than passing a lot of params to constructor)
45
+ def self.config
46
+ @@config ||= Confstruct::Configuration.new()
47
+ end
48
+
39
49
  def config
40
- @config ||= Confstruct::Configuration.new()
50
+ Indexer.config
41
51
  end
42
52
 
43
53
  def logger
@@ -232,7 +242,7 @@ module Harvestdor
232
242
  # Get only the druid from the end of the default_set string
233
243
  # from the yml file
234
244
  def strip_default_set_string()
235
- @config.default_set.split('_').last
245
+ Indexer.config.default_set.split('_').last
236
246
  end
237
247
 
238
248
  protected #---------------------------------------------------------------------
@@ -1,6 +1,6 @@
1
1
  module Harvestdor
2
2
  class Indexer
3
3
  # this is the Ruby Gem version
4
- VERSION = "1.0.0"
4
+ VERSION = "1.0.3"
5
5
  end
6
6
  end
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://127.0.0.1:3000/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - '*/*; q=0.5, application/xml'
12
+ Accept-Encoding:
13
+ - gzip, deflate
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ X-Frame-Options:
22
+ - SAMEORIGIN
23
+ X-Xss-Protection:
24
+ - 1; mode=block
25
+ X-Content-Type-Options:
26
+ - nosniff
27
+ Content-Type:
28
+ - application/xml; charset=utf-8
29
+ Etag:
30
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
31
+ Cache-Control:
32
+ - max-age=0, private, must-revalidate
33
+ X-Meta-Request-Version:
34
+ - 0.3.4
35
+ X-Request-Id:
36
+ - e72ddd14-0ca2-4b1b-bad6-1ff7d4fc90d6
37
+ X-Runtime:
38
+ - '0.186248'
39
+ Connection:
40
+ - close
41
+ Server:
42
+ - thin 1.6.2 codename Doc Brown
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ok
46
+ http_version:
47
+ recorded_at: Wed, 12 Nov 2014 18:46:07 GMT
48
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://127.0.0.1:3000/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - '*/*; q=0.5, application/xml'
12
+ Accept-Encoding:
13
+ - gzip, deflate
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ X-Frame-Options:
22
+ - SAMEORIGIN
23
+ X-Xss-Protection:
24
+ - 1; mode=block
25
+ X-Content-Type-Options:
26
+ - nosniff
27
+ Content-Type:
28
+ - application/xml; charset=utf-8
29
+ Etag:
30
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
31
+ Cache-Control:
32
+ - max-age=0, private, must-revalidate
33
+ X-Meta-Request-Version:
34
+ - 0.3.4
35
+ X-Request-Id:
36
+ - 3ee1c4c2-6bc7-41f3-a19a-6466c6f3ee6b
37
+ X-Runtime:
38
+ - '0.004152'
39
+ Connection:
40
+ - close
41
+ Server:
42
+ - thin 1.6.2 codename Doc Brown
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ok
46
+ http_version:
47
+ recorded_at: Wed, 12 Nov 2014 18:48:31 GMT
48
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://127.0.0.1:3000/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - '*/*; q=0.5, application/xml'
12
+ Accept-Encoding:
13
+ - gzip, deflate
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ X-Frame-Options:
22
+ - SAMEORIGIN
23
+ X-Xss-Protection:
24
+ - 1; mode=block
25
+ X-Content-Type-Options:
26
+ - nosniff
27
+ Content-Type:
28
+ - application/xml; charset=utf-8
29
+ Etag:
30
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
31
+ Cache-Control:
32
+ - max-age=0, private, must-revalidate
33
+ X-Meta-Request-Version:
34
+ - 0.3.4
35
+ X-Request-Id:
36
+ - 1acd71cc-22d9-4c18-beda-193b44b07922
37
+ X-Runtime:
38
+ - '0.003821'
39
+ Connection:
40
+ - close
41
+ Server:
42
+ - thin 1.6.2 codename Doc Brown
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ok
46
+ http_version:
47
+ recorded_at: Wed, 12 Nov 2014 18:53:12 GMT
48
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://127.0.0.1:3000/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - '*/*; q=0.5, application/xml'
12
+ Accept-Encoding:
13
+ - gzip, deflate
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ X-Frame-Options:
22
+ - SAMEORIGIN
23
+ X-Xss-Protection:
24
+ - 1; mode=block
25
+ X-Content-Type-Options:
26
+ - nosniff
27
+ Content-Type:
28
+ - application/xml; charset=utf-8
29
+ Etag:
30
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
31
+ Cache-Control:
32
+ - max-age=0, private, must-revalidate
33
+ X-Meta-Request-Version:
34
+ - 0.3.4
35
+ X-Request-Id:
36
+ - 02a8b8e5-2dae-469b-8613-3d5e75ea7564
37
+ X-Runtime:
38
+ - '0.006424'
39
+ Connection:
40
+ - close
41
+ Server:
42
+ - thin 1.6.2 codename Doc Brown
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ok
46
+ http_version:
47
+ recorded_at: Wed, 12 Nov 2014 18:53:12 GMT
48
+ recorded_with: VCR 2.9.3
@@ -2,7 +2,7 @@
2
2
  http_interactions:
3
3
  - request:
4
4
  method: get
5
- uri: http://127.0.0.1:3000/collection/yg867hg1375
5
+ uri: http://127.0.0.1:3000/collections/yg867hg1375
6
6
  body:
7
7
  encoding: US-ASCII
8
8
  string: ''
@@ -14,7 +14,7 @@ http_interactions:
14
14
  response:
15
15
  status:
16
16
  code: 200
17
- message: 'OK '
17
+ message: OK
18
18
  headers:
19
19
  X-Frame-Options:
20
20
  - SAMEORIGIN
@@ -25,34 +25,32 @@ http_interactions:
25
25
  Content-Type:
26
26
  - application/json; charset=utf-8
27
27
  Etag:
28
- - '"682afec57f678e4d153a5841b21395dd"'
28
+ - '"c3a375a17d64792857f21d1af227ed7b"'
29
29
  Cache-Control:
30
30
  - max-age=0, private, must-revalidate
31
+ X-Meta-Request-Version:
32
+ - 0.3.4
31
33
  X-Request-Id:
32
- - 0954c447-9cb9-4eeb-8020-d87f13098f07
34
+ - 125a9964-6326-4114-9f59-fb533551d554
33
35
  X-Runtime:
34
- - '0.006736'
35
- Server:
36
- - WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
37
- Date:
38
- - Wed, 22 Oct 2014 18:42:32 GMT
39
- Content-Length:
40
- - '1121'
36
+ - '0.011086'
41
37
  Connection:
42
- - Keep-Alive
38
+ - close
39
+ Server:
40
+ - thin 1.6.2 codename Doc Brown
43
41
  body:
44
42
  encoding: US-ASCII
45
- string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
46
- E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
43
+ string: '{"collections":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":"Francis
44
+ E. Stafford photographs, 1909-1933"}],"items":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":"Album
47
45
  A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
48
- social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
49
- E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
46
+ social customs, and people."},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":"Album
47
+ E: Photographs of the Seventh Day Adventist Church missionaries in China"},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":"Album
50
48
  D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
51
- social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
49
+ social customs, and people."},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":"Album
52
50
  C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
53
- Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
51
+ Press"},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":"Album
54
52
  B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
55
- social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
53
+ social customs, and people."}],"counts":{"collections":1,"items":5,"total_count":6}}'
56
54
  http_version:
57
- recorded_at: Wed, 22 Oct 2014 18:42:32 GMT
55
+ recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
58
56
  recorded_with: VCR 2.9.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://127.0.0.1:3000/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - '*/*; q=0.5, application/xml'
12
+ Accept-Encoding:
13
+ - gzip, deflate
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ X-Frame-Options:
22
+ - SAMEORIGIN
23
+ X-Xss-Protection:
24
+ - 1; mode=block
25
+ X-Content-Type-Options:
26
+ - nosniff
27
+ Content-Type:
28
+ - application/xml; charset=utf-8
29
+ Etag:
30
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
31
+ Cache-Control:
32
+ - max-age=0, private, must-revalidate
33
+ X-Meta-Request-Version:
34
+ - 0.3.4
35
+ X-Request-Id:
36
+ - 7d9369f3-a2dc-4a1e-aa79-4d574628be4d
37
+ X-Runtime:
38
+ - '0.004594'
39
+ Connection:
40
+ - close
41
+ Server:
42
+ - thin 1.6.2 codename Doc Brown
43
+ body:
44
+ encoding: US-ASCII
45
+ string: ok
46
+ http_version:
47
+ recorded_at: Wed, 12 Nov 2014 18:46:07 GMT
48
+ recorded_with: VCR 2.9.3
@@ -2,7 +2,7 @@
2
2
  http_interactions:
3
3
  - request:
4
4
  method: get
5
- uri: http://127.0.0.1:3000/collection/yg867hg1375
5
+ uri: http://127.0.0.1:3000/
6
6
  body:
7
7
  encoding: US-ASCII
8
8
  string: ''
@@ -14,7 +14,7 @@ http_interactions:
14
14
  response:
15
15
  status:
16
16
  code: 200
17
- message: 'OK '
17
+ message: OK
18
18
  headers:
19
19
  X-Frame-Options:
20
20
  - SAMEORIGIN
@@ -23,36 +23,77 @@ http_interactions:
23
23
  X-Content-Type-Options:
24
24
  - nosniff
25
25
  Content-Type:
26
- - application/json; charset=utf-8
26
+ - text/html; charset=utf-8
27
27
  Etag:
28
- - '"682afec57f678e4d153a5841b21395dd"'
28
+ - '"444bcb3a3fcf8389296c49467f27e1d6"'
29
29
  Cache-Control:
30
30
  - max-age=0, private, must-revalidate
31
+ X-Meta-Request-Version:
32
+ - 0.3.4
31
33
  X-Request-Id:
32
- - 1e0232c6-fc39-49bf-b874-89567e225d00
34
+ - 994d795e-727d-4c83-aa8d-addfd5d1a01f
33
35
  X-Runtime:
34
- - '0.006851'
36
+ - '0.010719'
37
+ Connection:
38
+ - close
35
39
  Server:
36
- - WEBrick/1.3.1 (Ruby/2.1.2/2014-05-08)
37
- Date:
38
- - Wed, 22 Oct 2014 18:53:15 GMT
39
- Content-Length:
40
- - '1121'
40
+ - thin 1.6.2 codename Doc Brown
41
+ body:
42
+ encoding: US-ASCII
43
+ string: ok
44
+ http_version:
45
+ recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
46
+ - request:
47
+ method: get
48
+ uri: http://127.0.0.1:3000/collections/yg867hg1375
49
+ body:
50
+ encoding: US-ASCII
51
+ string: ''
52
+ headers:
53
+ Accept:
54
+ - '*/*'
55
+ User-Agent:
56
+ - Ruby
57
+ response:
58
+ status:
59
+ code: 200
60
+ message: OK
61
+ headers:
62
+ X-Frame-Options:
63
+ - SAMEORIGIN
64
+ X-Xss-Protection:
65
+ - 1; mode=block
66
+ X-Content-Type-Options:
67
+ - nosniff
68
+ Content-Type:
69
+ - application/json; charset=utf-8
70
+ Etag:
71
+ - '"c3a375a17d64792857f21d1af227ed7b"'
72
+ Cache-Control:
73
+ - max-age=0, private, must-revalidate
74
+ X-Meta-Request-Version:
75
+ - 0.3.4
76
+ X-Request-Id:
77
+ - dc0faecb-c39d-42e4-bb1d-8f7be4b8c287
78
+ X-Runtime:
79
+ - '0.012066'
41
80
  Connection:
42
- - Keep-Alive
81
+ - close
82
+ Server:
83
+ - thin 1.6.2 codename Doc Brown
43
84
  body:
44
85
  encoding: US-ASCII
45
- string: '{"collection":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":["Francis
46
- E. Stafford photographs, 1909-1933"]}],"item":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":["Album
86
+ string: '{"collections":[{"druid":"druid:yg867hg1375","latest_change":"2013-11-11T23:34:29Z","title":"Francis
87
+ E. Stafford photographs, 1909-1933"}],"items":[{"druid":"druid:jf275fd6276","latest_change":"2013-11-11T23:34:29Z","title":"Album
47
88
  A: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
48
- social customs, and people."]},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":["Album
49
- E: Photographs of the Seventh Day Adventist Church missionaries in China"]},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":["Album
89
+ social customs, and people."},{"druid":"druid:nz353cp1092","latest_change":"2013-11-11T23:34:29Z","title":"Album
90
+ E: Photographs of the Seventh Day Adventist Church missionaries in China"},{"druid":"druid:tc552kq0798","latest_change":"2013-11-11T23:34:29Z","title":"Album
50
91
  D: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
51
- social customs, and people."]},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":["Album
92
+ social customs, and people."},{"druid":"druid:th998nk0722","latest_change":"2013-11-11T23:34:29Z","title":"Album
52
93
  C: Photographs of the Chinese Revolution of 1911 and the Shanghai Commercial
53
- Press"]},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":["Album
94
+ Press"},{"druid":"druid:ww689vs6534","latest_change":"2013-11-11T23:34:29Z","title":"Album
54
95
  B: Photographs of China''s natural landscapes, urban scenes, cultural landmarks,
55
- social customs, and people."]}],"counts":[{"collection":1},{"item":5},{"total_count":6}]}'
96
+ social customs, and people."}],"counts":{"collections":1,"items":5,"total_count":6}}'
56
97
  http_version:
57
- recorded_at: Wed, 22 Oct 2014 18:53:15 GMT
98
+ recorded_at: Wed, 12 Nov 2014 19:34:03 GMT
58
99
  recorded_with: VCR 2.9.3