harvestdor-indexer 2.1.1 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/.rubocop_todo.yml +2 -221
- data/README.rdoc +12 -40
- data/Rakefile +12 -12
- data/harvestdor-indexer.gemspec +16 -16
- data/lib/harvestdor/indexer/metrics.rb +11 -13
- data/lib/harvestdor/indexer/resource.rb +25 -19
- data/lib/harvestdor/indexer/solr.rb +1 -1
- data/lib/harvestdor/indexer/version.rb +1 -1
- data/lib/harvestdor/indexer.rb +38 -24
- data/spec/fixtures/vcr_cassettes/get_collection_druids_call.yml +112 -3
- data/spec/fixtures/vcr_cassettes/process_druids_whitelist_call.yml +113 -4
- data/spec/fixtures/vcr_cassettes/single_rsolr_connection_call.yml +112 -3
- data/spec/unit/harvestdor/indexer/metrics_spec.rb +13 -13
- data/spec/unit/harvestdor-indexer-resource_spec.rb +75 -47
- data/spec/unit/harvestdor-indexer-solr_spec.rb +11 -11
- data/spec/unit/harvestdor-indexer_spec.rb +67 -53
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3088417ff80fa408e42f434dc8f55cf10010fa88
|
4
|
+
data.tar.gz: 390fb594e23b05bcaadbd5b498a205c31ada7cda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb7b00d94dbd7a2266ffa10e469694cafe59ed7dacc99a1eaebe28f7a9452381855cfb7a5c940518450c8d8a01d1ac161a5e4eeb796d5828e2ddfb82f9e4619b
|
7
|
+
data.tar.gz: 294304cf33a1af2540f07edcb3a138c2f50040d48264f8223e4b9936b17d7305d063d458fd4f7b09ab84d290b8ac69f5b119da873bc7b0f0134b125d221d6d78
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,29 +1,11 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2015-10-
|
3
|
+
# on 2015-10-26 16:12:34 -0700 using RuboCop version 0.34.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
# Cop supports --auto-correct.
|
11
|
-
Lint/DeprecatedClassMethods:
|
12
|
-
Exclude:
|
13
|
-
- 'spec/unit/harvestdor-indexer_spec.rb'
|
14
|
-
|
15
|
-
# Offense count: 1
|
16
|
-
# Cop supports --auto-correct.
|
17
|
-
Lint/UnusedBlockArgument:
|
18
|
-
Exclude:
|
19
|
-
- 'lib/harvestdor/indexer/solr.rb'
|
20
|
-
|
21
|
-
# Offense count: 1
|
22
|
-
# Cop supports --auto-correct.
|
23
|
-
Lint/UnusedMethodArgument:
|
24
|
-
Exclude:
|
25
|
-
- 'lib/harvestdor/indexer.rb'
|
26
|
-
|
27
9
|
# Offense count: 4
|
28
10
|
Lint/UselessAssignment:
|
29
11
|
Exclude:
|
@@ -39,25 +21,11 @@ Metrics/AbcSize:
|
|
39
21
|
Metrics/ClassLength:
|
40
22
|
Max: 105
|
41
23
|
|
42
|
-
# Offense count:
|
24
|
+
# Offense count: 96
|
43
25
|
# Configuration parameters: AllowURI, URISchemes.
|
44
26
|
Metrics/LineLength:
|
45
27
|
Max: 207
|
46
28
|
|
47
|
-
# Offense count: 11
|
48
|
-
RSpec/DescribedClass:
|
49
|
-
Exclude:
|
50
|
-
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
51
|
-
- 'spec/unit/harvestdor-indexer-solr_spec.rb'
|
52
|
-
- 'spec/unit/harvestdor-indexer_spec.rb'
|
53
|
-
|
54
|
-
# Offense count: 15
|
55
|
-
# Configuration parameters: CustomTransform, IgnoredWords.
|
56
|
-
RSpec/ExampleWording:
|
57
|
-
Exclude:
|
58
|
-
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
59
|
-
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
60
|
-
|
61
29
|
# Offense count: 3
|
62
30
|
# Configuration parameters: CustomTransform.
|
63
31
|
RSpec/FilePath:
|
@@ -72,14 +40,6 @@ RSpec/InstanceVariable:
|
|
72
40
|
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
73
41
|
- 'spec/unit/harvestdor-indexer_spec.rb'
|
74
42
|
|
75
|
-
# Offense count: 3
|
76
|
-
# Cop supports --auto-correct.
|
77
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
78
|
-
Style/AndOr:
|
79
|
-
Exclude:
|
80
|
-
- 'lib/harvestdor/indexer.rb'
|
81
|
-
- 'lib/harvestdor/indexer/resource.rb'
|
82
|
-
|
83
43
|
# Offense count: 3
|
84
44
|
# Cop supports --auto-correct.
|
85
45
|
# Configuration parameters: EnforcedStyle, SupportedStyles, ProceduralMethods, FunctionalMethods, IgnoredMethods.
|
@@ -101,18 +61,6 @@ Style/ClassAndModuleChildren:
|
|
101
61
|
- 'lib/harvestdor/indexer/resource.rb'
|
102
62
|
- 'lib/harvestdor/indexer/solr.rb'
|
103
63
|
|
104
|
-
# Offense count: 1
|
105
|
-
# Cop supports --auto-correct.
|
106
|
-
Style/ClosingParenthesisIndentation:
|
107
|
-
Exclude:
|
108
|
-
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
109
|
-
|
110
|
-
# Offense count: 1
|
111
|
-
# Cop supports --auto-correct.
|
112
|
-
Style/CommentIndentation:
|
113
|
-
Exclude:
|
114
|
-
- 'harvestdor-indexer.gemspec'
|
115
|
-
|
116
64
|
# Offense count: 3
|
117
65
|
# Configuration parameters: Exclude.
|
118
66
|
Style/Documentation:
|
@@ -126,18 +74,6 @@ Style/DoubleNegation:
|
|
126
74
|
Exclude:
|
127
75
|
- 'lib/harvestdor/indexer/resource.rb'
|
128
76
|
|
129
|
-
# Offense count: 1
|
130
|
-
# Cop supports --auto-correct.
|
131
|
-
Style/EmptyLines:
|
132
|
-
Exclude:
|
133
|
-
- 'lib/harvestdor/indexer/resource.rb'
|
134
|
-
|
135
|
-
# Offense count: 1
|
136
|
-
# Cop supports --auto-correct.
|
137
|
-
Style/EmptyLinesAroundMethodBody:
|
138
|
-
Exclude:
|
139
|
-
- 'lib/harvestdor/indexer.rb'
|
140
|
-
|
141
77
|
# Offense count: 4
|
142
78
|
# Configuration parameters: Exclude.
|
143
79
|
Style/FileName:
|
@@ -153,162 +89,7 @@ Style/GuardClause:
|
|
153
89
|
Exclude:
|
154
90
|
- 'lib/harvestdor/indexer.rb'
|
155
91
|
|
156
|
-
# Offense count: 15
|
157
|
-
# Cop supports --auto-correct.
|
158
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles, UseHashRocketsWithSymbolValues.
|
159
|
-
Style/HashSyntax:
|
160
|
-
Enabled: false
|
161
|
-
|
162
|
-
# Offense count: 1
|
163
|
-
# Cop supports --auto-correct.
|
164
|
-
# Configuration parameters: MaxLineLength.
|
165
|
-
Style/IfUnlessModifier:
|
166
|
-
Exclude:
|
167
|
-
- 'lib/harvestdor/indexer.rb'
|
168
|
-
|
169
|
-
# Offense count: 2
|
170
|
-
# Cop supports --auto-correct.
|
171
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
172
|
-
Style/IndentationConsistency:
|
173
|
-
Exclude:
|
174
|
-
- 'harvestdor-indexer.gemspec'
|
175
|
-
|
176
|
-
# Offense count: 1
|
177
|
-
# Cop supports --auto-correct.
|
178
|
-
# Configuration parameters: Width.
|
179
|
-
Style/IndentationWidth:
|
180
|
-
Exclude:
|
181
|
-
- 'spec/unit/harvestdor-indexer_spec.rb'
|
182
|
-
|
183
|
-
# Offense count: 1
|
184
|
-
# Cop supports --auto-correct.
|
185
|
-
Style/LeadingCommentSpace:
|
186
|
-
Exclude:
|
187
|
-
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
188
|
-
|
189
|
-
# Offense count: 3
|
190
|
-
# Cop supports --auto-correct.
|
191
|
-
Style/MethodCallParentheses:
|
192
|
-
Exclude:
|
193
|
-
- 'spec/unit/harvestdor-indexer_spec.rb'
|
194
|
-
|
195
|
-
# Offense count: 8
|
196
|
-
# Cop supports --auto-correct.
|
197
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
198
|
-
Style/MethodDefParentheses:
|
199
|
-
Enabled: false
|
200
|
-
|
201
92
|
# Offense count: 1
|
202
93
|
Style/MultilineBlockChain:
|
203
94
|
Exclude:
|
204
95
|
- 'lib/harvestdor/indexer.rb'
|
205
|
-
|
206
|
-
# Offense count: 4
|
207
|
-
# Cop supports --auto-correct.
|
208
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
209
|
-
Style/MultilineOperationIndentation:
|
210
|
-
Enabled: false
|
211
|
-
|
212
|
-
# Offense count: 1
|
213
|
-
# Cop supports --auto-correct.
|
214
|
-
Style/NegatedIf:
|
215
|
-
Exclude:
|
216
|
-
- 'lib/harvestdor/indexer/resource.rb'
|
217
|
-
|
218
|
-
# Offense count: 1
|
219
|
-
# Cop supports --auto-correct.
|
220
|
-
# Configuration parameters: PreferredDelimiters.
|
221
|
-
Style/PercentLiteralDelimiters:
|
222
|
-
Exclude:
|
223
|
-
- 'harvestdor-indexer.gemspec'
|
224
|
-
|
225
|
-
# Offense count: 1
|
226
|
-
# Cop supports --auto-correct.
|
227
|
-
Style/Proc:
|
228
|
-
Exclude:
|
229
|
-
- 'lib/harvestdor/indexer/solr.rb'
|
230
|
-
|
231
|
-
# Offense count: 1
|
232
|
-
# Cop supports --auto-correct.
|
233
|
-
Style/RedundantBegin:
|
234
|
-
Exclude:
|
235
|
-
- 'lib/harvestdor/indexer/metrics.rb'
|
236
|
-
|
237
|
-
# Offense count: 10
|
238
|
-
# Cop supports --auto-correct.
|
239
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
240
|
-
Style/SignalException:
|
241
|
-
Exclude:
|
242
|
-
- 'lib/harvestdor/indexer.rb'
|
243
|
-
- 'lib/harvestdor/indexer/resource.rb'
|
244
|
-
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
245
|
-
|
246
|
-
# Offense count: 2
|
247
|
-
# Cop supports --auto-correct.
|
248
|
-
# Configuration parameters: MultiSpaceAllowedForOperators.
|
249
|
-
Style/SpaceAroundOperators:
|
250
|
-
Exclude:
|
251
|
-
- 'lib/harvestdor/indexer/metrics.rb'
|
252
|
-
|
253
|
-
# Offense count: 1
|
254
|
-
# Cop supports --auto-correct.
|
255
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
256
|
-
Style/SpaceBeforeBlockBraces:
|
257
|
-
Enabled: false
|
258
|
-
|
259
|
-
# Offense count: 2
|
260
|
-
# Cop supports --auto-correct.
|
261
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
262
|
-
Style/SpaceInsideBlockBraces:
|
263
|
-
Enabled: false
|
264
|
-
|
265
|
-
# Offense count: 2
|
266
|
-
# Cop supports --auto-correct.
|
267
|
-
Style/SpaceInsideBrackets:
|
268
|
-
Exclude:
|
269
|
-
- 'Rakefile'
|
270
|
-
|
271
|
-
# Offense count: 8
|
272
|
-
# Cop supports --auto-correct.
|
273
|
-
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
|
274
|
-
Style/SpaceInsideHashLiteralBraces:
|
275
|
-
Enabled: false
|
276
|
-
|
277
|
-
# Offense count: 1
|
278
|
-
# Cop supports --auto-correct.
|
279
|
-
Style/SpecialGlobalVars:
|
280
|
-
Exclude:
|
281
|
-
- 'harvestdor-indexer.gemspec'
|
282
|
-
|
283
|
-
# Offense count: 147
|
284
|
-
# Cop supports --auto-correct.
|
285
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
286
|
-
Style/StringLiterals:
|
287
|
-
Enabled: false
|
288
|
-
|
289
|
-
# Offense count: 2
|
290
|
-
# Cop supports --auto-correct.
|
291
|
-
# Configuration parameters: IgnoredMethods.
|
292
|
-
Style/SymbolProc:
|
293
|
-
Exclude:
|
294
|
-
- 'lib/harvestdor/indexer.rb'
|
295
|
-
|
296
|
-
# Offense count: 2
|
297
|
-
# Cop supports --auto-correct.
|
298
|
-
Style/Tab:
|
299
|
-
Exclude:
|
300
|
-
- 'harvestdor-indexer.gemspec'
|
301
|
-
|
302
|
-
# Offense count: 11
|
303
|
-
# Cop supports --auto-correct.
|
304
|
-
Style/TrailingWhitespace:
|
305
|
-
Exclude:
|
306
|
-
- 'lib/harvestdor/indexer/metrics.rb'
|
307
|
-
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
308
|
-
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
309
|
-
|
310
|
-
# Offense count: 2
|
311
|
-
# Cop supports --auto-correct.
|
312
|
-
Style/UnneededPercentQ:
|
313
|
-
Exclude:
|
314
|
-
- 'harvestdor-indexer.gemspec'
|
data/README.rdoc
CHANGED
@@ -31,26 +31,15 @@ Create a yml config file for your collection going to a Solr index.
|
|
31
31
|
See spec/config/ap.yml for an example.
|
32
32
|
You will want to copy that file and change the following settings:
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
Note: Because of an update to underlying HTTP libraries, versions of this gem > 0.0.12 require an updated syntax. Errors like "unknown method timeout" might be because you're using an older version of a config file. The new configuration looks like this:
|
40
|
-
|
41
|
-
http_options:
|
42
|
-
ssl:
|
43
|
-
verify: false
|
44
|
-
# timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
|
45
|
-
request:
|
46
|
-
timeout: 180
|
47
|
-
open_timeout: 180
|
34
|
+
* whitelist
|
35
|
+
* dor fetcher service_url
|
36
|
+
* solr url
|
37
|
+
* harvestdor log_dir, log_name
|
48
38
|
|
49
39
|
|
50
40
|
==== Whitelist
|
51
41
|
|
52
|
-
|
53
|
-
can be
|
42
|
+
The whitelist is how you specify which objects to index. The whitelist can be
|
54
43
|
|
55
44
|
* an Array of druids inline in the config yml file
|
56
45
|
* a filename containing a list of druids (one per line)
|
@@ -107,32 +96,15 @@ You may want to write a script to run the code. Your script might look like thi
|
|
107
96
|
|
108
97
|
Then you run the script like so:
|
109
98
|
|
110
|
-
./bin/indexer config/(your coll).yml
|
99
|
+
$ ./bin/indexer config/(your coll).yml
|
111
100
|
|
112
101
|
Run from deployed instance, as that box is already set up to be able to talk to DOR Fetcher service and to SUL Solr indexes.
|
113
102
|
|
114
103
|
== Contributing
|
115
104
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
== Releases
|
124
|
-
|
125
|
-
* <b>2.0.0</b> Complete refactor to update APIs, merge configuration yml files, update to rspec 3
|
126
|
-
* <b>1.0.4</b> Set skip_heartbeat to true in the initialization of the DorFetcher::Client for ease of testing
|
127
|
-
* <b>1.0.3</b> Implemented class level config so anything that inherits from Harvestdor::Indexer can share configuration settings
|
128
|
-
* <b>1.0.0</b> Replaced OAI harvesting mechanism with dor-fetcher
|
129
|
-
* <b>0.0.13</b> Upgrade to latest faraday HTTP client syntax; Use retries gem (https://github.com/ooyala/retries) to make retrying of index process more robust
|
130
|
-
* <b>0.0.12</b> fix total_object nil error
|
131
|
-
* <b>0.0.11</b> fix error_count and success_count, allow setting of max-tries (retry solr add if error)
|
132
|
-
* <b>0.0.7</b> adding additional logging of error, success counts, and time to index and harvest
|
133
|
-
* <b>0.0.6</b> tweak error handling for public xml pieces
|
134
|
-
* <b>0.0.5</b> make rake release a no-op
|
135
|
-
* <b>0.0.4</b> add confstruct runtime dependency
|
136
|
-
* <b>0.0.3</b> add methods for public_xml, content_metadata, identity_metadata ...
|
137
|
-
* <b>0.0.2</b> better model code for index method (thanks, Bess!)
|
138
|
-
* <b>0.0.1</b> initial commit
|
105
|
+
* Fork it (https://help.github.com/articles/fork-a-repo/)
|
106
|
+
* Create your feature branch (`git checkout -b my-new-feature`)
|
107
|
+
* Write code and tests.
|
108
|
+
* Commit your changes (`git commit -am 'Added some feature'`)
|
109
|
+
* Push to the branch (`git push origin my-new-feature`)
|
110
|
+
* Create new Pull Request (https://help.github.com/articles/creating-a-pull-request/)
|
data/Rakefile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
2
|
|
3
3
|
require 'rake'
|
4
4
|
require 'bundler'
|
@@ -7,26 +7,26 @@ begin
|
|
7
7
|
Bundler.setup(:default, :development)
|
8
8
|
rescue Bundler::BundlerError => e
|
9
9
|
$stderr.puts e.message
|
10
|
-
$stderr.puts
|
10
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
11
11
|
exit e.status_code
|
12
12
|
end
|
13
13
|
|
14
|
-
task :
|
14
|
+
task default: [:rspec, :rubocop]
|
15
15
|
|
16
|
-
desc
|
17
|
-
task :
|
16
|
+
desc 'run continuous integration suite (tests, coverage, docs)'
|
17
|
+
task ci: [:rspec, :doc, :rubocop]
|
18
18
|
|
19
19
|
require 'rspec/core/rake_task'
|
20
20
|
|
21
|
-
task :
|
21
|
+
task spec: :rspec
|
22
22
|
|
23
|
-
desc
|
23
|
+
desc 'run specs EXCEPT integration specs'
|
24
24
|
RSpec::Core::RakeTask.new(:spec_fast) do |spec|
|
25
|
-
spec.rspec_opts = [
|
25
|
+
spec.rspec_opts = ['-c', '-f progress', '--tty', '-t ~integration', '-r ./spec/spec_helper.rb']
|
26
26
|
end
|
27
27
|
|
28
28
|
RSpec::Core::RakeTask.new(:rspec) do |spec|
|
29
|
-
spec.rspec_opts = [
|
29
|
+
spec.rspec_opts = ['-c', '-f progress', '--tty', '-r ./spec/spec_helper.rb']
|
30
30
|
end
|
31
31
|
|
32
32
|
require 'rubocop/rake_task'
|
@@ -41,12 +41,12 @@ begin
|
|
41
41
|
|
42
42
|
YARD::Rake::YardocTask.new(:doc) do |yt|
|
43
43
|
yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
|
44
|
-
|
44
|
+
[File.join(project_root, 'README.rdoc')]
|
45
45
|
yt.options = ['--output-dir', doc_dest_dir, '--readme', 'README.rdoc', '--title', 'Harvestdor Gem Documentation']
|
46
46
|
end
|
47
47
|
rescue LoadError
|
48
|
-
desc
|
48
|
+
desc 'Generate YARD Documentation'
|
49
49
|
task :doc do
|
50
|
-
abort
|
50
|
+
abort 'Please install the YARD gem to generate rdoc.'
|
51
51
|
end
|
52
52
|
end
|
data/harvestdor-indexer.gemspec
CHANGED
@@ -4,38 +4,38 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'harvestdor/indexer/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
|
-
gem.name =
|
7
|
+
gem.name = 'harvestdor-indexer'
|
8
8
|
gem.version = Harvestdor::Indexer::VERSION
|
9
|
-
gem.authors = [
|
10
|
-
gem.email = [
|
11
|
-
gem.description =
|
12
|
-
gem.summary =
|
13
|
-
gem.homepage =
|
9
|
+
gem.authors = ['Naomi Dushay', 'Bess Sadler', 'Laney McGlohon']
|
10
|
+
gem.email = ['ndushay@stanford.edu', 'bess@stanford.edu', 'laneymcg@stanford.edu']
|
11
|
+
gem.description = 'Harvest DOR object metadata by the item or collection, plus code framework to write Solr docs to index'
|
12
|
+
gem.summary = 'Harvest DOR object metadata and index it to Solr'
|
13
|
+
gem.homepage = 'https://github.com/sul-dlss/harvestdor-indexer'
|
14
14
|
|
15
|
-
gem.files = `git ls-files`.split(
|
16
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
15
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^spec/})
|
18
|
-
gem.require_paths = [
|
18
|
+
gem.require_paths = ['lib']
|
19
19
|
|
20
20
|
gem.add_dependency 'rsolr'
|
21
21
|
gem.add_dependency 'retries'
|
22
22
|
gem.add_dependency 'harvestdor', '>=0.0.14'
|
23
23
|
gem.add_dependency 'stanford-mods'
|
24
24
|
gem.add_dependency 'dor-fetcher', '=1.0.5'
|
25
|
-
gem.add_dependency
|
26
|
-
gem.add_dependency
|
25
|
+
gem.add_dependency 'activesupport'
|
26
|
+
gem.add_dependency 'parallel'
|
27
27
|
|
28
28
|
# Runtime dependencies
|
29
29
|
gem.add_runtime_dependency 'confstruct'
|
30
30
|
|
31
31
|
# Development dependencies
|
32
|
-
gem.add_development_dependency
|
32
|
+
gem.add_development_dependency 'rake'
|
33
33
|
# docs
|
34
|
-
gem.add_development_dependency
|
35
|
-
gem.add_development_dependency
|
34
|
+
gem.add_development_dependency 'rdoc'
|
35
|
+
gem.add_development_dependency 'yard'
|
36
36
|
# tests
|
37
|
-
|
38
|
-
|
37
|
+
gem.add_development_dependency 'rspec', '~> 3.0'
|
38
|
+
gem.add_development_dependency 'coveralls'
|
39
39
|
gem.add_development_dependency 'rubocop'
|
40
40
|
gem.add_development_dependency 'rubocop-rspec'
|
41
41
|
gem.add_development_dependency 'vcr'
|
@@ -4,15 +4,15 @@ module Harvestdor
|
|
4
4
|
class Indexer::Metrics
|
5
5
|
attr_accessor :error_count, :success_count, :logger
|
6
6
|
|
7
|
-
def initialize
|
8
|
-
@success_count=0 # the number of objects successfully indexed
|
9
|
-
@error_count=0 # the number of objects that failed
|
7
|
+
def initialize(options = {})
|
8
|
+
@success_count = 0 # the number of objects successfully indexed
|
9
|
+
@error_count = 0 # the number of objects that failed
|
10
10
|
@logger = options[:logger] || Logger.new(STDERR)
|
11
11
|
end
|
12
12
|
|
13
13
|
##
|
14
14
|
# Wrap an operation in tally block; if the block completes without throwing
|
15
|
-
# an exception, tally a success. If the block throws an exception, catch it
|
15
|
+
# an exception, tally a success. If the block throws an exception, catch it
|
16
16
|
# and tally a failure.
|
17
17
|
#
|
18
18
|
# Callers can provide an :on_error handler to receive the exception and process
|
@@ -20,15 +20,13 @@ module Harvestdor
|
|
20
20
|
#
|
21
21
|
# @param [Hash] options
|
22
22
|
# @option options [#call] :on_error Callback that will receive any exception thrown by the block
|
23
|
-
def tally
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
options[:on_error].call e if options[:on_error]
|
31
|
-
end
|
23
|
+
def tally(options = {}, &block)
|
24
|
+
block.call
|
25
|
+
success!
|
26
|
+
rescue => e
|
27
|
+
error!
|
28
|
+
logger.error "Failed to process: #{e.message}"
|
29
|
+
options[:on_error].call e if options[:on_error]
|
32
30
|
end
|
33
31
|
|
34
32
|
##
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Metrics/ClassLength
|
1
2
|
require 'active_support/benchmarkable'
|
2
3
|
|
3
4
|
module Harvestdor
|
@@ -7,16 +8,16 @@ module Harvestdor
|
|
7
8
|
attr_reader :indexer, :druid, :options
|
8
9
|
|
9
10
|
# @param [Harvestdor::Indexer] indexer an instance of Harvestdor::Indexer
|
10
|
-
# @param [String]
|
11
|
-
def initialize
|
11
|
+
# @param [String] druid a druid of the form 'druid:oo123oo1234'
|
12
|
+
def initialize(indexer, druid, options = {})
|
12
13
|
@indexer = indexer
|
13
|
-
@druid =
|
14
|
+
@druid = druid
|
14
15
|
@options = options
|
15
16
|
end
|
16
17
|
|
17
18
|
# @return [String] string of form oo123oo1234
|
18
19
|
def bare_druid
|
19
|
-
@bare_druid ||= druid.gsub(
|
20
|
+
@bare_druid ||= druid.gsub('druid:', '')
|
20
21
|
end
|
21
22
|
|
22
23
|
##
|
@@ -38,18 +39,18 @@ module Harvestdor
|
|
38
39
|
##
|
39
40
|
# Is this resource a collection?
|
40
41
|
def collection?
|
41
|
-
identity_metadata.xpath(
|
42
|
+
identity_metadata.xpath('/identityMetadata/objectType').any? { |x| x.text == 'collection' }
|
42
43
|
end
|
43
44
|
|
44
45
|
# get the druids from isMemberOfCollection relationships in rels-ext from public_xml
|
45
46
|
# @return [Array<String>] the druids (e.g. ww123yy1234) this object has isMemberOfCollection relationship with, or nil if none
|
46
47
|
def collections
|
47
48
|
@collections ||= begin
|
48
|
-
ns_hash = {'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' =>
|
49
|
+
ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' => 'info:fedora/fedora-system:def/relations-external#', '' => '' }
|
49
50
|
is_member_of_nodes ||= public_xml.xpath('/publicObject/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource', ns_hash)
|
50
51
|
|
51
52
|
is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
|
52
|
-
Harvestdor::Indexer::Resource.new(indexer, n.value.gsub(
|
53
|
+
Harvestdor::Indexer::Resource.new(indexer, n.value.gsub('info:fedora/', ''))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
end
|
@@ -57,12 +58,18 @@ module Harvestdor
|
|
57
58
|
##
|
58
59
|
# Return the items in this collection
|
59
60
|
def items
|
60
|
-
|
61
|
-
|
62
|
-
|
61
|
+
return [] unless collection?
|
62
|
+
return to_enum(:items) unless block_given?
|
63
|
+
|
64
|
+
items_druids.each do |x|
|
65
|
+
yield Harvestdor::Indexer::Resource.new(indexer, x)
|
63
66
|
end
|
64
67
|
end
|
65
68
|
|
69
|
+
def items_druids
|
70
|
+
@items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
|
71
|
+
end
|
72
|
+
|
66
73
|
# given a druid, get its objectLabel from its purl page identityMetadata
|
67
74
|
# @return [String] the value of the <objectLabel> element in the identityMetadata for the object
|
68
75
|
def identity_md_obj_label
|
@@ -70,13 +77,12 @@ module Harvestdor
|
|
70
77
|
identity_metadata.xpath('identityMetadata/objectLabel').text
|
71
78
|
end
|
72
79
|
|
73
|
-
|
74
80
|
# return the MODS for the druid as a Stanford::Mods::Record object
|
75
81
|
# @return [Stanford::Mods::Record] created from the MODS xml for the druid
|
76
82
|
def smods_rec
|
77
83
|
@smods_rec ||= benchmark "smods_rec(#{druid})", level: :debug do
|
78
84
|
ng_doc = mods
|
79
|
-
|
85
|
+
fail "Empty MODS metadata for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
|
80
86
|
mods_rec = Stanford::Mods::Record.new
|
81
87
|
mods_rec.from_nk_node(ng_doc.root)
|
82
88
|
mods_rec
|
@@ -92,8 +98,8 @@ module Harvestdor
|
|
92
98
|
def public_xml
|
93
99
|
@public_xml ||= benchmark "public_xml(#{druid})", level: :debug do
|
94
100
|
ng_doc = harvestdor_client.public_xml bare_druid
|
95
|
-
|
96
|
-
|
101
|
+
fail "No public xml for #{druid}" unless ng_doc
|
102
|
+
fail "Empty public xml for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
|
97
103
|
ng_doc
|
98
104
|
end
|
99
105
|
end
|
@@ -121,7 +127,7 @@ module Harvestdor
|
|
121
127
|
ng_doc = benchmark "content_metadata (#{druid})", level: :debug do
|
122
128
|
harvestdor_client.content_metadata public_xml_or_druid
|
123
129
|
end
|
124
|
-
|
130
|
+
fail "No contentMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
125
131
|
ng_doc
|
126
132
|
end
|
127
133
|
|
@@ -131,7 +137,7 @@ module Harvestdor
|
|
131
137
|
ng_doc = benchmark "identity_metadata (#{druid})", level: :debug do
|
132
138
|
harvestdor_client.identity_metadata public_xml_or_druid
|
133
139
|
end
|
134
|
-
|
140
|
+
fail "No identityMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
135
141
|
ng_doc
|
136
142
|
end
|
137
143
|
|
@@ -141,7 +147,7 @@ module Harvestdor
|
|
141
147
|
ng_doc = benchmark "rights_metadata (#{druid})", level: :debug do
|
142
148
|
harvestdor_client.rights_metadata public_xml_or_druid
|
143
149
|
end
|
144
|
-
|
150
|
+
fail "No rightsMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
145
151
|
ng_doc
|
146
152
|
end
|
147
153
|
|
@@ -151,12 +157,12 @@ module Harvestdor
|
|
151
157
|
ng_doc = benchmark "rdf (#{druid})", level: :debug do
|
152
158
|
harvestdor_client.rdf public_xml_or_druid
|
153
159
|
end
|
154
|
-
|
160
|
+
fail "No RDF for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
155
161
|
ng_doc
|
156
162
|
end
|
157
163
|
|
158
164
|
def eql?(other)
|
159
|
-
other.is_a?
|
165
|
+
other.is_a?(Harvestdor::Indexer::Resource) && other.indexer == indexer && other.druid == druid
|
160
166
|
end
|
161
167
|
|
162
168
|
def hash
|
@@ -23,7 +23,7 @@ module Harvestdor
|
|
23
23
|
def add(doc)
|
24
24
|
id = doc[:id]
|
25
25
|
|
26
|
-
handler =
|
26
|
+
handler = proc do |exception, attempt_number, _total_delay|
|
27
27
|
logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
|
28
28
|
# logger.debug exception.backtrace
|
29
29
|
end
|