harvestdor-indexer 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +12 -0
- data/.rubocop_todo.yml +314 -0
- data/.travis.yml +3 -2
- data/README.rdoc +40 -36
- data/Rakefile +11 -9
- data/harvestdor-indexer.gemspec +5 -6
- data/lib/harvestdor/indexer/resource.rb +15 -23
- data/lib/harvestdor/indexer/solr.rb +6 -7
- data/lib/harvestdor/indexer/version.rb +1 -1
- data/lib/harvestdor/indexer.rb +11 -12
- data/spec/config/ap.yml +30 -21
- data/spec/config/ap_whitelist.txt +1 -1
- data/spec/unit/harvestdor-indexer_spec.rb +29 -29
- metadata +124 -117
- data/spec/fixtures/vcr_cassettes/cant_find_whitelist_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/empty_array_no_blacklist_config_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/empty_array_no_whitelist_config_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_and_whitelist_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml +0 -99
- data/spec/fixtures/vcr_cassettes/know_what_is_in_blacklist_call.yml +0 -46
- data/spec/fixtures/vcr_cassettes/know_what_is_in_whitelist_call.yml +0 -46
- data/spec/fixtures/vcr_cassettes/load_blacklist_once_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/load_whitelist_once_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml +0 -99
- data/spec/fixtures/vcr_cassettes/no_blacklist_found_call.yml +0 -48
- data/spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml +0 -99
- data/spec/fixtures/vcr_cassettes/rsolr_client_config_call.yml +0 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3fa52b261d36405abf85f703fa18455149a37af
|
4
|
+
data.tar.gz: ec64353f2066115d63aaaff6088cc5c83c082aab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e186e0140e2d25192c6a897b4f65c5b7586223004778527132a4dfa782532cc7219a86984776b1f63e89fb8a49be84d3ef525574366b5fb04efcf0a73222dd49
|
7
|
+
data.tar.gz: 6c5710fcbf00a97f2da3282b9e612bbe8eed425e8aaa41a5ba3da8c5285c6721a4ad21c44be3fbf7977874bcb47792b6b078f2d4e3450514035440eea0f561bd
|
data/.hound.yml
ADDED
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,314 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-10-21 18:25:46 -0700 using RuboCop version 0.34.2.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Lint/DeprecatedClassMethods:
|
12
|
+
Exclude:
|
13
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
14
|
+
|
15
|
+
# Offense count: 1
|
16
|
+
# Cop supports --auto-correct.
|
17
|
+
Lint/UnusedBlockArgument:
|
18
|
+
Exclude:
|
19
|
+
- 'lib/harvestdor/indexer/solr.rb'
|
20
|
+
|
21
|
+
# Offense count: 1
|
22
|
+
# Cop supports --auto-correct.
|
23
|
+
Lint/UnusedMethodArgument:
|
24
|
+
Exclude:
|
25
|
+
- 'lib/harvestdor/indexer.rb'
|
26
|
+
|
27
|
+
# Offense count: 4
|
28
|
+
Lint/UselessAssignment:
|
29
|
+
Exclude:
|
30
|
+
- 'lib/harvestdor/indexer.rb'
|
31
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
32
|
+
|
33
|
+
# Offense count: 2
|
34
|
+
Metrics/AbcSize:
|
35
|
+
Max: 20
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
# Configuration parameters: CountComments.
|
39
|
+
Metrics/ClassLength:
|
40
|
+
Max: 105
|
41
|
+
|
42
|
+
# Offense count: 101
|
43
|
+
# Configuration parameters: AllowURI, URISchemes.
|
44
|
+
Metrics/LineLength:
|
45
|
+
Max: 207
|
46
|
+
|
47
|
+
# Offense count: 11
|
48
|
+
RSpec/DescribedClass:
|
49
|
+
Exclude:
|
50
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
51
|
+
- 'spec/unit/harvestdor-indexer-solr_spec.rb'
|
52
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
53
|
+
|
54
|
+
# Offense count: 15
|
55
|
+
# Configuration parameters: CustomTransform, IgnoredWords.
|
56
|
+
RSpec/ExampleWording:
|
57
|
+
Exclude:
|
58
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
59
|
+
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
60
|
+
|
61
|
+
# Offense count: 3
|
62
|
+
# Configuration parameters: CustomTransform.
|
63
|
+
RSpec/FilePath:
|
64
|
+
Exclude:
|
65
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
66
|
+
- 'spec/unit/harvestdor-indexer-solr_spec.rb'
|
67
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
68
|
+
|
69
|
+
# Offense count: 102
|
70
|
+
RSpec/InstanceVariable:
|
71
|
+
Exclude:
|
72
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
73
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
74
|
+
|
75
|
+
# Offense count: 3
|
76
|
+
# Cop supports --auto-correct.
|
77
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
78
|
+
Style/AndOr:
|
79
|
+
Exclude:
|
80
|
+
- 'lib/harvestdor/indexer.rb'
|
81
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
82
|
+
|
83
|
+
# Offense count: 3
|
84
|
+
# Cop supports --auto-correct.
|
85
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, ProceduralMethods, FunctionalMethods, IgnoredMethods.
|
86
|
+
Style/BlockDelimiters:
|
87
|
+
Enabled: false
|
88
|
+
|
89
|
+
# Offense count: 2
|
90
|
+
# Cop supports --auto-correct.
|
91
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
92
|
+
Style/BracesAroundHashParameters:
|
93
|
+
Exclude:
|
94
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
95
|
+
|
96
|
+
# Offense count: 3
|
97
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
98
|
+
Style/ClassAndModuleChildren:
|
99
|
+
Exclude:
|
100
|
+
- 'lib/harvestdor/indexer/metrics.rb'
|
101
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
102
|
+
- 'lib/harvestdor/indexer/solr.rb'
|
103
|
+
|
104
|
+
# Offense count: 1
|
105
|
+
# Cop supports --auto-correct.
|
106
|
+
Style/ClosingParenthesisIndentation:
|
107
|
+
Exclude:
|
108
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
109
|
+
|
110
|
+
# Offense count: 1
|
111
|
+
# Cop supports --auto-correct.
|
112
|
+
Style/CommentIndentation:
|
113
|
+
Exclude:
|
114
|
+
- 'harvestdor-indexer.gemspec'
|
115
|
+
|
116
|
+
# Offense count: 3
|
117
|
+
# Configuration parameters: Exclude.
|
118
|
+
Style/Documentation:
|
119
|
+
Exclude:
|
120
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
121
|
+
- 'lib/harvestdor/indexer/solr.rb'
|
122
|
+
- 'lib/harvestdor/indexer/version.rb'
|
123
|
+
|
124
|
+
# Offense count: 1
|
125
|
+
Style/DoubleNegation:
|
126
|
+
Exclude:
|
127
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
128
|
+
|
129
|
+
# Offense count: 1
|
130
|
+
# Cop supports --auto-correct.
|
131
|
+
Style/EmptyLines:
|
132
|
+
Exclude:
|
133
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
134
|
+
|
135
|
+
# Offense count: 1
|
136
|
+
# Cop supports --auto-correct.
|
137
|
+
Style/EmptyLinesAroundMethodBody:
|
138
|
+
Exclude:
|
139
|
+
- 'lib/harvestdor/indexer.rb'
|
140
|
+
|
141
|
+
# Offense count: 4
|
142
|
+
# Configuration parameters: Exclude.
|
143
|
+
Style/FileName:
|
144
|
+
Exclude:
|
145
|
+
- 'lib/harvestdor-indexer.rb'
|
146
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
147
|
+
- 'spec/unit/harvestdor-indexer-solr_spec.rb'
|
148
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
149
|
+
|
150
|
+
# Offense count: 1
|
151
|
+
# Configuration parameters: MinBodyLength.
|
152
|
+
Style/GuardClause:
|
153
|
+
Exclude:
|
154
|
+
- 'lib/harvestdor/indexer.rb'
|
155
|
+
|
156
|
+
# Offense count: 15
|
157
|
+
# Cop supports --auto-correct.
|
158
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, UseHashRocketsWithSymbolValues.
|
159
|
+
Style/HashSyntax:
|
160
|
+
Enabled: false
|
161
|
+
|
162
|
+
# Offense count: 1
|
163
|
+
# Cop supports --auto-correct.
|
164
|
+
# Configuration parameters: MaxLineLength.
|
165
|
+
Style/IfUnlessModifier:
|
166
|
+
Exclude:
|
167
|
+
- 'lib/harvestdor/indexer.rb'
|
168
|
+
|
169
|
+
# Offense count: 2
|
170
|
+
# Cop supports --auto-correct.
|
171
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
172
|
+
Style/IndentationConsistency:
|
173
|
+
Exclude:
|
174
|
+
- 'harvestdor-indexer.gemspec'
|
175
|
+
|
176
|
+
# Offense count: 1
|
177
|
+
# Cop supports --auto-correct.
|
178
|
+
# Configuration parameters: Width.
|
179
|
+
Style/IndentationWidth:
|
180
|
+
Exclude:
|
181
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
182
|
+
|
183
|
+
# Offense count: 1
|
184
|
+
# Cop supports --auto-correct.
|
185
|
+
Style/LeadingCommentSpace:
|
186
|
+
Exclude:
|
187
|
+
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
188
|
+
|
189
|
+
# Offense count: 3
|
190
|
+
# Cop supports --auto-correct.
|
191
|
+
Style/MethodCallParentheses:
|
192
|
+
Exclude:
|
193
|
+
- 'spec/unit/harvestdor-indexer_spec.rb'
|
194
|
+
|
195
|
+
# Offense count: 8
|
196
|
+
# Cop supports --auto-correct.
|
197
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
198
|
+
Style/MethodDefParentheses:
|
199
|
+
Enabled: false
|
200
|
+
|
201
|
+
# Offense count: 1
|
202
|
+
Style/MultilineBlockChain:
|
203
|
+
Exclude:
|
204
|
+
- 'lib/harvestdor/indexer.rb'
|
205
|
+
|
206
|
+
# Offense count: 4
|
207
|
+
# Cop supports --auto-correct.
|
208
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
209
|
+
Style/MultilineOperationIndentation:
|
210
|
+
Enabled: false
|
211
|
+
|
212
|
+
# Offense count: 1
|
213
|
+
# Cop supports --auto-correct.
|
214
|
+
Style/NegatedIf:
|
215
|
+
Exclude:
|
216
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
217
|
+
|
218
|
+
# Offense count: 1
|
219
|
+
# Cop supports --auto-correct.
|
220
|
+
# Configuration parameters: PreferredDelimiters.
|
221
|
+
Style/PercentLiteralDelimiters:
|
222
|
+
Exclude:
|
223
|
+
- 'harvestdor-indexer.gemspec'
|
224
|
+
|
225
|
+
# Offense count: 1
|
226
|
+
# Cop supports --auto-correct.
|
227
|
+
Style/Proc:
|
228
|
+
Exclude:
|
229
|
+
- 'lib/harvestdor/indexer/solr.rb'
|
230
|
+
|
231
|
+
# Offense count: 1
|
232
|
+
# Cop supports --auto-correct.
|
233
|
+
Style/RedundantBegin:
|
234
|
+
Exclude:
|
235
|
+
- 'lib/harvestdor/indexer/metrics.rb'
|
236
|
+
|
237
|
+
# Offense count: 10
|
238
|
+
# Cop supports --auto-correct.
|
239
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
240
|
+
Style/SignalException:
|
241
|
+
Exclude:
|
242
|
+
- 'lib/harvestdor/indexer.rb'
|
243
|
+
- 'lib/harvestdor/indexer/resource.rb'
|
244
|
+
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
245
|
+
|
246
|
+
# Offense count: 2
|
247
|
+
# Cop supports --auto-correct.
|
248
|
+
# Configuration parameters: MultiSpaceAllowedForOperators.
|
249
|
+
Style/SpaceAroundOperators:
|
250
|
+
Exclude:
|
251
|
+
- 'lib/harvestdor/indexer/metrics.rb'
|
252
|
+
|
253
|
+
# Offense count: 1
|
254
|
+
# Cop supports --auto-correct.
|
255
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
256
|
+
Style/SpaceBeforeBlockBraces:
|
257
|
+
Enabled: false
|
258
|
+
|
259
|
+
# Offense count: 2
|
260
|
+
# Cop supports --auto-correct.
|
261
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
262
|
+
Style/SpaceInsideBlockBraces:
|
263
|
+
Enabled: false
|
264
|
+
|
265
|
+
# Offense count: 2
|
266
|
+
# Cop supports --auto-correct.
|
267
|
+
Style/SpaceInsideBrackets:
|
268
|
+
Exclude:
|
269
|
+
- 'Rakefile'
|
270
|
+
|
271
|
+
# Offense count: 8
|
272
|
+
# Cop supports --auto-correct.
|
273
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
|
274
|
+
Style/SpaceInsideHashLiteralBraces:
|
275
|
+
Enabled: false
|
276
|
+
|
277
|
+
# Offense count: 1
|
278
|
+
# Cop supports --auto-correct.
|
279
|
+
Style/SpecialGlobalVars:
|
280
|
+
Exclude:
|
281
|
+
- 'harvestdor-indexer.gemspec'
|
282
|
+
|
283
|
+
# Offense count: 147
|
284
|
+
# Cop supports --auto-correct.
|
285
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
286
|
+
Style/StringLiterals:
|
287
|
+
Enabled: false
|
288
|
+
|
289
|
+
# Offense count: 2
|
290
|
+
# Cop supports --auto-correct.
|
291
|
+
# Configuration parameters: IgnoredMethods.
|
292
|
+
Style/SymbolProc:
|
293
|
+
Exclude:
|
294
|
+
- 'lib/harvestdor/indexer.rb'
|
295
|
+
|
296
|
+
# Offense count: 2
|
297
|
+
# Cop supports --auto-correct.
|
298
|
+
Style/Tab:
|
299
|
+
Exclude:
|
300
|
+
- 'harvestdor-indexer.gemspec'
|
301
|
+
|
302
|
+
# Offense count: 11
|
303
|
+
# Cop supports --auto-correct.
|
304
|
+
Style/TrailingWhitespace:
|
305
|
+
Exclude:
|
306
|
+
- 'lib/harvestdor/indexer/metrics.rb'
|
307
|
+
- 'spec/unit/harvestdor-indexer-resource_spec.rb'
|
308
|
+
- 'spec/unit/harvestdor/indexer/metrics_spec.rb'
|
309
|
+
|
310
|
+
# Offense count: 2
|
311
|
+
# Cop supports --auto-correct.
|
312
|
+
Style/UnneededPercentQ:
|
313
|
+
Exclude:
|
314
|
+
- 'harvestdor-indexer.gemspec'
|
data/.travis.yml
CHANGED
data/README.rdoc
CHANGED
@@ -26,7 +26,15 @@ You must override the index method and provide configuration options. It is rec
|
|
26
26
|
|
27
27
|
=== Configuration / Set up
|
28
28
|
|
29
|
-
Create a yml config file for your collection going to a Solr index.
|
29
|
+
Create a yml config file for your collection going to a Solr index.
|
30
|
+
|
31
|
+
See spec/config/ap.yml for an example.
|
32
|
+
You will want to copy that file and change the following settings:
|
33
|
+
|
34
|
+
# whitelist
|
35
|
+
# dor fetcher service_url
|
36
|
+
# solr url
|
37
|
+
# harvestdor log_dir, log_name
|
30
38
|
|
31
39
|
Note: Because of an update to underlying HTTP libraries, versions of this gem > 0.0.12 require an updated syntax. Errors like "unknown method timeout" might be because you're using an older version of a config file. The new configuration looks like this:
|
32
40
|
|
@@ -38,45 +46,46 @@ Note: Because of an update to underlying HTTP libraries, versions of this gem >
|
|
38
46
|
timeout: 180
|
39
47
|
open_timeout: 180
|
40
48
|
|
41
|
-
See spec/config/ap.yml for an example.
|
42
|
-
You will want to copy that file and change the following settings:
|
43
|
-
1. log_name
|
44
|
-
2. default_set
|
45
|
-
3. blacklist or whitelist if you are using them
|
46
49
|
|
47
|
-
|
50
|
+
==== Whitelist
|
51
|
+
|
52
|
+
Note: the whitelist is how you specify which objects to index. The whitelist
|
53
|
+
can be
|
54
|
+
|
55
|
+
* an Array of druids inline in the config yml file
|
56
|
+
* a filename containing a list of druids (one per line)
|
57
|
+
|
58
|
+
If a druid, per the object's identityMetadata at purl page, is for a
|
59
|
+
|
60
|
+
* collection record: then we process all the item druids in that collection (as if they were included individually in the whitelist)
|
61
|
+
* non-collection record: then we process the druid as an individual item
|
48
62
|
|
49
63
|
=== Override the Harvestdor::Indexer.index method
|
50
64
|
|
51
65
|
In your code, override this method from the Harvestdor::Indexer class
|
52
66
|
|
53
|
-
# create Solr doc for the druid and add it to Solr
|
54
|
-
# NOTE: don't forget to send commit to Solr, either once at end (already in harvest_and_index), or for each add, or ...
|
55
|
-
def index
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
# logger.info("Just created Solr doc for #{druid}")
|
71
|
-
# TODO: provide call to code to update DOR object's workflow datastream??
|
72
|
-
end
|
73
|
-
end
|
67
|
+
# create Solr doc for the druid and add it to Solr
|
68
|
+
# NOTE: don't forget to send commit to Solr, either once at end (already in harvest_and_index), or for each add, or ...
|
69
|
+
def index resource
|
70
|
+
|
71
|
+
benchmark "Indexing #{resource.druid}" do
|
72
|
+
logger.debug "About to index #{resource.druid}"
|
73
|
+
doc_hash = {}
|
74
|
+
doc_hash[:id] = resource.druid
|
75
|
+
|
76
|
+
# you might add things from Indexer level class here
|
77
|
+
# (e.g. things that are the same across all documents in the harvest)
|
78
|
+
solr.add doc_hash
|
79
|
+
# TODO: provide call to code to update DOR object's workflow datastream??
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
74
83
|
|
75
84
|
=== Run it
|
76
85
|
|
77
86
|
(bundle install)
|
78
87
|
|
79
|
-
|
88
|
+
You may want to write a script to run the code. Your script might look like this:
|
80
89
|
|
81
90
|
#!/usr/bin/env ruby
|
82
91
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..'))
|
@@ -93,19 +102,14 @@ I suggest you write a script to run the code. Your script might look like this:
|
|
93
102
|
puts "** You must provide the full path to a collection config yml file **"
|
94
103
|
exit
|
95
104
|
end
|
96
|
-
|
97
|
-
puts "** You must provide the full path to dor-fetcher-client config yml file **"
|
98
|
-
exit
|
99
|
-
end
|
100
|
-
indexer = Harvestdor::Indexer.new(config_yml_path, client_config_path, opts)
|
105
|
+
indexer = Harvestdor::Indexer.new(config_yml_path, opts)
|
101
106
|
indexer.harvest_and_index
|
102
107
|
|
103
108
|
Then you run the script like so:
|
104
109
|
|
105
110
|
./bin/indexer config/(your coll).yml
|
106
111
|
|
107
|
-
|
108
|
-
|
112
|
+
Run this from a deployed instance, as that box is already set up to talk to the DOR Fetcher service and to the SUL Solr indexes.
|
109
113
|
|
110
114
|
== Contributing
|
111
115
|
|
data/Rakefile
CHANGED
@@ -3,10 +3,6 @@ require "bundler/gem_tasks"
|
|
3
3
|
require 'rake'
|
4
4
|
require 'bundler'
|
5
5
|
|
6
|
-
require 'rspec/core/rake_task'
|
7
|
-
require 'yard'
|
8
|
-
require 'yard/rake/yardoc_task'
|
9
|
-
|
10
6
|
begin
|
11
7
|
Bundler.setup(:default, :development)
|
12
8
|
rescue Bundler::BundlerError => e
|
@@ -15,10 +11,12 @@ rescue Bundler::BundlerError => e
|
|
15
11
|
exit e.status_code
|
16
12
|
end
|
17
13
|
|
18
|
-
task :default => :
|
14
|
+
task :default => [:rspec, :rubocop]
|
15
|
+
|
16
|
+
desc "run continuous integration suite (tests, coverage, docs)"
|
17
|
+
task :ci => [:rspec, :doc, :rubocop]
|
19
18
|
|
20
|
-
|
21
|
-
task :ci => [:rspec, :doc]
|
19
|
+
require 'rspec/core/rake_task'
|
22
20
|
|
23
21
|
task :spec => :rspec
|
24
22
|
|
@@ -31,7 +29,12 @@ RSpec::Core::RakeTask.new(:rspec) do |spec|
|
|
31
29
|
spec.rspec_opts = ["-c", "-f progress", "--tty", "-r ./spec/spec_helper.rb"]
|
32
30
|
end
|
33
31
|
|
32
|
+
require 'rubocop/rake_task'
|
33
|
+
RuboCop::RakeTask.new(:rubocop)
|
34
|
+
|
34
35
|
# Use yard to build docs
|
36
|
+
require 'yard'
|
37
|
+
require 'yard/rake/yardoc_task'
|
35
38
|
begin
|
36
39
|
project_root = File.expand_path(File.dirname(__FILE__))
|
37
40
|
doc_dest_dir = File.join(project_root, 'doc')
|
@@ -46,5 +49,4 @@ rescue LoadError
|
|
46
49
|
task :doc do
|
47
50
|
abort "Please install the YARD gem to generate rdoc."
|
48
51
|
end
|
49
|
-
end
|
50
|
-
|
52
|
+
end
|
data/harvestdor-indexer.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^spec/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
gem.add_dependency 'rsolr'
|
21
21
|
gem.add_dependency 'retries'
|
22
22
|
gem.add_dependency 'harvestdor', '>=0.0.14'
|
@@ -24,13 +24,11 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency 'dor-fetcher', '=1.0.5'
|
25
25
|
gem.add_dependency "activesupport"
|
26
26
|
gem.add_dependency "parallel"
|
27
|
-
|
27
|
+
|
28
28
|
# Runtime dependencies
|
29
29
|
gem.add_runtime_dependency 'confstruct'
|
30
30
|
|
31
31
|
# Development dependencies
|
32
|
-
# Bundler will install these gems too if you've checked out solrmarc-wrapper source from git and run 'bundle install'
|
33
|
-
# It will not add these as dependencies if you require solrmarc-wrapper for other projects
|
34
32
|
gem.add_development_dependency "rake"
|
35
33
|
# docs
|
36
34
|
gem.add_development_dependency "rdoc"
|
@@ -38,8 +36,9 @@ Gem::Specification.new do |gem|
|
|
38
36
|
# tests
|
39
37
|
gem.add_development_dependency 'rspec', "~> 3.0"
|
40
38
|
gem.add_development_dependency 'coveralls'
|
41
|
-
|
39
|
+
gem.add_development_dependency 'rubocop'
|
40
|
+
gem.add_development_dependency 'rubocop-rspec'
|
42
41
|
gem.add_development_dependency 'vcr'
|
43
42
|
gem.add_development_dependency 'webmock'
|
44
|
-
|
43
|
+
|
45
44
|
end
|
@@ -5,13 +5,16 @@ module Harvestdor
|
|
5
5
|
include ActiveSupport::Benchmarkable
|
6
6
|
|
7
7
|
attr_reader :indexer, :druid, :options
|
8
|
-
|
9
|
-
|
8
|
+
|
9
|
+
# @param [Harvestdor::Indexer] indexer an instance of Harvestdor::Indexer
|
10
|
+
# @param [String] coll_druid a collection druid of the form 'druid:oo123oo1234'
|
11
|
+
def initialize indexer, coll_druid, options = {}
|
10
12
|
@indexer = indexer
|
11
|
-
@druid =
|
13
|
+
@druid = coll_druid
|
12
14
|
@options = options
|
13
15
|
end
|
14
16
|
|
17
|
+
# @return [String] string of form oo123oo1234
|
15
18
|
def bare_druid
|
16
19
|
@bare_druid ||= druid.gsub("druid:", "")
|
17
20
|
end
|
@@ -45,7 +48,7 @@ module Harvestdor
|
|
45
48
|
ns_hash = {'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' => "info:fedora/fedora-system:def/relations-external#", '' => ''}
|
46
49
|
is_member_of_nodes ||= public_xml.xpath('/publicObject/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource', ns_hash)
|
47
50
|
|
48
|
-
is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
|
51
|
+
is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
|
49
52
|
Harvestdor::Indexer::Resource.new(indexer, n.value.gsub("info:fedora/", ""))
|
50
53
|
end
|
51
54
|
end
|
@@ -59,18 +62,16 @@ module Harvestdor
|
|
59
62
|
druids.map { |x| Harvestdor::Indexer::Resource.new(indexer, x) }
|
60
63
|
end
|
61
64
|
end
|
62
|
-
|
65
|
+
|
63
66
|
# given a druid, get its objectLabel from its purl page identityMetadata
|
64
|
-
# @param [String] druid, e.g. ab123cd4567
|
65
67
|
# @return [String] the value of the <objectLabel> element in the identityMetadata for the object
|
66
68
|
def identity_md_obj_label
|
67
69
|
logger.error("#{druid} missing identityMetadata") unless identity_metadata
|
68
70
|
identity_metadata.xpath('identityMetadata/objectLabel').text
|
69
71
|
end
|
70
|
-
|
71
|
-
|
72
|
+
|
73
|
+
|
72
74
|
# return the MODS for the druid as a Stanford::Mods::Record object
|
73
|
-
# @param [String] druid e.g. ab123cd4567
|
74
75
|
# @return [Stanford::Mods::Record] created from the MODS xml for the druid
|
75
76
|
def smods_rec
|
76
77
|
@smods_rec ||= benchmark "smods_rec(#{druid})", level: :debug do
|
@@ -85,9 +86,8 @@ module Harvestdor
|
|
85
86
|
def mods
|
86
87
|
@mods ||= harvestdor_client.mods bare_druid
|
87
88
|
end
|
88
|
-
|
89
|
+
|
89
90
|
# the public xml for this DOR object, from the purl page
|
90
|
-
# @param [String] druid e.g. ab123cd4567
|
91
91
|
# @return [Nokogiri::XML::Document] the public xml for the DOR object
|
92
92
|
def public_xml
|
93
93
|
@public_xml ||= benchmark "public_xml(#{druid})", level: :debug do
|
@@ -114,10 +114,8 @@ module Harvestdor
|
|
114
114
|
bare_druid
|
115
115
|
end
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
# the contentMetadata for this DOR object, ultimately from the purl public xml
|
119
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
120
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
121
119
|
# @return [Nokogiri::XML::Document] the contentMetadata for the DOR object
|
122
120
|
def content_metadata
|
123
121
|
ng_doc = benchmark "content_metadata (#{druid})", level: :debug do
|
@@ -126,10 +124,8 @@ module Harvestdor
|
|
126
124
|
raise "No contentMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
127
125
|
ng_doc
|
128
126
|
end
|
129
|
-
|
127
|
+
|
130
128
|
# the identityMetadata for this DOR object, ultimately from the purl public xml
|
131
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
132
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
133
129
|
# @return [Nokogiri::XML::Document] the identityMetadata for the DOR object
|
134
130
|
def identity_metadata
|
135
131
|
ng_doc = benchmark "identity_metadata (#{druid})", level: :debug do
|
@@ -138,10 +134,8 @@ module Harvestdor
|
|
138
134
|
raise "No identityMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
139
135
|
ng_doc
|
140
136
|
end
|
141
|
-
|
137
|
+
|
142
138
|
# the rightsMetadata for this DOR object, ultimately from the purl public xml
|
143
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
144
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
145
139
|
# @return [Nokogiri::XML::Document] the rightsMetadata for the DOR object
|
146
140
|
def rights_metadata
|
147
141
|
ng_doc = benchmark "rights_metadata (#{druid})", level: :debug do
|
@@ -150,10 +144,8 @@ module Harvestdor
|
|
150
144
|
raise "No rightsMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
|
151
145
|
ng_doc
|
152
146
|
end
|
153
|
-
|
147
|
+
|
154
148
|
# the RDF for this DOR object, ultimately from the purl public xml
|
155
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
156
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
157
149
|
# @return [Nokogiri::XML::Document] the RDF for the DOR object
|
158
150
|
def rdf
|
159
151
|
ng_doc = benchmark "rdf (#{druid})", level: :debug do
|