harvestdor-indexer 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. checksums.yaml +4 -4
  2. data/.hound.yml +2 -0
  3. data/.rubocop.yml +12 -0
  4. data/.rubocop_todo.yml +314 -0
  5. data/.travis.yml +3 -2
  6. data/README.rdoc +40 -36
  7. data/Rakefile +11 -9
  8. data/harvestdor-indexer.gemspec +5 -6
  9. data/lib/harvestdor/indexer/resource.rb +15 -23
  10. data/lib/harvestdor/indexer/solr.rb +6 -7
  11. data/lib/harvestdor/indexer/version.rb +1 -1
  12. data/lib/harvestdor/indexer.rb +11 -12
  13. data/spec/config/ap.yml +30 -21
  14. data/spec/config/ap_whitelist.txt +1 -1
  15. data/spec/unit/harvestdor-indexer_spec.rb +29 -29
  16. metadata +124 -117
  17. data/spec/fixtures/vcr_cassettes/cant_find_whitelist_call.yml +0 -48
  18. data/spec/fixtures/vcr_cassettes/empty_array_no_blacklist_config_call.yml +0 -48
  19. data/spec/fixtures/vcr_cassettes/empty_array_no_whitelist_config_call.yml +0 -48
  20. data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_and_whitelist_call.yml +0 -48
  21. data/spec/fixtures/vcr_cassettes/ignore_druids_in_blacklist_call.yml +0 -99
  22. data/spec/fixtures/vcr_cassettes/know_what_is_in_blacklist_call.yml +0 -46
  23. data/spec/fixtures/vcr_cassettes/know_what_is_in_whitelist_call.yml +0 -46
  24. data/spec/fixtures/vcr_cassettes/load_blacklist_once_call.yml +0 -48
  25. data/spec/fixtures/vcr_cassettes/load_whitelist_once_call.yml +0 -48
  26. data/spec/fixtures/vcr_cassettes/no_blacklist_config_call.yml +0 -99
  27. data/spec/fixtures/vcr_cassettes/no_blacklist_found_call.yml +0 -48
  28. data/spec/fixtures/vcr_cassettes/no_whitelist_config_call.yml +0 -99
  29. data/spec/fixtures/vcr_cassettes/rsolr_client_config_call.yml +0 -48
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60218ff6d0f0210900b1b6a0df4d3a09a7122c34
4
- data.tar.gz: 04ba680a7f7864dca78cbc9751c8eb9f6f9c811b
3
+ metadata.gz: e3fa52b261d36405abf85f703fa18455149a37af
4
+ data.tar.gz: ec64353f2066115d63aaaff6088cc5c83c082aab
5
5
  SHA512:
6
- metadata.gz: 2df842d3b19d9750f7a5e88f9af9dab60cfffa34cc80d7584b5b16f825aff64478e4c1ba3dde05de5749a9b8c17cbabd29fb55b4c71c965253b32d446686013b
7
- data.tar.gz: 3c479f7be2c27ad39acad3316b77987dce8c487a654f47758513a4c4d094973ed7b0168f85307a5135de212d7469478e4e475285bd490250058b03869fea6619
6
+ metadata.gz: e186e0140e2d25192c6a897b4f65c5b7586223004778527132a4dfa782532cc7219a86984776b1f63e89fb8a49be84d3ef525574366b5fb04efcf0a73222dd49
7
+ data.tar.gz: 6c5710fcbf00a97f2da3282b9e612bbe8eed425e8aaa41a5ba3da8c5285c6721a4ad21c44be3fbf7977874bcb47792b6b078f2d4e3450514035440eea0f561bd
data/.hound.yml ADDED
@@ -0,0 +1,2 @@
1
+ ruby:
2
+ config_file: .rubocop.yml
data/.rubocop.yml ADDED
@@ -0,0 +1,12 @@
1
+ require: rubocop-rspec
2
+
3
+ inherit_from: .rubocop_todo.yml
4
+
5
+ Style/EmptyLinesAroundBlockBody:
6
+ Enabled: false
7
+
8
+ Style/EmptyLinesAroundClassBody:
9
+ Enabled: false
10
+
11
+ Style/TrailingBlankLines:
12
+ Enabled: false
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,314 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2015-10-21 18:25:46 -0700 using RuboCop version 0.34.2.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Cop supports --auto-correct.
11
+ Lint/DeprecatedClassMethods:
12
+ Exclude:
13
+ - 'spec/unit/harvestdor-indexer_spec.rb'
14
+
15
+ # Offense count: 1
16
+ # Cop supports --auto-correct.
17
+ Lint/UnusedBlockArgument:
18
+ Exclude:
19
+ - 'lib/harvestdor/indexer/solr.rb'
20
+
21
+ # Offense count: 1
22
+ # Cop supports --auto-correct.
23
+ Lint/UnusedMethodArgument:
24
+ Exclude:
25
+ - 'lib/harvestdor/indexer.rb'
26
+
27
+ # Offense count: 4
28
+ Lint/UselessAssignment:
29
+ Exclude:
30
+ - 'lib/harvestdor/indexer.rb'
31
+ - 'spec/unit/harvestdor-indexer_spec.rb'
32
+
33
+ # Offense count: 2
34
+ Metrics/AbcSize:
35
+ Max: 20
36
+
37
+ # Offense count: 1
38
+ # Configuration parameters: CountComments.
39
+ Metrics/ClassLength:
40
+ Max: 105
41
+
42
+ # Offense count: 101
43
+ # Configuration parameters: AllowURI, URISchemes.
44
+ Metrics/LineLength:
45
+ Max: 207
46
+
47
+ # Offense count: 11
48
+ RSpec/DescribedClass:
49
+ Exclude:
50
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
51
+ - 'spec/unit/harvestdor-indexer-solr_spec.rb'
52
+ - 'spec/unit/harvestdor-indexer_spec.rb'
53
+
54
+ # Offense count: 15
55
+ # Configuration parameters: CustomTransform, IgnoredWords.
56
+ RSpec/ExampleWording:
57
+ Exclude:
58
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
59
+ - 'spec/unit/harvestdor/indexer/metrics_spec.rb'
60
+
61
+ # Offense count: 3
62
+ # Configuration parameters: CustomTransform.
63
+ RSpec/FilePath:
64
+ Exclude:
65
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
66
+ - 'spec/unit/harvestdor-indexer-solr_spec.rb'
67
+ - 'spec/unit/harvestdor-indexer_spec.rb'
68
+
69
+ # Offense count: 102
70
+ RSpec/InstanceVariable:
71
+ Exclude:
72
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
73
+ - 'spec/unit/harvestdor-indexer_spec.rb'
74
+
75
+ # Offense count: 3
76
+ # Cop supports --auto-correct.
77
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
78
+ Style/AndOr:
79
+ Exclude:
80
+ - 'lib/harvestdor/indexer.rb'
81
+ - 'lib/harvestdor/indexer/resource.rb'
82
+
83
+ # Offense count: 3
84
+ # Cop supports --auto-correct.
85
+ # Configuration parameters: EnforcedStyle, SupportedStyles, ProceduralMethods, FunctionalMethods, IgnoredMethods.
86
+ Style/BlockDelimiters:
87
+ Enabled: false
88
+
89
+ # Offense count: 2
90
+ # Cop supports --auto-correct.
91
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
92
+ Style/BracesAroundHashParameters:
93
+ Exclude:
94
+ - 'spec/unit/harvestdor-indexer_spec.rb'
95
+
96
+ # Offense count: 3
97
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
98
+ Style/ClassAndModuleChildren:
99
+ Exclude:
100
+ - 'lib/harvestdor/indexer/metrics.rb'
101
+ - 'lib/harvestdor/indexer/resource.rb'
102
+ - 'lib/harvestdor/indexer/solr.rb'
103
+
104
+ # Offense count: 1
105
+ # Cop supports --auto-correct.
106
+ Style/ClosingParenthesisIndentation:
107
+ Exclude:
108
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
109
+
110
+ # Offense count: 1
111
+ # Cop supports --auto-correct.
112
+ Style/CommentIndentation:
113
+ Exclude:
114
+ - 'harvestdor-indexer.gemspec'
115
+
116
+ # Offense count: 3
117
+ # Configuration parameters: Exclude.
118
+ Style/Documentation:
119
+ Exclude:
120
+ - 'lib/harvestdor/indexer/resource.rb'
121
+ - 'lib/harvestdor/indexer/solr.rb'
122
+ - 'lib/harvestdor/indexer/version.rb'
123
+
124
+ # Offense count: 1
125
+ Style/DoubleNegation:
126
+ Exclude:
127
+ - 'lib/harvestdor/indexer/resource.rb'
128
+
129
+ # Offense count: 1
130
+ # Cop supports --auto-correct.
131
+ Style/EmptyLines:
132
+ Exclude:
133
+ - 'lib/harvestdor/indexer/resource.rb'
134
+
135
+ # Offense count: 1
136
+ # Cop supports --auto-correct.
137
+ Style/EmptyLinesAroundMethodBody:
138
+ Exclude:
139
+ - 'lib/harvestdor/indexer.rb'
140
+
141
+ # Offense count: 4
142
+ # Configuration parameters: Exclude.
143
+ Style/FileName:
144
+ Exclude:
145
+ - 'lib/harvestdor-indexer.rb'
146
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
147
+ - 'spec/unit/harvestdor-indexer-solr_spec.rb'
148
+ - 'spec/unit/harvestdor-indexer_spec.rb'
149
+
150
+ # Offense count: 1
151
+ # Configuration parameters: MinBodyLength.
152
+ Style/GuardClause:
153
+ Exclude:
154
+ - 'lib/harvestdor/indexer.rb'
155
+
156
+ # Offense count: 15
157
+ # Cop supports --auto-correct.
158
+ # Configuration parameters: EnforcedStyle, SupportedStyles, UseHashRocketsWithSymbolValues.
159
+ Style/HashSyntax:
160
+ Enabled: false
161
+
162
+ # Offense count: 1
163
+ # Cop supports --auto-correct.
164
+ # Configuration parameters: MaxLineLength.
165
+ Style/IfUnlessModifier:
166
+ Exclude:
167
+ - 'lib/harvestdor/indexer.rb'
168
+
169
+ # Offense count: 2
170
+ # Cop supports --auto-correct.
171
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
172
+ Style/IndentationConsistency:
173
+ Exclude:
174
+ - 'harvestdor-indexer.gemspec'
175
+
176
+ # Offense count: 1
177
+ # Cop supports --auto-correct.
178
+ # Configuration parameters: Width.
179
+ Style/IndentationWidth:
180
+ Exclude:
181
+ - 'spec/unit/harvestdor-indexer_spec.rb'
182
+
183
+ # Offense count: 1
184
+ # Cop supports --auto-correct.
185
+ Style/LeadingCommentSpace:
186
+ Exclude:
187
+ - 'spec/unit/harvestdor/indexer/metrics_spec.rb'
188
+
189
+ # Offense count: 3
190
+ # Cop supports --auto-correct.
191
+ Style/MethodCallParentheses:
192
+ Exclude:
193
+ - 'spec/unit/harvestdor-indexer_spec.rb'
194
+
195
+ # Offense count: 8
196
+ # Cop supports --auto-correct.
197
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
198
+ Style/MethodDefParentheses:
199
+ Enabled: false
200
+
201
+ # Offense count: 1
202
+ Style/MultilineBlockChain:
203
+ Exclude:
204
+ - 'lib/harvestdor/indexer.rb'
205
+
206
+ # Offense count: 4
207
+ # Cop supports --auto-correct.
208
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
209
+ Style/MultilineOperationIndentation:
210
+ Enabled: false
211
+
212
+ # Offense count: 1
213
+ # Cop supports --auto-correct.
214
+ Style/NegatedIf:
215
+ Exclude:
216
+ - 'lib/harvestdor/indexer/resource.rb'
217
+
218
+ # Offense count: 1
219
+ # Cop supports --auto-correct.
220
+ # Configuration parameters: PreferredDelimiters.
221
+ Style/PercentLiteralDelimiters:
222
+ Exclude:
223
+ - 'harvestdor-indexer.gemspec'
224
+
225
+ # Offense count: 1
226
+ # Cop supports --auto-correct.
227
+ Style/Proc:
228
+ Exclude:
229
+ - 'lib/harvestdor/indexer/solr.rb'
230
+
231
+ # Offense count: 1
232
+ # Cop supports --auto-correct.
233
+ Style/RedundantBegin:
234
+ Exclude:
235
+ - 'lib/harvestdor/indexer/metrics.rb'
236
+
237
+ # Offense count: 10
238
+ # Cop supports --auto-correct.
239
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
240
+ Style/SignalException:
241
+ Exclude:
242
+ - 'lib/harvestdor/indexer.rb'
243
+ - 'lib/harvestdor/indexer/resource.rb'
244
+ - 'spec/unit/harvestdor/indexer/metrics_spec.rb'
245
+
246
+ # Offense count: 2
247
+ # Cop supports --auto-correct.
248
+ # Configuration parameters: MultiSpaceAllowedForOperators.
249
+ Style/SpaceAroundOperators:
250
+ Exclude:
251
+ - 'lib/harvestdor/indexer/metrics.rb'
252
+
253
+ # Offense count: 1
254
+ # Cop supports --auto-correct.
255
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
256
+ Style/SpaceBeforeBlockBraces:
257
+ Enabled: false
258
+
259
+ # Offense count: 2
260
+ # Cop supports --auto-correct.
261
+ # Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
262
+ Style/SpaceInsideBlockBraces:
263
+ Enabled: false
264
+
265
+ # Offense count: 2
266
+ # Cop supports --auto-correct.
267
+ Style/SpaceInsideBrackets:
268
+ Exclude:
269
+ - 'Rakefile'
270
+
271
+ # Offense count: 8
272
+ # Cop supports --auto-correct.
273
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
274
+ Style/SpaceInsideHashLiteralBraces:
275
+ Enabled: false
276
+
277
+ # Offense count: 1
278
+ # Cop supports --auto-correct.
279
+ Style/SpecialGlobalVars:
280
+ Exclude:
281
+ - 'harvestdor-indexer.gemspec'
282
+
283
+ # Offense count: 147
284
+ # Cop supports --auto-correct.
285
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
286
+ Style/StringLiterals:
287
+ Enabled: false
288
+
289
+ # Offense count: 2
290
+ # Cop supports --auto-correct.
291
+ # Configuration parameters: IgnoredMethods.
292
+ Style/SymbolProc:
293
+ Exclude:
294
+ - 'lib/harvestdor/indexer.rb'
295
+
296
+ # Offense count: 2
297
+ # Cop supports --auto-correct.
298
+ Style/Tab:
299
+ Exclude:
300
+ - 'harvestdor-indexer.gemspec'
301
+
302
+ # Offense count: 11
303
+ # Cop supports --auto-correct.
304
+ Style/TrailingWhitespace:
305
+ Exclude:
306
+ - 'lib/harvestdor/indexer/metrics.rb'
307
+ - 'spec/unit/harvestdor-indexer-resource_spec.rb'
308
+ - 'spec/unit/harvestdor/indexer/metrics_spec.rb'
309
+
310
+ # Offense count: 2
311
+ # Cop supports --auto-correct.
312
+ Style/UnneededPercentQ:
313
+ Exclude:
314
+ - 'harvestdor-indexer.gemspec'
data/.travis.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  language: ruby
2
- script: rake rspec
2
+ sudo: false
3
+ script: rake
3
4
  rvm:
4
5
  - 2.2.0
5
6
  - 2.1.5
@@ -11,4 +12,4 @@ notifications:
11
12
  email:
12
13
  - ndushay@stanford.edu
13
14
  - laneymcg@stanford.edu
14
- - bess@stanford.edu
15
+ - cbeer@stanford.edu
data/README.rdoc CHANGED
@@ -26,7 +26,15 @@ You must override the index method and provide configuration options. It is rec
26
26
 
27
27
  === Configuration / Set up
28
28
 
29
- Create a yml config file for your collection going to a Solr index.
29
+ Create a yml config file for your collection going to a Solr index.
30
+
31
+ See spec/config/ap.yml for an example.
32
+ You will want to copy that file and change the following settings:
33
+
34
+ # whitelist
35
+ # dor fetcher service_url
36
+ # solr url
37
+ # harvestdor log_dir, log_name
30
38
 
31
39
  Note: Because of an update to underlying HTTP libraries, versions of this gem > 0.0.12 require an updated syntax. Errors like "unknown method timeout" might be because you're using an older version of a config file. The new configuration looks like this:
32
40
 
@@ -38,45 +46,46 @@ Note: Because of an update to underlying HTTP libraries, versions of this gem >
38
46
  timeout: 180
39
47
  open_timeout: 180
40
48
 
41
- See spec/config/ap.yml for an example.
42
- You will want to copy that file and change the following settings:
43
- 1. log_name
44
- 2. default_set
45
- 3. blacklist or whitelist if you are using them
46
49
 
47
- Update the dor-fetcher-client.yml file in the config directory with the location of the URL of the dor-fetcher-service provider. The defaulted value is the 3000 port for a localhost - dor_fetcher_service_url: http://127.0.0.1:3000
50
+ ==== Whitelist
51
+
52
+ Note: the whitelist is how you specify which objects to index. The whitelist
53
+ can be
54
+
55
+ * an Array of druids inline in the config yml file
56
+ * a filename containing a list of druids (one per line)
57
+
58
+ If a druid, per the object's identityMetadata at purl page, is for a
59
+
60
+ * collection record: then we process all the item druids in that collection (as if they were included individually in the whitelist)
61
+ * non-collection record: then we process the druid as an individual item
48
62
 
49
63
  === Override the Harvestdor::Indexer.index method
50
64
 
51
65
  In your code, override this method from the Harvestdor::Indexer class
52
66
 
53
- # create Solr doc for the druid and add it to Solr, unless it is on the blacklist.
54
- # NOTE: don't forget to send commit to Solr, either once at end (already in harvest_and_index), or for each add, or ...
55
- def index druid
56
- if blacklist.include?(druid)
57
- logger.info("Druid #{druid} is on the blacklist and will have no Solr doc created")
58
- else
59
- logger.error("You must override the index method to transform druids into Solr docs and add them to Solr")
60
-
61
- doc_hash = {}
62
- doc_hash[:id] = druid
63
- # doc_hash[:title_tsim] = smods_rec(druid).short_title
64
-
65
- # you might add things from Indexer level class here
66
- # (e.g. things that are the same across all documents in the harvest)
67
-
68
- solr_client.add(doc_hash)
69
-
70
- # logger.info("Just created Solr doc for #{druid}")
71
- # TODO: provide call to code to update DOR object's workflow datastream??
72
- end
73
- end
67
+ # create Solr doc for the druid and add it to Solr
68
+ # NOTE: don't forget to send commit to Solr, either once at end (already in harvest_and_index), or for each add, or ...
69
+ def index resource
70
+
71
+ benchmark "Indexing #{resource.druid}" do
72
+ logger.debug "About to index #{resource.druid}"
73
+ doc_hash = {}
74
+ doc_hash[:id] = resource.druid
75
+
76
+ # you might add things from Indexer level class here
77
+ # (e.g. things that are the same across all documents in the harvest)
78
+ solr.add doc_hash
79
+ # TODO: provide call to code to update DOR object's workflow datastream??
80
+ end
81
+ end
82
+
74
83
 
75
84
  === Run it
76
85
 
77
86
  (bundle install)
78
87
 
79
- I suggest you write a script to run the code. Your script might look like this:
88
+ You may want to write a script to run the code. Your script might look like this:
80
89
 
81
90
  #!/usr/bin/env ruby
82
91
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..'))
@@ -93,19 +102,14 @@ I suggest you write a script to run the code. Your script might look like this:
93
102
  puts "** You must provide the full path to a collection config yml file **"
94
103
  exit
95
104
  end
96
- if client_config_path.nil?
97
- puts "** You must provide the full path to dor-fetcher-client config yml file **"
98
- exit
99
- end
100
- indexer = Harvestdor::Indexer.new(config_yml_path, client_config_path, opts)
105
+ indexer = Harvestdor::Indexer.new(config_yml_path, opts)
101
106
  indexer.harvest_and_index
102
107
 
103
108
  Then you run the script like so:
104
109
 
105
110
  ./bin/indexer config/(your coll).yml
106
111
 
107
- I suggest you run your code on harvestdor-dev, as it is already set up to be able to harvest from the DorFetcher
108
-
112
+ Run from deployed instance, as that box is already set up to be able to talk to DOR Fetcher service and to SUL Solr indexes.
109
113
 
110
114
  == Contributing
111
115
 
data/Rakefile CHANGED
@@ -3,10 +3,6 @@ require "bundler/gem_tasks"
3
3
  require 'rake'
4
4
  require 'bundler'
5
5
 
6
- require 'rspec/core/rake_task'
7
- require 'yard'
8
- require 'yard/rake/yardoc_task'
9
-
10
6
  begin
11
7
  Bundler.setup(:default, :development)
12
8
  rescue Bundler::BundlerError => e
@@ -15,10 +11,12 @@ rescue Bundler::BundlerError => e
15
11
  exit e.status_code
16
12
  end
17
13
 
18
- task :default => :ci
14
+ task :default => [:rspec, :rubocop]
15
+
16
+ desc "run continuous integration suite (tests, coverage, docs)"
17
+ task :ci => [:rspec, :doc, :rubocop]
19
18
 
20
- desc "run continuous integration suite (tests, coverage, docs)"
21
- task :ci => [:rspec, :doc]
19
+ require 'rspec/core/rake_task'
22
20
 
23
21
  task :spec => :rspec
24
22
 
@@ -31,7 +29,12 @@ RSpec::Core::RakeTask.new(:rspec) do |spec|
31
29
  spec.rspec_opts = ["-c", "-f progress", "--tty", "-r ./spec/spec_helper.rb"]
32
30
  end
33
31
 
32
+ require 'rubocop/rake_task'
33
+ RuboCop::RakeTask.new(:rubocop)
34
+
34
35
  # Use yard to build docs
36
+ require 'yard'
37
+ require 'yard/rake/yardoc_task'
35
38
  begin
36
39
  project_root = File.expand_path(File.dirname(__FILE__))
37
40
  doc_dest_dir = File.join(project_root, 'doc')
@@ -46,5 +49,4 @@ rescue LoadError
46
49
  task :doc do
47
50
  abort "Please install the YARD gem to generate rdoc."
48
51
  end
49
- end
50
-
52
+ end
data/harvestdor-indexer.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^spec/})
18
18
  gem.require_paths = ["lib"]
19
-
19
+
20
20
  gem.add_dependency 'rsolr'
21
21
  gem.add_dependency 'retries'
22
22
  gem.add_dependency 'harvestdor', '>=0.0.14'
@@ -24,13 +24,11 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency 'dor-fetcher', '=1.0.5'
25
25
  gem.add_dependency "activesupport"
26
26
  gem.add_dependency "parallel"
27
-
27
+
28
28
  # Runtime dependencies
29
29
  gem.add_runtime_dependency 'confstruct'
30
30
 
31
31
  # Development dependencies
32
- # Bundler will install these gems too if you've checked out solrmarc-wrapper source from git and run 'bundle install'
33
- # It will not add these as dependencies if you require solrmarc-wrapper for other projects
34
32
  gem.add_development_dependency "rake"
35
33
  # docs
36
34
  gem.add_development_dependency "rdoc"
@@ -38,8 +36,9 @@ Gem::Specification.new do |gem|
38
36
  # tests
39
37
  gem.add_development_dependency 'rspec', "~> 3.0"
40
38
  gem.add_development_dependency 'coveralls'
41
- # gem.add_development_dependency 'ruby-debug19'
39
+ gem.add_development_dependency 'rubocop'
40
+ gem.add_development_dependency 'rubocop-rspec'
42
41
  gem.add_development_dependency 'vcr'
43
42
  gem.add_development_dependency 'webmock'
44
-
43
+
45
44
  end
data/lib/harvestdor/indexer/resource.rb CHANGED
@@ -5,13 +5,16 @@ module Harvestdor
5
5
  include ActiveSupport::Benchmarkable
6
6
 
7
7
  attr_reader :indexer, :druid, :options
8
-
9
- def initialize indexer, druid, options = {}
8
+
9
+ # @param [Harvestdor::Indexer] indexer an instance of Harvestdor::Indexer
10
+ # @param [String] coll_druid a collection druid of the form 'druid:oo123oo1234'
11
+ def initialize indexer, coll_druid, options = {}
10
12
  @indexer = indexer
11
- @druid = druid
13
+ @druid = coll_druid
12
14
  @options = options
13
15
  end
14
16
 
17
+ # @return [String] string of form oo123oo1234
15
18
  def bare_druid
16
19
  @bare_druid ||= druid.gsub("druid:", "")
17
20
  end
@@ -45,7 +48,7 @@ module Harvestdor
45
48
  ns_hash = {'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' => "info:fedora/fedora-system:def/relations-external#", '' => ''}
46
49
  is_member_of_nodes ||= public_xml.xpath('/publicObject/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource', ns_hash)
47
50
 
48
- is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
51
+ is_member_of_nodes.reject { |n| n.value.empty? }.map do |n|
49
52
  Harvestdor::Indexer::Resource.new(indexer, n.value.gsub("info:fedora/", ""))
50
53
  end
51
54
  end
@@ -59,18 +62,16 @@ module Harvestdor
59
62
  druids.map { |x| Harvestdor::Indexer::Resource.new(indexer, x) }
60
63
  end
61
64
  end
62
-
65
+
63
66
  # given a druid, get its objectLabel from its purl page identityMetadata
64
- # @param [String] druid, e.g. ab123cd4567
65
67
  # @return [String] the value of the <objectLabel> element in the identityMetadata for the object
66
68
  def identity_md_obj_label
67
69
  logger.error("#{druid} missing identityMetadata") unless identity_metadata
68
70
  identity_metadata.xpath('identityMetadata/objectLabel').text
69
71
  end
70
-
71
-
72
+
73
+
72
74
  # return the MODS for the druid as a Stanford::Mods::Record object
73
- # @param [String] druid e.g. ab123cd4567
74
75
  # @return [Stanford::Mods::Record] created from the MODS xml for the druid
75
76
  def smods_rec
76
77
  @smods_rec ||= benchmark "smods_rec(#{druid})", level: :debug do
@@ -85,9 +86,8 @@ module Harvestdor
85
86
  def mods
86
87
  @mods ||= harvestdor_client.mods bare_druid
87
88
  end
88
-
89
+
89
90
  # the public xml for this DOR object, from the purl page
90
- # @param [String] druid e.g. ab123cd4567
91
91
  # @return [Nokogiri::XML::Document] the public xml for the DOR object
92
92
  def public_xml
93
93
  @public_xml ||= benchmark "public_xml(#{druid})", level: :debug do
@@ -114,10 +114,8 @@ module Harvestdor
114
114
  bare_druid
115
115
  end
116
116
  end
117
-
117
+
118
118
  # the contentMetadata for this DOR object, ultimately from the purl public xml
119
- # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
120
- # a Nokogiri::XML::Document containing the public_xml for an object
121
119
  # @return [Nokogiri::XML::Document] the contentMetadata for the DOR object
122
120
  def content_metadata
123
121
  ng_doc = benchmark "content_metadata (#{druid})", level: :debug do
@@ -126,10 +124,8 @@ module Harvestdor
126
124
  raise "No contentMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
127
125
  ng_doc
128
126
  end
129
-
127
+
130
128
  # the identityMetadata for this DOR object, ultimately from the purl public xml
131
- # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
132
- # a Nokogiri::XML::Document containing the public_xml for an object
133
129
  # @return [Nokogiri::XML::Document] the identityMetadata for the DOR object
134
130
  def identity_metadata
135
131
  ng_doc = benchmark "identity_metadata (#{druid})", level: :debug do
@@ -138,10 +134,8 @@ module Harvestdor
138
134
  raise "No identityMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
139
135
  ng_doc
140
136
  end
141
-
137
+
142
138
  # the rightsMetadata for this DOR object, ultimately from the purl public xml
143
- # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
144
- # a Nokogiri::XML::Document containing the public_xml for an object
145
139
  # @return [Nokogiri::XML::Document] the rightsMetadata for the DOR object
146
140
  def rights_metadata
147
141
  ng_doc = benchmark "rights_metadata (#{druid})", level: :debug do
@@ -150,10 +144,8 @@ module Harvestdor
150
144
  raise "No rightsMetadata for \"#{druid}\"" if !ng_doc || ng_doc.children.empty?
151
145
  ng_doc
152
146
  end
153
-
147
+
154
148
  # the RDF for this DOR object, ultimately from the purl public xml
155
- # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
156
- # a Nokogiri::XML::Document containing the public_xml for an object
157
149
  # @return [Nokogiri::XML::Document] the RDF for the DOR object
158
150
  def rdf
159
151
  ng_doc = benchmark "rdf (#{druid})", level: :debug do