gdor-indexer 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e1bc32ca2df58b2018dbc9f27aa2ce897cde0e4
4
- data.tar.gz: c9a3378b959896f6007a7bdbb187069102a7897c
3
+ metadata.gz: 64f3819be05778daa18edefbef0db3c415d8a7af
4
+ data.tar.gz: 379426de04d4160b4f2334a54d3308fc8d798edd
5
5
  SHA512:
6
- metadata.gz: c401ef958ff8bd02ce867cc066cbf9f40d6127bbc4d4916a79e08d158d694ea2ad4c27e633e2a0f27b8dccf5c623259b4f23df779692d0551f58979040ff0cf6
7
- data.tar.gz: f0a797d7d3df7bfbdd4daa75d19a215682eb08ca885c437341a7c02b02e6bfca0b35fdc165d9116cbfb0f9b6eec1cd89de84501a724d1c5b53ca0d4969bc98c7
6
+ metadata.gz: d42cbf9c5d8ee8b9a2038b2ea77d0b51dc89f0fe7633ab2a6b5b17039fd25544c8e6397ae8c6e29b648cc5ae5ba3580bbe2f17ef9f708dc1134dc3414e13f8f6
7
+ data.tar.gz: 4c44ff23c66af8336d00ef80b747dc28a833a9bc1bbf564c0fd048c0cd4b33f022c7ef4338015f5e1a9181f107ae490eb821afd125f6c6eaa6551d6f8630880e
data/.rubocop.yml CHANGED
@@ -1,3 +1,8 @@
1
1
  require: rubocop-rspec
2
2
 
3
3
  inherit_from: .rubocop_todo.yml
4
+
5
+ Style/StringLiterals:
6
+ Enabled: true
7
+ EnforcedStyle: single_quotes
8
+
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2015-10-26 16:04:23 -0700 using RuboCop version 0.34.2.
3
+ # on 2015-10-28 13:42:33 -0700 using RuboCop version 0.34.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -37,9 +37,9 @@ Metrics/AbcSize:
37
37
  # Offense count: 1
38
38
  # Configuration parameters: CountComments.
39
39
  Metrics/ClassLength:
40
- Max: 233
40
+ Max: 242
41
41
 
42
- # Offense count: 4
42
+ # Offense count: 5
43
43
  Metrics/CyclomaticComplexity:
44
44
  Max: 9
45
45
 
@@ -67,7 +67,7 @@ RSpec/FilePath:
67
67
  - 'spec/unit/solr_doc_builder_spec.rb'
68
68
  - 'spec/unit/solr_doc_hash_spec.rb'
69
69
 
70
- # Offense count: 356
70
+ # Offense count: 359
71
71
  RSpec/InstanceVariable:
72
72
  Exclude:
73
73
  - 'spec/unit/gdor_mods_fields_spec.rb'
data/Gemfile CHANGED
@@ -2,11 +2,3 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in gdor-indexer.gemspec
4
4
  gemspec
5
-
6
- group :deployment do
7
- gem 'capistrano', '~> 3.2'
8
- gem 'capistrano-bundler'
9
- gem 'capistrano-rvm' # gdor-indexer used to need jruby for merged records
10
- gem 'lyberteam-capistrano-devel'
11
- gem 'rainbow' # for color output
12
- end
data/gdor-indexer.gemspec CHANGED
@@ -40,4 +40,5 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'vcr'
41
41
  spec.add_development_dependency 'jettywrapper'
42
42
  spec.add_development_dependency 'webmock'
43
+ spec.add_development_dependency 'pry-byebug'
43
44
  end
data/lib/gdor/indexer.rb CHANGED
@@ -17,7 +17,7 @@ module GDor
17
17
  class Indexer
18
18
  include Hooks
19
19
 
20
- define_hooks :before_index, :before_merge
20
+ define_hooks :before_index
21
21
 
22
22
  # local files
23
23
  require 'gdor/indexer/version'
@@ -53,6 +53,15 @@ module GDor
53
53
  end
54
54
 
55
55
  def logger
56
+ config_level =
57
+ case config.log_level
58
+ when 'debug' then Logger::DEBUG
59
+ when 'info' then Logger::INFO
60
+ when 'warn' then Logger::WARN
61
+ when 'error' then Logger::ERROR
62
+ when 'fatal' then Logger::FATAL
63
+ end
64
+ harvestdor.logger.level = config_level ? config_level : Logger::INFO
56
65
  harvestdor.logger
57
66
  end
58
67
 
@@ -160,7 +169,7 @@ module GDor
160
169
  building_facet: 'Stanford Digital Repository' # INDEX-53 add building_facet = Stanford Digital Repository here for collection
161
170
  )
162
171
 
163
- logger.info "Indexing collection object #{resource.druid} (unmerged)"
172
+ logger.info "Indexing collection object #{resource.druid}"
164
173
  doc_hash = coll_sdb.doc_hash
165
174
  doc_hash.combine fields_to_add
166
175
  validation_messages = doc_hash.validate_collection(config)
@@ -267,7 +276,7 @@ module GDor
267
276
 
268
277
  if @druids_failed_to_ix.size > 0
269
278
  body += "\n"
270
- body += "records that may have failed to index (merged recs as druids, not ckeys): \n"
279
+ body += "records that may have failed to index: \n"
271
280
  body += @druids_failed_to_ix.join("\n") + "\n"
272
281
  end
273
282
 
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.1.0'
3
+ VERSION = '0.2.0'
4
4
  end
5
5
  end
@@ -42,3 +42,7 @@ solr:
42
42
  read_timeout: 60
43
43
  open_timeout: 60
44
44
  max_retries: 10
45
+
46
+ # the severity level of messages to be logged. Valid values are debug, info, warn, error, fatal
47
+ # default: info (also used when log_level is unset or is not one of the valid values)
48
+ log_level: info
@@ -7,15 +7,16 @@ describe GDor::Indexer::ModsFields do
7
7
  @mods_xml = "<mods #{@ns_decl}><note>gdor_mods_fields testing</note></mods>"
8
8
  end
9
9
 
10
- let :logger do
11
- Logger.new StringIO.new
12
- end
13
-
14
10
  def sdb_for_mods(m)
15
11
  resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
16
12
  allow(resource).to receive(:public_xml).and_return(nil)
17
13
  allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
18
- GDor::Indexer::SolrDocBuilder.new(resource, logger)
14
+ i = Harvestdor::Indexer.new
15
+ i.logger.level = Logger::WARN
16
+ allow(resource).to receive(:indexer).and_return(i)
17
+ lgr = Logger.new(StringIO.new)
18
+ lgr.level = Logger::WARN
19
+ GDor::Indexer::SolrDocBuilder.new(resource, lgr)
19
20
  end
20
21
 
21
22
  context 'doc_hash_from_mods' do
@@ -28,6 +28,9 @@ describe GDor::Indexer do
28
28
  allow(r).to receive(:public_xml?).and_return true
29
29
  allow(r).to receive(:content_metadata).and_return nil
30
30
  allow(r).to receive(:collection?).and_return false
31
+ i = Harvestdor::Indexer.new
32
+ i.logger.level = Logger::WARN
33
+ allow(r).to receive(:indexer).and_return i
31
34
  r
32
35
  end
33
36
 
@@ -40,6 +43,9 @@ describe GDor::Indexer do
40
43
  allow(r).to receive(:content_metadata).and_return nil
41
44
  allow(r).to receive(:identity_md_obj_label).and_return ''
42
45
  allow(r).to receive(:collection?).and_return true
46
+ i = Harvestdor::Indexer.new
47
+ i.logger.level = Logger::WARN
48
+ allow(r).to receive(:indexer).and_return i
43
49
  r
44
50
  end
45
51
 
@@ -48,6 +54,19 @@ describe GDor::Indexer do
48
54
  @indexer.logger.info('walters_integration_spec logging test message')
49
55
  expect(File).to exist(File.join(@yaml['harvestdor']['log_dir'], @yaml['harvestdor']['log_name']))
50
56
  end
57
+ it 'logger level defaults to INFO' do
58
+ expect(@indexer.logger.level).to eq Logger::INFO
59
+ end
60
+ it 'logger level can be specified in config field' do
61
+ indexer = described_class.new(@config_yml_path) do |config|
62
+ config.log_level = 'debug'
63
+ end
64
+ expect(indexer.logger.level).to eq Logger::DEBUG
65
+ indexer = described_class.new(@config_yml_path) do |config|
66
+ config.log_level = 'warn'
67
+ end
68
+ expect(indexer.logger.level).to eq Logger::WARN
69
+ end
51
70
  end
52
71
 
53
72
  describe '#harvest_and_index' do
@@ -74,7 +93,9 @@ describe GDor::Indexer do
74
93
  end
75
94
 
76
95
  def logger
77
- Logger.new(STDERR)
96
+ lgr = Logger.new(StringIO.new)
97
+ lgr.level = Logger::WARN
98
+ lgr
78
99
  end
79
100
  end.new(collection, resource))
80
101
 
@@ -115,120 +136,116 @@ describe GDor::Indexer do
115
136
  end
116
137
 
117
138
  context '#item_solr_document' do
118
- context 'unmerged' do
119
- it 'calls Harvestdor::Indexer.solr_add' do
120
- doc_hash = @indexer.item_solr_document(resource)
121
- expect(doc_hash).to include id: @fake_druid
122
- end
123
- it 'calls validate_item' do
124
- expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
125
- @indexer.item_solr_document resource
126
- end
127
- it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
128
- allow_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
129
- expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_mods).and_return([])
130
- @indexer.item_solr_document resource
131
- end
132
- it 'calls add_coll_info' do
133
- expect(@indexer).to receive(:add_coll_info)
134
- @indexer.item_solr_document resource
135
- end
136
- it 'has fields populated from the collection record' do
137
- sdb = double
138
- allow(sdb).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
139
- allow(sdb).to receive(:display_type)
140
- allow(sdb).to receive(:file_ids)
141
- allow(sdb.doc_hash).to receive(:validate_mods).and_return([])
142
- allow(GDor::Indexer::SolrDocBuilder).to receive(:new).and_return(sdb)
143
- allow(resource).to receive(:collections).and_return([double(druid: 'foo', bare_druid: 'foo', identity_md_obj_label: 'bar')])
144
- doc_hash = @indexer.item_solr_document resource
145
- expect(doc_hash).to include druid: @fake_druid, collection: ['foo'], collection_with_title: ['foo-|-bar']
146
- end
147
- it 'has fields populated from the MODS' do
148
- title = 'fake title in mods'
149
- ng_mods = Nokogiri::XML("<mods #{@ns_decl}><titleInfo><title>#{title}</title></titleInfo></mods>")
150
- allow(resource).to receive(:mods).and_return(ng_mods)
151
- doc_hash = @indexer.item_solr_document resource
152
- expect(doc_hash).to include id: @fake_druid, title_display: title
153
- end
154
- it 'populates url_fulltext field with purl page url' do
155
- doc_hash = @indexer.item_solr_document resource
156
- expect(doc_hash).to include id: @fake_druid, url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@fake_druid}"
157
- end
158
- it 'populates druid and access_facet fields' do
159
- doc_hash = @indexer.item_solr_document resource
160
- expect(doc_hash).to include id: @fake_druid, druid: @fake_druid, access_facet: 'Online'
161
- end
162
- it 'populates display_type field by calling display_type method' do
163
- expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:display_type).and_return('foo')
164
- doc_hash = @indexer.item_solr_document resource
165
- expect(doc_hash).to include id: @fake_druid, display_type: 'foo'
166
- end
167
- it 'populates file_id field by calling file_ids method' do
168
- expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:file_ids).at_least(1).times.and_return(['foo'])
169
- doc_hash = @indexer.item_solr_document resource
170
- expect(doc_hash).to include id: @fake_druid, file_id: ['foo']
171
- end
172
- it 'populates building_facet field with Stanford Digital Repository' do
173
- doc_hash = @indexer.item_solr_document resource
174
- expect(doc_hash).to include id: @fake_druid, building_facet: 'Stanford Digital Repository'
175
- end
176
- end # unmerged item
139
+ it 'calls Harvestdor::Indexer.solr_add' do
140
+ doc_hash = @indexer.item_solr_document(resource)
141
+ expect(doc_hash).to include id: @fake_druid
142
+ end
143
+ it 'calls validate_item' do
144
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
145
+ @indexer.item_solr_document resource
146
+ end
147
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
148
+ allow_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
149
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_mods).and_return([])
150
+ @indexer.item_solr_document resource
151
+ end
152
+ it 'calls add_coll_info' do
153
+ expect(@indexer).to receive(:add_coll_info)
154
+ @indexer.item_solr_document resource
155
+ end
156
+ it 'has fields populated from the collection record' do
157
+ sdb = double
158
+ allow(sdb).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
159
+ allow(sdb).to receive(:display_type)
160
+ allow(sdb).to receive(:file_ids)
161
+ allow(sdb.doc_hash).to receive(:validate_mods).and_return([])
162
+ allow(GDor::Indexer::SolrDocBuilder).to receive(:new).and_return(sdb)
163
+ allow(resource).to receive(:collections).and_return([double(druid: 'foo', bare_druid: 'foo', identity_md_obj_label: 'bar')])
164
+ doc_hash = @indexer.item_solr_document resource
165
+ expect(doc_hash).to include druid: @fake_druid, collection: ['foo'], collection_with_title: ['foo-|-bar']
166
+ end
167
+ it 'has fields populated from the MODS' do
168
+ title = 'fake title in mods'
169
+ ng_mods = Nokogiri::XML("<mods #{@ns_decl}><titleInfo><title>#{title}</title></titleInfo></mods>")
170
+ allow(resource).to receive(:mods).and_return(ng_mods)
171
+ doc_hash = @indexer.item_solr_document resource
172
+ expect(doc_hash).to include id: @fake_druid, title_display: title
173
+ end
174
+ it 'populates url_fulltext field with purl page url' do
175
+ doc_hash = @indexer.item_solr_document resource
176
+ expect(doc_hash).to include id: @fake_druid, url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@fake_druid}"
177
+ end
178
+ it 'populates druid and access_facet fields' do
179
+ doc_hash = @indexer.item_solr_document resource
180
+ expect(doc_hash).to include id: @fake_druid, druid: @fake_druid, access_facet: 'Online'
181
+ end
182
+ it 'populates display_type field by calling display_type method' do
183
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:display_type).and_return('foo')
184
+ doc_hash = @indexer.item_solr_document resource
185
+ expect(doc_hash).to include id: @fake_druid, display_type: 'foo'
186
+ end
187
+ it 'populates file_id field by calling file_ids method' do
188
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:file_ids).at_least(1).times.and_return(['foo'])
189
+ doc_hash = @indexer.item_solr_document resource
190
+ expect(doc_hash).to include id: @fake_druid, file_id: ['foo']
191
+ end
192
+ it 'populates building_facet field with Stanford Digital Repository' do
193
+ doc_hash = @indexer.item_solr_document resource
194
+ expect(doc_hash).to include id: @fake_druid, building_facet: 'Stanford Digital Repository'
195
+ end
177
196
  end # item_solr_document
178
197
 
179
198
  context '#collection_solr_document' do
180
- context 'unmerged' do
181
- it 'calls validate_collection' do
182
- doc_hash = GDor::Indexer::SolrDocHash.new
183
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
184
- expect(doc_hash).to receive(:validate_collection).and_return([])
185
- doc_hash = @indexer.collection_solr_document collection
186
- end
187
- it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
188
- doc_hash = GDor::Indexer::SolrDocHash.new
189
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
190
- expect(doc_hash).to receive(:validate_mods).and_return([])
191
- doc_hash = @indexer.collection_solr_document collection
192
- end
193
- it 'populates druid and access_facet fields' do
194
- doc_hash = @indexer.collection_solr_document collection
195
- expect(doc_hash).to include druid: @coll_druid_from_test_config, access_facet: 'Online'
196
- end
197
- it 'populates url_fulltext field with purl page url' do
199
+ it 'calls validate_collection' do
200
+ doc_hash = GDor::Indexer::SolrDocHash.new
201
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
202
+ expect(doc_hash).to receive(:validate_collection).and_return([])
203
+ doc_hash = @indexer.collection_solr_document collection
204
+ end
205
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
206
+ doc_hash = GDor::Indexer::SolrDocHash.new
207
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
208
+ expect(doc_hash).to receive(:validate_mods).and_return([])
209
+ doc_hash = @indexer.collection_solr_document collection
210
+ end
211
+ it 'populates druid and access_facet fields' do
212
+ doc_hash = @indexer.collection_solr_document collection
213
+ expect(doc_hash).to include druid: @coll_druid_from_test_config, access_facet: 'Online'
214
+ end
215
+ it 'populates url_fulltext field with purl page url' do
216
+ doc_hash = @indexer.collection_solr_document collection
217
+ expect(doc_hash).to include url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@coll_druid_from_test_config}"
218
+ end
219
+ it "collection_type should be 'Digital Collection'" do
220
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new) # speed up the test
221
+
222
+ doc_hash = @indexer.collection_solr_document collection
223
+ expect(doc_hash).to include collection_type: 'Digital Collection'
224
+ end
225
+ context 'add format_main_ssim Archive/Manuscript' do
226
+ it 'no other values' do
227
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
228
+
198
229
  doc_hash = @indexer.collection_solr_document collection
199
- expect(doc_hash).to include url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@coll_druid_from_test_config}"
230
+ expect(doc_hash).to include format_main_ssim: 'Archive/Manuscript'
200
231
  end
201
- it "collection_type should be 'Digital Collection'" do
202
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new) # speed up the test
232
+ it 'other values present' do
233
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: %w(Image Video) }))
203
234
 
204
235
  doc_hash = @indexer.collection_solr_document collection
205
- expect(doc_hash).to include collection_type: 'Digital Collection'
236
+ expect(doc_hash).to include format_main_ssim: ['Image', 'Video', 'Archive/Manuscript']
206
237
  end
207
- context 'add format_main_ssim Archive/Manuscript' do
208
- it 'no other values' do
209
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
238
+ it 'already has values Archive/Manuscript' do
239
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: 'Archive/Manuscript' }))
210
240
 
211
- doc_hash = @indexer.collection_solr_document collection
212
- expect(doc_hash).to include format_main_ssim: 'Archive/Manuscript'
213
- end
214
- it 'other values present' do
215
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: %w(Image Video) }))
216
-
217
- doc_hash = @indexer.collection_solr_document collection
218
- expect(doc_hash).to include format_main_ssim: ['Image', 'Video', 'Archive/Manuscript']
219
- end
220
- it 'already has values Archive/Manuscript' do
221
- allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: 'Archive/Manuscript' }))
222
-
223
- doc_hash = @indexer.collection_solr_document collection
224
- expect(doc_hash).to include format_main_ssim: ['Archive/Manuscript']
225
- end
226
- end
227
- it 'populates building_facet field with Stanford Digital Repository' do
228
241
  doc_hash = @indexer.collection_solr_document collection
229
- expect(doc_hash).to include building_facet: 'Stanford Digital Repository'
242
+ expect(doc_hash).to include format_main_ssim: ['Archive/Manuscript']
230
243
  end
231
- end # unmerged collection
244
+ end
245
+ it 'populates building_facet field with Stanford Digital Repository' do
246
+ doc_hash = @indexer.collection_solr_document collection
247
+ expect(doc_hash).to include building_facet: 'Stanford Digital Repository'
248
+ end
232
249
  end # collection_solr_document
233
250
 
234
251
  context '#add_coll_info and supporting methods' do
@@ -273,7 +290,6 @@ describe GDor::Indexer do
273
290
  @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: 'foo'), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: 'bar')])
274
291
  expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid1}-|-foo", "#{coll_druid2}-|-bar"]
275
292
  end
276
- # other tests show it uses druid when coll rec isn't merged
277
293
  end
278
294
 
279
295
  context '#coll_display_types_from_items' do
@@ -301,14 +317,14 @@ describe GDor::Indexer do
301
317
 
302
318
  context '#num_found_in_solr' do
303
319
  before :each do
304
- @unmerged_collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
320
+ @collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
305
321
  @item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
306
322
  end
307
323
 
308
324
  it 'counts the items and the collection object in the solr index after indexing' do
309
325
  allow(@indexer.solr_client.client).to receive(:get) do |_wt, params|
310
326
  if params[:params][:fq].include?('id:"dm212rn7381"')
311
- @unmerged_collection_response
327
+ @collection_response
312
328
  else
313
329
  @item_response
314
330
  end
@@ -340,7 +356,7 @@ describe GDor::Indexer do
340
356
 
341
357
  it 'email body includes failed to index druids' do
342
358
  @indexer.instance_variable_set(:@druids_failed_to_ix, %w(a b))
343
- expect(subject).to match /records that may have failed to index \(merged recs as druids, not ckeys\): \na\nb\n\n/
359
+ expect(subject).to match /records that may have failed to index: \na\nb\n\n/
344
360
  end
345
361
 
346
362
  it 'email body include validation messages' do
@@ -8,9 +8,7 @@ describe GDor::Indexer::PublicXmlFields do
8
8
  @empty_pub_xml = "<publicObject id='druid:#{@fake_druid}'></publicObject>"
9
9
  end
10
10
 
11
- let :logger do
12
- Logger.new(StringIO.new)
13
- end
11
+ let(:logger) { Logger.new(StringIO.new) }
14
12
 
15
13
  def sdb_for_pub_xml(m)
16
14
  resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
@@ -9,13 +9,18 @@ describe GDor::Indexer::SolrDocBuilder do
9
9
  end
10
10
 
11
11
  let :logger do
12
- Logger.new(StringIO.new)
12
+ lgr = Logger.new(StringIO.new)
13
+ lgr.level = Logger::WARN
14
+ lgr
13
15
  end
14
16
 
15
17
  def sdb_for_data(mods, pub_xml)
16
18
  resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
17
19
  allow(resource).to receive(:mods).and_return(Nokogiri::XML(mods))
18
20
  allow(resource).to receive(:public_xml).and_return(Nokogiri::XML(pub_xml))
21
+ i = Harvestdor::Indexer.new
22
+ i.logger.level = Logger::WARN
23
+ allow(resource).to receive(:indexer).and_return i
19
24
  GDor::Indexer::SolrDocBuilder.new(resource, logger)
20
25
  end
21
26
 
@@ -34,7 +39,7 @@ describe GDor::Indexer::SolrDocBuilder do
34
39
  before(:each) do
35
40
  @doc_hash = doc_hash
36
41
  end
37
- it 'id field should be set to druid for non-merged record' do
42
+ it 'id field should be set to druid' do
38
43
  expect(@doc_hash[:id]).to eq(@fake_druid)
39
44
  end
40
45
  it 'does not have the gdor fields set in indexer.rb' do
@@ -44,7 +49,7 @@ describe GDor::Indexer::SolrDocBuilder do
44
49
  expect(@doc_hash).to_not have_key(:display_type)
45
50
  expect(@doc_hash).to_not have_key(:file_id)
46
51
  end
47
- it 'has the full MODS in the modsxml field for non-merged record' do
52
+ it 'has the full MODS in the modsxml field' do
48
53
  expect(@doc_hash[:modsxml]).to be_equivalent_to @mods_xml
49
54
  end
50
55
  end # doc hash
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-10-26 00:00:00.000000000 Z
13
+ date: 2015-10-28 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -334,6 +334,20 @@ dependencies:
334
334
  - - ">="
335
335
  - !ruby/object:Gem::Version
336
336
  version: '0'
337
+ - !ruby/object:Gem::Dependency
338
+ name: pry-byebug
339
+ requirement: !ruby/object:Gem::Requirement
340
+ requirements:
341
+ - - ">="
342
+ - !ruby/object:Gem::Version
343
+ version: '0'
344
+ type: :development
345
+ prerelease: false
346
+ version_requirements: !ruby/object:Gem::Requirement
347
+ requirements:
348
+ - - ">="
349
+ - !ruby/object:Gem::Version
350
+ version: '0'
337
351
  description:
338
352
  email:
339
353
  - cabeer@stanford.edu
@@ -347,18 +361,12 @@ files:
347
361
  - ".rubocop.yml"
348
362
  - ".rubocop_todo.yml"
349
363
  - ".yardopts"
350
- - Capfile
351
364
  - Gemfile
352
365
  - LICENSE.txt
353
366
  - README.md
354
367
  - Rakefile
355
368
  - VERSION
356
369
  - bin/indexer
357
- - config/deploy.rb
358
- - config/deploy/dev.rb
359
- - config/deploy/fetcher.rb
360
- - config/deploy/prod.rb
361
- - config/deploy/stage.rb
362
370
  - gdor-indexer.gemspec
363
371
  - lib/gdor/indexer.rb
364
372
  - lib/gdor/indexer/mods_fields.rb
data/Capfile DELETED
@@ -1,26 +0,0 @@
1
- # Load DSL and Setup Up Stages
2
- require 'capistrano/setup'
3
-
4
- # Includes default deployment tasks
5
- require 'capistrano/deploy'
6
-
7
- # Includes tasks from other gems included in your Gemfile
8
- #
9
- # For documentation on these, see for example:
10
- #
11
- # https://github.com/capistrano/rvm
12
- # https://github.com/capistrano/rbenv
13
- # https://github.com/capistrano/chruby
14
- # https://github.com/capistrano/bundler
15
- # https://github.com/capistrano/rails
16
- #
17
- # require 'capistrano/rbenv'
18
- # require 'capistrano/chruby'
19
- require 'capistrano/bundler'
20
- # require 'capistrano/rails'
21
- require 'capistrano/rvm' # gdor-indexer needs jruby until merge-manager
22
-
23
- require 'dlss/capistrano'
24
-
25
- # Loads custom tasks from `lib/capistrano/tasks' if you have any defined.
26
- Dir.glob('lib/capistrano/tasks/*.cap').each { |r| import r }
data/config/deploy.rb DELETED
@@ -1,31 +0,0 @@
1
- set :application, 'gdor-indexer'
2
- set :repo_url, 'https://github.com/sul-dlss/gdor-indexer.git'
3
-
4
- # Default branch is :master
5
- # ask :branch, proc { `git rev-parse --abbrev-ref HEAD`.chomp }
6
-
7
- # gdor-indexer needs jruby until merge-manager
8
- set :rvm_ruby_version, 'jruby-1.7.10'
9
-
10
- set :user, 'lyberadmin'
11
- set :deploy_to, "/home/#{fetch(:user)}/#{fetch(:application)}"
12
-
13
- set :linked_dirs, %w(logs config/collections tmp solrmarc-sw)
14
- set :linked_files, %w(.ruby-version config/solr.yml bin/index-prod-image.sh bin/index-prod-hydrus.sh config/dor-fetcher-client.yml)
15
-
16
- set :stages, %w(dev stage prod fetcher)
17
-
18
- # Default value for :log_level is :debug
19
- set :log_level, :info
20
-
21
- # Default value for :format is :pretty
22
- # set :format, :pretty
23
-
24
- # Default value for :pty is false
25
- # set :pty, true
26
-
27
- # Default value for default_env is {}
28
- # set :default_env, { path: "/opt/ruby/bin:$PATH" }
29
-
30
- # Default value for keep_releases is 5
31
- set :keep_releases, 10
data/config/deploy/dev.rb DELETED
@@ -1,41 +0,0 @@
1
- # Simple Role Syntax
2
- # ==================
3
- # Supports bulk-adding hosts to roles, the primary
4
- # server in each group is considered to be the first
5
- # unless any hosts have the primary property set.
6
- # Don't declare `role :all`, it's a meta role
7
- # role :app, %w{deploy@example.com}
8
- # role :web, %w{deploy@example.com}
9
- # role :db, %w{deploy@example.com}
10
-
11
- # Extended Server Syntax
12
- # ======================
13
- # This can be used to drop a more detailed server
14
- # definition into the server list. The second argument
15
- # something that quacks like a hash can be used to set
16
- # extended properties on the server.
17
- server 'harvestdor-dev.stanford.edu', user: 'lyberadmin', roles: %w(web app db)
18
-
19
- Capistrano::OneTimeKey.generate_one_time_key!
20
-
21
- # you can set custom ssh options
22
- # it's possible to pass any option but you need to keep in mind that net/ssh understand limited list of options
23
- # you can see them in [net/ssh documentation](http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start)
24
- # set it globally
25
- # set :ssh_options, {
26
- # keys: %w(/home/rlisowski/.ssh/id_rsa),
27
- # forward_agent: false,
28
- # auth_methods: %w(password)
29
- # }
30
- # and/or per server
31
- # server 'example.com',
32
- # user: 'user_name',
33
- # roles: %w{web app},
34
- # ssh_options: {
35
- # user: 'user_name', # overrides user setting above
36
- # keys: %w(/home/user_name/.ssh/id_rsa),
37
- # forward_agent: false,
38
- # auth_methods: %w(publickey password)
39
- # password: 'please use keys'
40
- # }
41
- # setting per server overrides global ssh_options
@@ -1,6 +0,0 @@
1
- # Temporary deployment target for DorFetcher work
2
- server 'harvestdor-dev.stanford.edu', user: 'lyberadmin', roles: %w(app)
3
-
4
- Capistrano::OneTimeKey.generate_one_time_key!
5
-
6
- set :deploy_to, "/home/#{fetch(:user)}/gdor-indexer-fetcher"
@@ -1,41 +0,0 @@
1
- # Simple Role Syntax
2
- # ==================
3
- # Supports bulk-adding hosts to roles, the primary
4
- # server in each group is considered to be the first
5
- # unless any hosts have the primary property set.
6
- # Don't declare `role :all`, it's a meta role
7
- # role :app, %w{deploy@example.com}
8
- # role :web, %w{deploy@example.com}
9
- # role :db, %w{deploy@example.com}
10
-
11
- # Extended Server Syntax
12
- # ======================
13
- # This can be used to drop a more detailed server
14
- # definition into the server list. The second argument
15
- # something that quacks like a hash can be used to set
16
- # extended properties on the server.
17
- server 'harvestdor-prod.stanford.edu', user: 'lyberadmin', roles: %w(web app db)
18
-
19
- Capistrano::OneTimeKey.generate_one_time_key!
20
-
21
- # you can set custom ssh options
22
- # it's possible to pass any option but you need to keep in mind that net/ssh understand limited list of options
23
- # you can see them in [net/ssh documentation](http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start)
24
- # set it globally
25
- # set :ssh_options, {
26
- # keys: %w(/home/rlisowski/.ssh/id_rsa),
27
- # forward_agent: false,
28
- # auth_methods: %w(password)
29
- # }
30
- # and/or per server
31
- # server 'example.com',
32
- # user: 'user_name',
33
- # roles: %w{web app},
34
- # ssh_options: {
35
- # user: 'user_name', # overrides user setting above
36
- # keys: %w(/home/user_name/.ssh/id_rsa),
37
- # forward_agent: false,
38
- # auth_methods: %w(publickey password)
39
- # password: 'please use keys'
40
- # }
41
- # setting per server overrides global ssh_options
@@ -1,41 +0,0 @@
1
- # Simple Role Syntax
2
- # ==================
3
- # Supports bulk-adding hosts to roles, the primary
4
- # server in each group is considered to be the first
5
- # unless any hosts have the primary property set.
6
- # Don't declare `role :all`, it's a meta role
7
- # role :app, %w{deploy@example.com}
8
- # role :web, %w{deploy@example.com}
9
- # role :db, %w{deploy@example.com}
10
-
11
- # Extended Server Syntax
12
- # ======================
13
- # This can be used to drop a more detailed server
14
- # definition into the server list. The second argument
15
- # something that quacks like a hash can be used to set
16
- # extended properties on the server.
17
- server 'harvestdor-stage.stanford.edu', user: 'lyberadmin', roles: %w(web app db)
18
-
19
- Capistrano::OneTimeKey.generate_one_time_key!
20
-
21
- # you can set custom ssh options
22
- # it's possible to pass any option but you need to keep in mind that net/ssh understand limited list of options
23
- # you can see them in [net/ssh documentation](http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start)
24
- # set it globally
25
- # set :ssh_options, {
26
- # keys: %w(/home/rlisowski/.ssh/id_rsa),
27
- # forward_agent: false,
28
- # auth_methods: %w(password)
29
- # }
30
- # and/or per server
31
- # server 'example.com',
32
- # user: 'user_name',
33
- # roles: %w{web app},
34
- # ssh_options: {
35
- # user: 'user_name', # overrides user setting above
36
- # keys: %w(/home/user_name/.ssh/id_rsa),
37
- # forward_agent: false,
38
- # auth_methods: %w(publickey password)
39
- # password: 'please use keys'
40
- # }
41
- # setting per server overrides global ssh_options