gdor-indexer 0.6.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d023031326d6543f222881f79d0a102a6a27f0e9
4
- data.tar.gz: 5dd3419e6e38601413f737b3b3011ce3e9baecd9
3
+ metadata.gz: 37ead667bc97ed2a13e444026fe72ebb79f661fb
4
+ data.tar.gz: 418391aec671bf772462548a9aa20cf23ed2ea7c
5
5
  SHA512:
6
- metadata.gz: f182f105f741f8c21bb15c369573b6c3da905b28ef8e0d8920a421dcdacce4beea22891fcbc78dca2c60690c087e800f14f986749b657c218e88fe93247ce072
7
- data.tar.gz: 882369a75b3c62d0176746241f60e38204573644855514bc4fc5764ab3fcf97c3758bf7efcdc0fe8bd579102ab355255dfe76ec4730a6c69bffcfdab9b910e76
6
+ metadata.gz: 68c0d4599bd65d199d84e9664f5c2c6f09896fb9273d2832f9e71cd58072bf3aef32ccf5a52e6ecdfc23315b200a4a464ec8f0248687df414d66f0267d89d9ff
7
+ data.tar.gz: 6a64eaab7d20112d9b6b59fd5d0673f2f8855c91f1d5be95cf6c64d6e3ec83eefdcb106e9ef012a906f5a6f8d2211c1b766123ce66df6860bf28bc65a2a10b7b
@@ -1,11 +1,18 @@
1
+ require: rubocop-rspec
2
+
1
3
  # This configuration was generated by
2
4
  # `rubocop --auto-gen-config`
3
- # on 2016-07-20 15:53:51 -0700 using RuboCop version 0.41.2.
5
+ # on 2016-08-30 14:13:02 -0700 using RuboCop version 0.42.0.
4
6
  # The point is for the user to remove these configuration records
5
7
  # one by one as the offenses are removed from the code base.
6
8
  # Note that changes in the inspected code, or installation of new
7
9
  # versions of RuboCop, may require this file to be generated again.
8
10
 
11
+ # Offense count: 1
12
+ Lint/AmbiguousRegexpLiteral:
13
+ Exclude:
14
+ - 'spec/unit/indexer_spec.rb'
15
+
9
16
  # Offense count: 21
10
17
  Metrics/AbcSize:
11
18
  Max: 82
@@ -13,19 +20,19 @@ Metrics/AbcSize:
13
20
  # Offense count: 1
14
21
  # Configuration parameters: CountComments.
15
22
  Metrics/ClassLength:
16
- Max: 240
23
+ Max: 242
17
24
 
18
25
  # Offense count: 5
19
26
  Metrics/CyclomaticComplexity:
20
27
  Max: 9
21
28
 
22
- # Offense count: 314
29
+ # Offense count: 312
23
30
  # Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
24
31
  # URISchemes: http, https
25
32
  Metrics/LineLength:
26
33
  Max: 258
27
34
 
28
- # Offense count: 13
35
+ # Offense count: 15
29
36
  # Configuration parameters: CountComments.
30
37
  Metrics/MethodLength:
31
38
  Max: 43
@@ -64,12 +71,48 @@ RSpec/FilePath:
64
71
  - 'spec/unit/solr_doc_hash_spec.rb'
65
72
 
66
73
  # Offense count: 163
74
+ # Configuration parameters: AssignmentOnly.
67
75
  RSpec/InstanceVariable:
68
76
  Exclude:
69
77
  - 'spec/unit/indexer_spec.rb'
70
78
  - 'spec/unit/public_xml_fields_spec.rb'
71
79
  - 'spec/unit/solr_doc_builder_spec.rb'
72
80
 
81
+ # Offense count: 55
82
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
83
+ # SupportedStyles: allow, expect
84
+ RSpec/MessageExpectation:
85
+ Exclude:
86
+ - 'spec/unit/indexer_spec.rb'
87
+ - 'spec/unit/mods_fields_spec.rb'
88
+ - 'spec/unit/mods_pub_fields_spec.rb'
89
+ - 'spec/unit/mods_subject_fields_spec.rb'
90
+ - 'spec/unit/public_xml_fields_spec.rb'
91
+ - 'spec/unit/solr_doc_builder_spec.rb'
92
+ - 'spec/unit/solr_doc_hash_spec.rb'
93
+
94
+ # Offense count: 23
95
+ RSpec/MultipleExpectations:
96
+ Max: 8
97
+
98
+ # Offense count: 81
99
+ RSpec/NamedSubject:
100
+ Exclude:
101
+ - 'spec/unit/indexer_spec.rb'
102
+ - 'spec/unit/solr_doc_hash_spec.rb'
103
+
104
+ # Offense count: 50
105
+ # Configuration parameters: MaxNesting.
106
+ RSpec/NestedGroups:
107
+ Exclude:
108
+ - 'spec/unit/indexer_spec.rb'
109
+ - 'spec/unit/mods_fields_spec.rb'
110
+ - 'spec/unit/mods_pub_fields_spec.rb'
111
+ - 'spec/unit/mods_subject_fields_spec.rb'
112
+ - 'spec/unit/public_xml_fields_spec.rb'
113
+ - 'spec/unit/solr_doc_builder_spec.rb'
114
+ - 'spec/unit/solr_doc_hash_spec.rb'
115
+
73
116
  # Offense count: 7
74
117
  # Configuration parameters: IgnoreSymbolicNames.
75
118
  RSpec/VerifiedDoubles:
@@ -94,14 +137,6 @@ Style/ClassAndModuleChildren:
94
137
  - 'lib/gdor/indexer/solr_doc_builder.rb'
95
138
  - 'lib/gdor/indexer/solr_doc_hash.rb'
96
139
 
97
- # Offense count: 1
98
- # Cop supports --auto-correct.
99
- # Configuration parameters: EnforcedStyle, SupportedStyles.
100
- # SupportedStyles: empty_lines, no_empty_lines
101
- Style/EmptyLinesAroundBlockBody:
102
- Exclude:
103
- - 'spec/unit/mods_pub_fields_spec.rb'
104
-
105
140
  # Offense count: 22
106
141
  # Cop supports --auto-correct.
107
142
  # Configuration parameters: EnforcedStyle, SupportedStyles, IndentationWidth.
@@ -46,7 +46,7 @@ module GDor
46
46
  @total_time_to_parse = 0
47
47
  @retries = 0
48
48
  @druids_failed_to_ix = []
49
- @validation_messages = []
49
+ @validation_messages = Tempfile.new('gdor-indexer-validation-messages')
50
50
  @config ||= Confstruct::Configuration.new options
51
51
  @config.configure(YAML.load_file(yml_path)) if yml_path && File.exist?(yml_path)
52
52
  yield @config if block_given?
@@ -149,7 +149,7 @@ module GDor
149
149
  add_coll_info doc_hash, resource.collections # defined in public_xml_fields
150
150
  validation_messages = fields_to_add.validate_item(config)
151
151
  validation_messages.concat doc_hash.validate_mods(config)
152
- @validation_messages.concat(validation_messages)
152
+ @validation_messages.puts(validation_messages.join("\n"))
153
153
  doc_hash.to_h
154
154
  end
155
155
 
@@ -175,7 +175,7 @@ module GDor
175
175
  doc_hash.combine fields_to_add
176
176
  validation_messages = doc_hash.validate_collection(config)
177
177
  validation_messages.concat doc_hash.validate_mods(config)
178
- @validation_messages.concat(validation_messages)
178
+ @validation_messages.puts(validation_messages.join("\n"))
179
179
  doc_hash.to_h
180
180
  end
181
181
 
@@ -260,12 +260,12 @@ module GDor
260
260
  record_count_msgs.each do |msg|
261
261
  logger.info msg
262
262
  end
263
- logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
264
- logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total == 0
265
- logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
266
- logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total == 0
267
- logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
268
- logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total == 0
263
+ logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
264
+ logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total.zero?
265
+ logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
266
+ logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total.zero?
267
+ logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
268
+ logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total.zero?
269
269
  end
270
270
 
271
271
  # @return [String] the composed email body
@@ -282,7 +282,8 @@ module GDor
282
282
  body += "full log is at gdor_indexer/shared/#{config.harvestdor.log_dir}/#{config.harvestdor.log_name} on #{Socket.gethostname}"
283
283
  body += "\n"
284
284
 
285
- body + @validation_messages.join("\n") + "\n"
285
+ @validation_messages.rewind
286
+ body + @validation_messages.read + "\n"
286
287
  end
287
288
 
288
289
  # email the results of indexing if we are on one of the harvestdor boxes
@@ -13,12 +13,14 @@ class GDor::Indexer
13
13
  # when exp_val is a Regexp, looks for String value that matches, or Array with a String member that matches
14
14
  # @return [Boolean] true if the field is non-trivially present in the hash, false otherwise
15
15
  def field_present?(field, exp_val = nil)
16
- return false unless include?(field) && Array(self[field]).any? { |v| !v.blank? }
16
+ return false unless include?(field)
17
+ return false unless Array(self[field]).any?(&:present?)
18
+
17
19
  case exp_val
18
20
  when nil
19
21
  true
20
22
  when Regexp
21
- Array(self[field]).index { |s| exp_val.match(s) }
23
+ Array(self[field]).any? { |s| exp_val.match(s) }
22
24
  else
23
25
  Array(self[field]).include? exp_val
24
26
  end
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.6.0'.freeze
3
+ VERSION = '0.7.1'.freeze
4
4
  end
5
5
  end
@@ -12,7 +12,7 @@ describe GDor::Indexer do
12
12
  @pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
13
13
  @ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
14
14
  end
15
- before(:each) do
15
+ before do
16
16
  @indexer = described_class.new(@config_yml_path) do |config|
17
17
  config.whitelist = ['druid:ww121ss5000']
18
18
  end
@@ -69,7 +69,7 @@ describe GDor::Indexer do
69
69
  end
70
70
 
71
71
  describe '#harvest_and_index' do
72
- before :each do
72
+ before do
73
73
  allow(@indexer.harvestdor).to receive(:each_resource)
74
74
  allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
75
75
  allow(@indexer).to receive(:log_results)
@@ -246,7 +246,7 @@ describe GDor::Indexer do
246
246
  end # index_coll_obj_per_config
247
247
 
248
248
  context '#add_coll_info and supporting methods' do
249
- before(:each) do
249
+ before do
250
250
  @coll_druids_array = [collection]
251
251
  end
252
252
  let(:doc_hash) { GDor::Indexer::SolrDocHash.new({}) }
@@ -288,7 +288,7 @@ describe GDor::Indexer do
288
288
  end
289
289
 
290
290
  context '#coll_display_types_from_items' do
291
- before(:each) do
291
+ before do
292
292
  @indexer.coll_display_types_from_items(collection)
293
293
  end
294
294
  it 'gets single item display_type for single collection (and no dups)' do
@@ -311,7 +311,7 @@ describe GDor::Indexer do
311
311
  end # add_coll_info
312
312
 
313
313
  context '#num_found_in_solr' do
314
- before :each do
314
+ before do
315
315
  @collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
316
316
  @item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
317
317
  end
@@ -329,7 +329,7 @@ describe GDor::Indexer do
329
329
  end # num_found_in_solr
330
330
 
331
331
  context '#email_report_body' do
332
- before :each do
332
+ before do
333
333
  @indexer.config.notification = 'notification-list@example.com'
334
334
  allow(@indexer).to receive(:num_found_in_solr).and_return(500)
335
335
  allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
@@ -355,8 +355,8 @@ describe GDor::Indexer do
355
355
  end
356
356
 
357
357
  it 'email body include validation messages' do
358
- @indexer.instance_variable_set(:@validation_messages, ['this is a validation message'])
359
- expect(subject).to match(/this is a validation message/)
358
+ @indexer.instance_variable_set(:@validation_messages, instance_double(File, rewind: 0, read: 'this is a validation message'))
359
+ expect(subject).to match /this is a validation message/
360
360
  end
361
361
 
362
362
  it 'email includes reference to full log' do
@@ -365,7 +365,7 @@ describe GDor::Indexer do
365
365
  end
366
366
 
367
367
  describe '#email_results' do
368
- before :each do
368
+ before do
369
369
  @indexer.config.notification = 'notification-list@example.com'
370
370
  allow(@indexer).to receive(:send_email)
371
371
  allow(@indexer).to receive(:email_report_body).and_return('Report Body')
@@ -20,7 +20,6 @@ describe GDor::Indexer::ModsFields do
20
20
  let(:sdb) { sdb_for_mods(mods_xml) }
21
21
 
22
22
  context 'publication date fields' do
23
-
24
23
  RSpec.shared_examples 'expected (dateIssued)' do |solr_field_sym, mods_field_val, exp_val|
25
24
  it "#{exp_val} for #{mods_field_val}" do
26
25
  m = mods_origin_info_start_str +
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-08-06 00:00:00.000000000 Z
13
+ date: 2016-08-30 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer