gdor-indexer 0.6.0 → 0.7.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d023031326d6543f222881f79d0a102a6a27f0e9
4
- data.tar.gz: 5dd3419e6e38601413f737b3b3011ce3e9baecd9
3
+ metadata.gz: 37ead667bc97ed2a13e444026fe72ebb79f661fb
4
+ data.tar.gz: 418391aec671bf772462548a9aa20cf23ed2ea7c
5
5
  SHA512:
6
- metadata.gz: f182f105f741f8c21bb15c369573b6c3da905b28ef8e0d8920a421dcdacce4beea22891fcbc78dca2c60690c087e800f14f986749b657c218e88fe93247ce072
7
- data.tar.gz: 882369a75b3c62d0176746241f60e38204573644855514bc4fc5764ab3fcf97c3758bf7efcdc0fe8bd579102ab355255dfe76ec4730a6c69bffcfdab9b910e76
6
+ metadata.gz: 68c0d4599bd65d199d84e9664f5c2c6f09896fb9273d2832f9e71cd58072bf3aef32ccf5a52e6ecdfc23315b200a4a464ec8f0248687df414d66f0267d89d9ff
7
+ data.tar.gz: 6a64eaab7d20112d9b6b59fd5d0673f2f8855c91f1d5be95cf6c64d6e3ec83eefdcb106e9ef012a906f5a6f8d2211c1b766123ce66df6860bf28bc65a2a10b7b
@@ -1,11 +1,18 @@
1
+ require: rubocop-rspec
2
+
1
3
  # This configuration was generated by
2
4
  # `rubocop --auto-gen-config`
3
- # on 2016-07-20 15:53:51 -0700 using RuboCop version 0.41.2.
5
+ # on 2016-08-30 14:13:02 -0700 using RuboCop version 0.42.0.
4
6
  # The point is for the user to remove these configuration records
5
7
  # one by one as the offenses are removed from the code base.
6
8
  # Note that changes in the inspected code, or installation of new
7
9
  # versions of RuboCop, may require this file to be generated again.
8
10
 
11
+ # Offense count: 1
12
+ Lint/AmbiguousRegexpLiteral:
13
+ Exclude:
14
+ - 'spec/unit/indexer_spec.rb'
15
+
9
16
  # Offense count: 21
10
17
  Metrics/AbcSize:
11
18
  Max: 82
@@ -13,19 +20,19 @@ Metrics/AbcSize:
13
20
  # Offense count: 1
14
21
  # Configuration parameters: CountComments.
15
22
  Metrics/ClassLength:
16
- Max: 240
23
+ Max: 242
17
24
 
18
25
  # Offense count: 5
19
26
  Metrics/CyclomaticComplexity:
20
27
  Max: 9
21
28
 
22
- # Offense count: 314
29
+ # Offense count: 312
23
30
  # Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
24
31
  # URISchemes: http, https
25
32
  Metrics/LineLength:
26
33
  Max: 258
27
34
 
28
- # Offense count: 13
35
+ # Offense count: 15
29
36
  # Configuration parameters: CountComments.
30
37
  Metrics/MethodLength:
31
38
  Max: 43
@@ -64,12 +71,48 @@ RSpec/FilePath:
64
71
  - 'spec/unit/solr_doc_hash_spec.rb'
65
72
 
66
73
  # Offense count: 163
74
+ # Configuration parameters: AssignmentOnly.
67
75
  RSpec/InstanceVariable:
68
76
  Exclude:
69
77
  - 'spec/unit/indexer_spec.rb'
70
78
  - 'spec/unit/public_xml_fields_spec.rb'
71
79
  - 'spec/unit/solr_doc_builder_spec.rb'
72
80
 
81
+ # Offense count: 55
82
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
83
+ # SupportedStyles: allow, expect
84
+ RSpec/MessageExpectation:
85
+ Exclude:
86
+ - 'spec/unit/indexer_spec.rb'
87
+ - 'spec/unit/mods_fields_spec.rb'
88
+ - 'spec/unit/mods_pub_fields_spec.rb'
89
+ - 'spec/unit/mods_subject_fields_spec.rb'
90
+ - 'spec/unit/public_xml_fields_spec.rb'
91
+ - 'spec/unit/solr_doc_builder_spec.rb'
92
+ - 'spec/unit/solr_doc_hash_spec.rb'
93
+
94
+ # Offense count: 23
95
+ RSpec/MultipleExpectations:
96
+ Max: 8
97
+
98
+ # Offense count: 81
99
+ RSpec/NamedSubject:
100
+ Exclude:
101
+ - 'spec/unit/indexer_spec.rb'
102
+ - 'spec/unit/solr_doc_hash_spec.rb'
103
+
104
+ # Offense count: 50
105
+ # Configuration parameters: MaxNesting.
106
+ RSpec/NestedGroups:
107
+ Exclude:
108
+ - 'spec/unit/indexer_spec.rb'
109
+ - 'spec/unit/mods_fields_spec.rb'
110
+ - 'spec/unit/mods_pub_fields_spec.rb'
111
+ - 'spec/unit/mods_subject_fields_spec.rb'
112
+ - 'spec/unit/public_xml_fields_spec.rb'
113
+ - 'spec/unit/solr_doc_builder_spec.rb'
114
+ - 'spec/unit/solr_doc_hash_spec.rb'
115
+
73
116
  # Offense count: 7
74
117
  # Configuration parameters: IgnoreSymbolicNames.
75
118
  RSpec/VerifiedDoubles:
@@ -94,14 +137,6 @@ Style/ClassAndModuleChildren:
94
137
  - 'lib/gdor/indexer/solr_doc_builder.rb'
95
138
  - 'lib/gdor/indexer/solr_doc_hash.rb'
96
139
 
97
- # Offense count: 1
98
- # Cop supports --auto-correct.
99
- # Configuration parameters: EnforcedStyle, SupportedStyles.
100
- # SupportedStyles: empty_lines, no_empty_lines
101
- Style/EmptyLinesAroundBlockBody:
102
- Exclude:
103
- - 'spec/unit/mods_pub_fields_spec.rb'
104
-
105
140
  # Offense count: 22
106
141
  # Cop supports --auto-correct.
107
142
  # Configuration parameters: EnforcedStyle, SupportedStyles, IndentationWidth.
@@ -46,7 +46,7 @@ module GDor
46
46
  @total_time_to_parse = 0
47
47
  @retries = 0
48
48
  @druids_failed_to_ix = []
49
- @validation_messages = []
49
+ @validation_messages = Tempfile.new('gdor-indexer-validation-messages')
50
50
  @config ||= Confstruct::Configuration.new options
51
51
  @config.configure(YAML.load_file(yml_path)) if yml_path && File.exist?(yml_path)
52
52
  yield @config if block_given?
@@ -149,7 +149,7 @@ module GDor
149
149
  add_coll_info doc_hash, resource.collections # defined in public_xml_fields
150
150
  validation_messages = fields_to_add.validate_item(config)
151
151
  validation_messages.concat doc_hash.validate_mods(config)
152
- @validation_messages.concat(validation_messages)
152
+ @validation_messages.puts(validation_messages.join("\n"))
153
153
  doc_hash.to_h
154
154
  end
155
155
 
@@ -175,7 +175,7 @@ module GDor
175
175
  doc_hash.combine fields_to_add
176
176
  validation_messages = doc_hash.validate_collection(config)
177
177
  validation_messages.concat doc_hash.validate_mods(config)
178
- @validation_messages.concat(validation_messages)
178
+ @validation_messages.puts(validation_messages.join("\n"))
179
179
  doc_hash.to_h
180
180
  end
181
181
 
@@ -260,12 +260,12 @@ module GDor
260
260
  record_count_msgs.each do |msg|
261
261
  logger.info msg
262
262
  end
263
- logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
264
- logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total == 0
265
- logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
266
- logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total == 0
267
- logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count == 0
268
- logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total == 0
263
+ logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
264
+ logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total.zero?
265
+ logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
266
+ logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total.zero?
267
+ logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
268
+ logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total.zero?
269
269
  end
270
270
 
271
271
  # @return [String] the composed email body
@@ -282,7 +282,8 @@ module GDor
282
282
  body += "full log is at gdor_indexer/shared/#{config.harvestdor.log_dir}/#{config.harvestdor.log_name} on #{Socket.gethostname}"
283
283
  body += "\n"
284
284
 
285
- body + @validation_messages.join("\n") + "\n"
285
+ @validation_messages.rewind
286
+ body + @validation_messages.read + "\n"
286
287
  end
287
288
 
288
289
  # email the results of indexing if we are on one of the harvestdor boxes
@@ -13,12 +13,14 @@ class GDor::Indexer
13
13
  # when exp_val is a Regexp, looks for String value that matches, or Array with a String member that matches
14
14
  # @return [Boolean] true if the field is non-trivially present in the hash, false otherwise
15
15
  def field_present?(field, exp_val = nil)
16
- return false unless include?(field) && Array(self[field]).any? { |v| !v.blank? }
16
+ return false unless include?(field)
17
+ return false unless Array(self[field]).any?(&:present?)
18
+
17
19
  case exp_val
18
20
  when nil
19
21
  true
20
22
  when Regexp
21
- Array(self[field]).index { |s| exp_val.match(s) }
23
+ Array(self[field]).any? { |s| exp_val.match(s) }
22
24
  else
23
25
  Array(self[field]).include? exp_val
24
26
  end
@@ -1,5 +1,5 @@
1
1
  module GDor
2
2
  class Indexer
3
- VERSION = '0.6.0'.freeze
3
+ VERSION = '0.7.1'.freeze
4
4
  end
5
5
  end
@@ -12,7 +12,7 @@ describe GDor::Indexer do
12
12
  @pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
13
13
  @ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
14
14
  end
15
- before(:each) do
15
+ before do
16
16
  @indexer = described_class.new(@config_yml_path) do |config|
17
17
  config.whitelist = ['druid:ww121ss5000']
18
18
  end
@@ -69,7 +69,7 @@ describe GDor::Indexer do
69
69
  end
70
70
 
71
71
  describe '#harvest_and_index' do
72
- before :each do
72
+ before do
73
73
  allow(@indexer.harvestdor).to receive(:each_resource)
74
74
  allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
75
75
  allow(@indexer).to receive(:log_results)
@@ -246,7 +246,7 @@ describe GDor::Indexer do
246
246
  end # index_coll_obj_per_config
247
247
 
248
248
  context '#add_coll_info and supporting methods' do
249
- before(:each) do
249
+ before do
250
250
  @coll_druids_array = [collection]
251
251
  end
252
252
  let(:doc_hash) { GDor::Indexer::SolrDocHash.new({}) }
@@ -288,7 +288,7 @@ describe GDor::Indexer do
288
288
  end
289
289
 
290
290
  context '#coll_display_types_from_items' do
291
- before(:each) do
291
+ before do
292
292
  @indexer.coll_display_types_from_items(collection)
293
293
  end
294
294
  it 'gets single item display_type for single collection (and no dups)' do
@@ -311,7 +311,7 @@ describe GDor::Indexer do
311
311
  end # add_coll_info
312
312
 
313
313
  context '#num_found_in_solr' do
314
- before :each do
314
+ before do
315
315
  @collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
316
316
  @item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
317
317
  end
@@ -329,7 +329,7 @@ describe GDor::Indexer do
329
329
  end # num_found_in_solr
330
330
 
331
331
  context '#email_report_body' do
332
- before :each do
332
+ before do
333
333
  @indexer.config.notification = 'notification-list@example.com'
334
334
  allow(@indexer).to receive(:num_found_in_solr).and_return(500)
335
335
  allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
@@ -355,8 +355,8 @@ describe GDor::Indexer do
355
355
  end
356
356
 
357
357
  it 'email body include validation messages' do
358
- @indexer.instance_variable_set(:@validation_messages, ['this is a validation message'])
359
- expect(subject).to match(/this is a validation message/)
358
+ @indexer.instance_variable_set(:@validation_messages, instance_double(File, rewind: 0, read: 'this is a validation message'))
359
+ expect(subject).to match /this is a validation message/
360
360
  end
361
361
 
362
362
  it 'email includes reference to full log' do
@@ -365,7 +365,7 @@ describe GDor::Indexer do
365
365
  end
366
366
 
367
367
  describe '#email_results' do
368
- before :each do
368
+ before do
369
369
  @indexer.config.notification = 'notification-list@example.com'
370
370
  allow(@indexer).to receive(:send_email)
371
371
  allow(@indexer).to receive(:email_report_body).and_return('Report Body')
@@ -20,7 +20,6 @@ describe GDor::Indexer::ModsFields do
20
20
  let(:sdb) { sdb_for_mods(mods_xml) }
21
21
 
22
22
  context 'publication date fields' do
23
-
24
23
  RSpec.shared_examples 'expected (dateIssued)' do |solr_field_sym, mods_field_val, exp_val|
25
24
  it "#{exp_val} for #{mods_field_val}" do
26
25
  m = mods_origin_info_start_str +
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-08-06 00:00:00.000000000 Z
13
+ date: 2016-08-30 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer