gdor-indexer 0.6.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +47 -12
- data/lib/gdor/indexer.rb +11 -10
- data/lib/gdor/indexer/solr_doc_hash.rb +4 -2
- data/lib/gdor/indexer/version.rb +1 -1
- data/spec/unit/indexer_spec.rb +9 -9
- data/spec/unit/mods_pub_fields_spec.rb +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37ead667bc97ed2a13e444026fe72ebb79f661fb
|
4
|
+
data.tar.gz: 418391aec671bf772462548a9aa20cf23ed2ea7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 68c0d4599bd65d199d84e9664f5c2c6f09896fb9273d2832f9e71cd58072bf3aef32ccf5a52e6ecdfc23315b200a4a464ec8f0248687df414d66f0267d89d9ff
|
7
|
+
data.tar.gz: 6a64eaab7d20112d9b6b59fd5d0673f2f8855c91f1d5be95cf6c64d6e3ec83eefdcb106e9ef012a906f5a6f8d2211c1b766123ce66df6860bf28bc65a2a10b7b
|
data/.rubocop_todo.yml
CHANGED
@@ -1,11 +1,18 @@
|
|
1
|
+
require: rubocop-rspec
|
2
|
+
|
1
3
|
# This configuration was generated by
|
2
4
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2016-
|
5
|
+
# on 2016-08-30 14:13:02 -0700 using RuboCop version 0.42.0.
|
4
6
|
# The point is for the user to remove these configuration records
|
5
7
|
# one by one as the offenses are removed from the code base.
|
6
8
|
# Note that changes in the inspected code, or installation of new
|
7
9
|
# versions of RuboCop, may require this file to be generated again.
|
8
10
|
|
11
|
+
# Offense count: 1
|
12
|
+
Lint/AmbiguousRegexpLiteral:
|
13
|
+
Exclude:
|
14
|
+
- 'spec/unit/indexer_spec.rb'
|
15
|
+
|
9
16
|
# Offense count: 21
|
10
17
|
Metrics/AbcSize:
|
11
18
|
Max: 82
|
@@ -13,19 +20,19 @@ Metrics/AbcSize:
|
|
13
20
|
# Offense count: 1
|
14
21
|
# Configuration parameters: CountComments.
|
15
22
|
Metrics/ClassLength:
|
16
|
-
Max:
|
23
|
+
Max: 242
|
17
24
|
|
18
25
|
# Offense count: 5
|
19
26
|
Metrics/CyclomaticComplexity:
|
20
27
|
Max: 9
|
21
28
|
|
22
|
-
# Offense count:
|
29
|
+
# Offense count: 312
|
23
30
|
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
|
24
31
|
# URISchemes: http, https
|
25
32
|
Metrics/LineLength:
|
26
33
|
Max: 258
|
27
34
|
|
28
|
-
# Offense count:
|
35
|
+
# Offense count: 15
|
29
36
|
# Configuration parameters: CountComments.
|
30
37
|
Metrics/MethodLength:
|
31
38
|
Max: 43
|
@@ -64,12 +71,48 @@ RSpec/FilePath:
|
|
64
71
|
- 'spec/unit/solr_doc_hash_spec.rb'
|
65
72
|
|
66
73
|
# Offense count: 163
|
74
|
+
# Configuration parameters: AssignmentOnly.
|
67
75
|
RSpec/InstanceVariable:
|
68
76
|
Exclude:
|
69
77
|
- 'spec/unit/indexer_spec.rb'
|
70
78
|
- 'spec/unit/public_xml_fields_spec.rb'
|
71
79
|
- 'spec/unit/solr_doc_builder_spec.rb'
|
72
80
|
|
81
|
+
# Offense count: 55
|
82
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
83
|
+
# SupportedStyles: allow, expect
|
84
|
+
RSpec/MessageExpectation:
|
85
|
+
Exclude:
|
86
|
+
- 'spec/unit/indexer_spec.rb'
|
87
|
+
- 'spec/unit/mods_fields_spec.rb'
|
88
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
89
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
90
|
+
- 'spec/unit/public_xml_fields_spec.rb'
|
91
|
+
- 'spec/unit/solr_doc_builder_spec.rb'
|
92
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
93
|
+
|
94
|
+
# Offense count: 23
|
95
|
+
RSpec/MultipleExpectations:
|
96
|
+
Max: 8
|
97
|
+
|
98
|
+
# Offense count: 81
|
99
|
+
RSpec/NamedSubject:
|
100
|
+
Exclude:
|
101
|
+
- 'spec/unit/indexer_spec.rb'
|
102
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
103
|
+
|
104
|
+
# Offense count: 50
|
105
|
+
# Configuration parameters: MaxNesting.
|
106
|
+
RSpec/NestedGroups:
|
107
|
+
Exclude:
|
108
|
+
- 'spec/unit/indexer_spec.rb'
|
109
|
+
- 'spec/unit/mods_fields_spec.rb'
|
110
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
111
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
112
|
+
- 'spec/unit/public_xml_fields_spec.rb'
|
113
|
+
- 'spec/unit/solr_doc_builder_spec.rb'
|
114
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
115
|
+
|
73
116
|
# Offense count: 7
|
74
117
|
# Configuration parameters: IgnoreSymbolicNames.
|
75
118
|
RSpec/VerifiedDoubles:
|
@@ -94,14 +137,6 @@ Style/ClassAndModuleChildren:
|
|
94
137
|
- 'lib/gdor/indexer/solr_doc_builder.rb'
|
95
138
|
- 'lib/gdor/indexer/solr_doc_hash.rb'
|
96
139
|
|
97
|
-
# Offense count: 1
|
98
|
-
# Cop supports --auto-correct.
|
99
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
100
|
-
# SupportedStyles: empty_lines, no_empty_lines
|
101
|
-
Style/EmptyLinesAroundBlockBody:
|
102
|
-
Exclude:
|
103
|
-
- 'spec/unit/mods_pub_fields_spec.rb'
|
104
|
-
|
105
140
|
# Offense count: 22
|
106
141
|
# Cop supports --auto-correct.
|
107
142
|
# Configuration parameters: EnforcedStyle, SupportedStyles, IndentationWidth.
|
data/lib/gdor/indexer.rb
CHANGED
@@ -46,7 +46,7 @@ module GDor
|
|
46
46
|
@total_time_to_parse = 0
|
47
47
|
@retries = 0
|
48
48
|
@druids_failed_to_ix = []
|
49
|
-
@validation_messages =
|
49
|
+
@validation_messages = Tempfile.new('gdor-indexer-validation-messages')
|
50
50
|
@config ||= Confstruct::Configuration.new options
|
51
51
|
@config.configure(YAML.load_file(yml_path)) if yml_path && File.exist?(yml_path)
|
52
52
|
yield @config if block_given?
|
@@ -149,7 +149,7 @@ module GDor
|
|
149
149
|
add_coll_info doc_hash, resource.collections # defined in public_xml_fields
|
150
150
|
validation_messages = fields_to_add.validate_item(config)
|
151
151
|
validation_messages.concat doc_hash.validate_mods(config)
|
152
|
-
@validation_messages.
|
152
|
+
@validation_messages.puts(validation_messages.join("\n"))
|
153
153
|
doc_hash.to_h
|
154
154
|
end
|
155
155
|
|
@@ -175,7 +175,7 @@ module GDor
|
|
175
175
|
doc_hash.combine fields_to_add
|
176
176
|
validation_messages = doc_hash.validate_collection(config)
|
177
177
|
validation_messages.concat doc_hash.validate_mods(config)
|
178
|
-
@validation_messages.
|
178
|
+
@validation_messages.puts(validation_messages.join("\n"))
|
179
179
|
doc_hash.to_h
|
180
180
|
end
|
181
181
|
|
@@ -260,12 +260,12 @@ module GDor
|
|
260
260
|
record_count_msgs.each do |msg|
|
261
261
|
logger.info msg
|
262
262
|
end
|
263
|
-
logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
264
|
-
logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total
|
265
|
-
logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
266
|
-
logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total
|
267
|
-
logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
268
|
-
logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total
|
263
|
+
logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
264
|
+
logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
265
|
+
logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
266
|
+
logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
267
|
+
logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
268
|
+
logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
269
269
|
end
|
270
270
|
|
271
271
|
# @return [String] the composed email body
|
@@ -282,7 +282,8 @@ module GDor
|
|
282
282
|
body += "full log is at gdor_indexer/shared/#{config.harvestdor.log_dir}/#{config.harvestdor.log_name} on #{Socket.gethostname}"
|
283
283
|
body += "\n"
|
284
284
|
|
285
|
-
|
285
|
+
@validation_messages.rewind
|
286
|
+
body + @validation_messages.read + "\n"
|
286
287
|
end
|
287
288
|
|
288
289
|
# email the results of indexing if we are on one of the harvestdor boxes
|
data/lib/gdor/indexer/solr_doc_hash.rb
CHANGED
@@ -13,12 +13,14 @@ class GDor::Indexer
|
|
13
13
|
# when exp_val is a Regexp, looks for String value that matches, or Array with a String member that matches
|
14
14
|
# @return [Boolean] true if the field is non-trivially present in the hash, false otherwise
|
15
15
|
def field_present?(field, exp_val = nil)
|
16
|
-
return false unless include?(field)
|
16
|
+
return false unless include?(field)
|
17
|
+
return false unless Array(self[field]).any?(&:present?)
|
18
|
+
|
17
19
|
case exp_val
|
18
20
|
when nil
|
19
21
|
true
|
20
22
|
when Regexp
|
21
|
-
Array(self[field]).
|
23
|
+
Array(self[field]).any? { |s| exp_val.match(s) }
|
22
24
|
else
|
23
25
|
Array(self[field]).include? exp_val
|
24
26
|
end
|
data/lib/gdor/indexer/version.rb
CHANGED
data/spec/unit/indexer_spec.rb
CHANGED
@@ -12,7 +12,7 @@ describe GDor::Indexer do
|
|
12
12
|
@pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
|
13
13
|
@ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
|
14
14
|
end
|
15
|
-
before
|
15
|
+
before do
|
16
16
|
@indexer = described_class.new(@config_yml_path) do |config|
|
17
17
|
config.whitelist = ['druid:ww121ss5000']
|
18
18
|
end
|
@@ -69,7 +69,7 @@ describe GDor::Indexer do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
describe '#harvest_and_index' do
|
72
|
-
before
|
72
|
+
before do
|
73
73
|
allow(@indexer.harvestdor).to receive(:each_resource)
|
74
74
|
allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
|
75
75
|
allow(@indexer).to receive(:log_results)
|
@@ -246,7 +246,7 @@ describe GDor::Indexer do
|
|
246
246
|
end # index_coll_obj_per_config
|
247
247
|
|
248
248
|
context '#add_coll_info and supporting methods' do
|
249
|
-
before
|
249
|
+
before do
|
250
250
|
@coll_druids_array = [collection]
|
251
251
|
end
|
252
252
|
let(:doc_hash) { GDor::Indexer::SolrDocHash.new({}) }
|
@@ -288,7 +288,7 @@ describe GDor::Indexer do
|
|
288
288
|
end
|
289
289
|
|
290
290
|
context '#coll_display_types_from_items' do
|
291
|
-
before
|
291
|
+
before do
|
292
292
|
@indexer.coll_display_types_from_items(collection)
|
293
293
|
end
|
294
294
|
it 'gets single item display_type for single collection (and no dups)' do
|
@@ -311,7 +311,7 @@ describe GDor::Indexer do
|
|
311
311
|
end # add_coll_info
|
312
312
|
|
313
313
|
context '#num_found_in_solr' do
|
314
|
-
before
|
314
|
+
before do
|
315
315
|
@collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
|
316
316
|
@item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
|
317
317
|
end
|
@@ -329,7 +329,7 @@ describe GDor::Indexer do
|
|
329
329
|
end # num_found_in_solr
|
330
330
|
|
331
331
|
context '#email_report_body' do
|
332
|
-
before
|
332
|
+
before do
|
333
333
|
@indexer.config.notification = 'notification-list@example.com'
|
334
334
|
allow(@indexer).to receive(:num_found_in_solr).and_return(500)
|
335
335
|
allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
|
@@ -355,8 +355,8 @@ describe GDor::Indexer do
|
|
355
355
|
end
|
356
356
|
|
357
357
|
it 'email body include validation messages' do
|
358
|
-
@indexer.instance_variable_set(:@validation_messages,
|
359
|
-
expect(subject).to match
|
358
|
+
@indexer.instance_variable_set(:@validation_messages, instance_double(File, rewind: 0, read: 'this is a validation message'))
|
359
|
+
expect(subject).to match /this is a validation message/
|
360
360
|
end
|
361
361
|
|
362
362
|
it 'email includes reference to full log' do
|
@@ -365,7 +365,7 @@ describe GDor::Indexer do
|
|
365
365
|
end
|
366
366
|
|
367
367
|
describe '#email_results' do
|
368
|
-
before
|
368
|
+
before do
|
369
369
|
@indexer.config.notification = 'notification-list@example.com'
|
370
370
|
allow(@indexer).to receive(:send_email)
|
371
371
|
allow(@indexer).to receive(:email_report_body).and_return('Report Body')
|
data/spec/unit/mods_pub_fields_spec.rb
CHANGED
@@ -20,7 +20,6 @@ describe GDor::Indexer::ModsFields do
|
|
20
20
|
let(:sdb) { sdb_for_mods(mods_xml) }
|
21
21
|
|
22
22
|
context 'publication date fields' do
|
23
|
-
|
24
23
|
RSpec.shared_examples 'expected (dateIssued)' do |solr_field_sym, mods_field_val, exp_val|
|
25
24
|
it "#{exp_val} for #{mods_field_val}" do
|
26
25
|
m = mods_origin_info_start_str +
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-08-
|
13
|
+
date: 2016-08-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: harvestdor-indexer
|