gdor-indexer 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +47 -12
- data/lib/gdor/indexer.rb +11 -10
- data/lib/gdor/indexer/solr_doc_hash.rb +4 -2
- data/lib/gdor/indexer/version.rb +1 -1
- data/spec/unit/indexer_spec.rb +9 -9
- data/spec/unit/mods_pub_fields_spec.rb +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 37ead667bc97ed2a13e444026fe72ebb79f661fb
|
|
4
|
+
data.tar.gz: 418391aec671bf772462548a9aa20cf23ed2ea7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 68c0d4599bd65d199d84e9664f5c2c6f09896fb9273d2832f9e71cd58072bf3aef32ccf5a52e6ecdfc23315b200a4a464ec8f0248687df414d66f0267d89d9ff
|
|
7
|
+
data.tar.gz: 6a64eaab7d20112d9b6b59fd5d0673f2f8855c91f1d5be95cf6c64d6e3ec83eefdcb106e9ef012a906f5a6f8d2211c1b766123ce66df6860bf28bc65a2a10b7b
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
|
+
require: rubocop-rspec
|
|
2
|
+
|
|
1
3
|
# This configuration was generated by
|
|
2
4
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2016-
|
|
5
|
+
# on 2016-08-30 14:13:02 -0700 using RuboCop version 0.42.0.
|
|
4
6
|
# The point is for the user to remove these configuration records
|
|
5
7
|
# one by one as the offenses are removed from the code base.
|
|
6
8
|
# Note that changes in the inspected code, or installation of new
|
|
7
9
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
10
|
|
|
11
|
+
# Offense count: 1
|
|
12
|
+
Lint/AmbiguousRegexpLiteral:
|
|
13
|
+
Exclude:
|
|
14
|
+
- 'spec/unit/indexer_spec.rb'
|
|
15
|
+
|
|
9
16
|
# Offense count: 21
|
|
10
17
|
Metrics/AbcSize:
|
|
11
18
|
Max: 82
|
|
@@ -13,19 +20,19 @@ Metrics/AbcSize:
|
|
|
13
20
|
# Offense count: 1
|
|
14
21
|
# Configuration parameters: CountComments.
|
|
15
22
|
Metrics/ClassLength:
|
|
16
|
-
Max:
|
|
23
|
+
Max: 242
|
|
17
24
|
|
|
18
25
|
# Offense count: 5
|
|
19
26
|
Metrics/CyclomaticComplexity:
|
|
20
27
|
Max: 9
|
|
21
28
|
|
|
22
|
-
# Offense count:
|
|
29
|
+
# Offense count: 312
|
|
23
30
|
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
|
|
24
31
|
# URISchemes: http, https
|
|
25
32
|
Metrics/LineLength:
|
|
26
33
|
Max: 258
|
|
27
34
|
|
|
28
|
-
# Offense count:
|
|
35
|
+
# Offense count: 15
|
|
29
36
|
# Configuration parameters: CountComments.
|
|
30
37
|
Metrics/MethodLength:
|
|
31
38
|
Max: 43
|
|
@@ -64,12 +71,48 @@ RSpec/FilePath:
|
|
|
64
71
|
- 'spec/unit/solr_doc_hash_spec.rb'
|
|
65
72
|
|
|
66
73
|
# Offense count: 163
|
|
74
|
+
# Configuration parameters: AssignmentOnly.
|
|
67
75
|
RSpec/InstanceVariable:
|
|
68
76
|
Exclude:
|
|
69
77
|
- 'spec/unit/indexer_spec.rb'
|
|
70
78
|
- 'spec/unit/public_xml_fields_spec.rb'
|
|
71
79
|
- 'spec/unit/solr_doc_builder_spec.rb'
|
|
72
80
|
|
|
81
|
+
# Offense count: 55
|
|
82
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
|
83
|
+
# SupportedStyles: allow, expect
|
|
84
|
+
RSpec/MessageExpectation:
|
|
85
|
+
Exclude:
|
|
86
|
+
- 'spec/unit/indexer_spec.rb'
|
|
87
|
+
- 'spec/unit/mods_fields_spec.rb'
|
|
88
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
|
89
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
|
90
|
+
- 'spec/unit/public_xml_fields_spec.rb'
|
|
91
|
+
- 'spec/unit/solr_doc_builder_spec.rb'
|
|
92
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
|
93
|
+
|
|
94
|
+
# Offense count: 23
|
|
95
|
+
RSpec/MultipleExpectations:
|
|
96
|
+
Max: 8
|
|
97
|
+
|
|
98
|
+
# Offense count: 81
|
|
99
|
+
RSpec/NamedSubject:
|
|
100
|
+
Exclude:
|
|
101
|
+
- 'spec/unit/indexer_spec.rb'
|
|
102
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
|
103
|
+
|
|
104
|
+
# Offense count: 50
|
|
105
|
+
# Configuration parameters: MaxNesting.
|
|
106
|
+
RSpec/NestedGroups:
|
|
107
|
+
Exclude:
|
|
108
|
+
- 'spec/unit/indexer_spec.rb'
|
|
109
|
+
- 'spec/unit/mods_fields_spec.rb'
|
|
110
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
|
111
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
|
112
|
+
- 'spec/unit/public_xml_fields_spec.rb'
|
|
113
|
+
- 'spec/unit/solr_doc_builder_spec.rb'
|
|
114
|
+
- 'spec/unit/solr_doc_hash_spec.rb'
|
|
115
|
+
|
|
73
116
|
# Offense count: 7
|
|
74
117
|
# Configuration parameters: IgnoreSymbolicNames.
|
|
75
118
|
RSpec/VerifiedDoubles:
|
|
@@ -94,14 +137,6 @@ Style/ClassAndModuleChildren:
|
|
|
94
137
|
- 'lib/gdor/indexer/solr_doc_builder.rb'
|
|
95
138
|
- 'lib/gdor/indexer/solr_doc_hash.rb'
|
|
96
139
|
|
|
97
|
-
# Offense count: 1
|
|
98
|
-
# Cop supports --auto-correct.
|
|
99
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
|
100
|
-
# SupportedStyles: empty_lines, no_empty_lines
|
|
101
|
-
Style/EmptyLinesAroundBlockBody:
|
|
102
|
-
Exclude:
|
|
103
|
-
- 'spec/unit/mods_pub_fields_spec.rb'
|
|
104
|
-
|
|
105
140
|
# Offense count: 22
|
|
106
141
|
# Cop supports --auto-correct.
|
|
107
142
|
# Configuration parameters: EnforcedStyle, SupportedStyles, IndentationWidth.
|
data/lib/gdor/indexer.rb
CHANGED
|
@@ -46,7 +46,7 @@ module GDor
|
|
|
46
46
|
@total_time_to_parse = 0
|
|
47
47
|
@retries = 0
|
|
48
48
|
@druids_failed_to_ix = []
|
|
49
|
-
@validation_messages =
|
|
49
|
+
@validation_messages = Tempfile.new('gdor-indexer-validation-messages')
|
|
50
50
|
@config ||= Confstruct::Configuration.new options
|
|
51
51
|
@config.configure(YAML.load_file(yml_path)) if yml_path && File.exist?(yml_path)
|
|
52
52
|
yield @config if block_given?
|
|
@@ -149,7 +149,7 @@ module GDor
|
|
|
149
149
|
add_coll_info doc_hash, resource.collections # defined in public_xml_fields
|
|
150
150
|
validation_messages = fields_to_add.validate_item(config)
|
|
151
151
|
validation_messages.concat doc_hash.validate_mods(config)
|
|
152
|
-
@validation_messages.
|
|
152
|
+
@validation_messages.puts(validation_messages.join("\n"))
|
|
153
153
|
doc_hash.to_h
|
|
154
154
|
end
|
|
155
155
|
|
|
@@ -175,7 +175,7 @@ module GDor
|
|
|
175
175
|
doc_hash.combine fields_to_add
|
|
176
176
|
validation_messages = doc_hash.validate_collection(config)
|
|
177
177
|
validation_messages.concat doc_hash.validate_mods(config)
|
|
178
|
-
@validation_messages.
|
|
178
|
+
@validation_messages.puts(validation_messages.join("\n"))
|
|
179
179
|
doc_hash.to_h
|
|
180
180
|
end
|
|
181
181
|
|
|
@@ -260,12 +260,12 @@ module GDor
|
|
|
260
260
|
record_count_msgs.each do |msg|
|
|
261
261
|
logger.info msg
|
|
262
262
|
end
|
|
263
|
-
logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
|
264
|
-
logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total
|
|
265
|
-
logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
|
266
|
-
logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total
|
|
267
|
-
logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count
|
|
268
|
-
logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total
|
|
263
|
+
logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
|
264
|
+
logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
|
265
|
+
logger.info("Avg parse time per object (successful): #{(@total_time_to_parse / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
|
266
|
+
logger.info("Avg parse time per object (all): #{(@total_time_to_parse / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
|
267
|
+
logger.info("Avg complete index time per object (successful): #{(@total_time / metrics.success_count).round(2)} seconds") unless metrics.success_count.zero?
|
|
268
|
+
logger.info("Avg complete index time per object (all): #{(@total_time / metrics.total).round(2)} seconds") unless metrics.total.zero?
|
|
269
269
|
end
|
|
270
270
|
|
|
271
271
|
# @return [String] the composed email body
|
|
@@ -282,7 +282,8 @@ module GDor
|
|
|
282
282
|
body += "full log is at gdor_indexer/shared/#{config.harvestdor.log_dir}/#{config.harvestdor.log_name} on #{Socket.gethostname}"
|
|
283
283
|
body += "\n"
|
|
284
284
|
|
|
285
|
-
|
|
285
|
+
@validation_messages.rewind
|
|
286
|
+
body + @validation_messages.read + "\n"
|
|
286
287
|
end
|
|
287
288
|
|
|
288
289
|
# email the results of indexing if we are on one of the harvestdor boxes
|
|
data/lib/gdor/indexer/solr_doc_hash.rb
CHANGED
|
@@ -13,12 +13,14 @@ class GDor::Indexer
|
|
|
13
13
|
# when exp_val is a Regexp, looks for String value that matches, or Array with a String member that matches
|
|
14
14
|
# @return [Boolean] true if the field is non-trivially present in the hash, false otherwise
|
|
15
15
|
def field_present?(field, exp_val = nil)
|
|
16
|
-
return false unless include?(field)
|
|
16
|
+
return false unless include?(field)
|
|
17
|
+
return false unless Array(self[field]).any?(&:present?)
|
|
18
|
+
|
|
17
19
|
case exp_val
|
|
18
20
|
when nil
|
|
19
21
|
true
|
|
20
22
|
when Regexp
|
|
21
|
-
Array(self[field]).
|
|
23
|
+
Array(self[field]).any? { |s| exp_val.match(s) }
|
|
22
24
|
else
|
|
23
25
|
Array(self[field]).include? exp_val
|
|
24
26
|
end
|
data/lib/gdor/indexer/version.rb
CHANGED
data/spec/unit/indexer_spec.rb
CHANGED
|
@@ -12,7 +12,7 @@ describe GDor::Indexer do
|
|
|
12
12
|
@pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
|
|
13
13
|
@ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
|
|
14
14
|
end
|
|
15
|
-
before
|
|
15
|
+
before do
|
|
16
16
|
@indexer = described_class.new(@config_yml_path) do |config|
|
|
17
17
|
config.whitelist = ['druid:ww121ss5000']
|
|
18
18
|
end
|
|
@@ -69,7 +69,7 @@ describe GDor::Indexer do
|
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
describe '#harvest_and_index' do
|
|
72
|
-
before
|
|
72
|
+
before do
|
|
73
73
|
allow(@indexer.harvestdor).to receive(:each_resource)
|
|
74
74
|
allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
|
|
75
75
|
allow(@indexer).to receive(:log_results)
|
|
@@ -246,7 +246,7 @@ describe GDor::Indexer do
|
|
|
246
246
|
end # index_coll_obj_per_config
|
|
247
247
|
|
|
248
248
|
context '#add_coll_info and supporting methods' do
|
|
249
|
-
before
|
|
249
|
+
before do
|
|
250
250
|
@coll_druids_array = [collection]
|
|
251
251
|
end
|
|
252
252
|
let(:doc_hash) { GDor::Indexer::SolrDocHash.new({}) }
|
|
@@ -288,7 +288,7 @@ describe GDor::Indexer do
|
|
|
288
288
|
end
|
|
289
289
|
|
|
290
290
|
context '#coll_display_types_from_items' do
|
|
291
|
-
before
|
|
291
|
+
before do
|
|
292
292
|
@indexer.coll_display_types_from_items(collection)
|
|
293
293
|
end
|
|
294
294
|
it 'gets single item display_type for single collection (and no dups)' do
|
|
@@ -311,7 +311,7 @@ describe GDor::Indexer do
|
|
|
311
311
|
end # add_coll_info
|
|
312
312
|
|
|
313
313
|
context '#num_found_in_solr' do
|
|
314
|
-
before
|
|
314
|
+
before do
|
|
315
315
|
@collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
|
|
316
316
|
@item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
|
|
317
317
|
end
|
|
@@ -329,7 +329,7 @@ describe GDor::Indexer do
|
|
|
329
329
|
end # num_found_in_solr
|
|
330
330
|
|
|
331
331
|
context '#email_report_body' do
|
|
332
|
-
before
|
|
332
|
+
before do
|
|
333
333
|
@indexer.config.notification = 'notification-list@example.com'
|
|
334
334
|
allow(@indexer).to receive(:num_found_in_solr).and_return(500)
|
|
335
335
|
allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
|
|
@@ -355,8 +355,8 @@ describe GDor::Indexer do
|
|
|
355
355
|
end
|
|
356
356
|
|
|
357
357
|
it 'email body include validation messages' do
|
|
358
|
-
@indexer.instance_variable_set(:@validation_messages,
|
|
359
|
-
expect(subject).to match
|
|
358
|
+
@indexer.instance_variable_set(:@validation_messages, instance_double(File, rewind: 0, read: 'this is a validation message'))
|
|
359
|
+
expect(subject).to match /this is a validation message/
|
|
360
360
|
end
|
|
361
361
|
|
|
362
362
|
it 'email includes reference to full log' do
|
|
@@ -365,7 +365,7 @@ describe GDor::Indexer do
|
|
|
365
365
|
end
|
|
366
366
|
|
|
367
367
|
describe '#email_results' do
|
|
368
|
-
before
|
|
368
|
+
before do
|
|
369
369
|
@indexer.config.notification = 'notification-list@example.com'
|
|
370
370
|
allow(@indexer).to receive(:send_email)
|
|
371
371
|
allow(@indexer).to receive(:email_report_body).and_return('Report Body')
|
|
data/spec/unit/mods_pub_fields_spec.rb
CHANGED
|
@@ -20,7 +20,6 @@ describe GDor::Indexer::ModsFields do
|
|
|
20
20
|
let(:sdb) { sdb_for_mods(mods_xml) }
|
|
21
21
|
|
|
22
22
|
context 'publication date fields' do
|
|
23
|
-
|
|
24
23
|
RSpec.shared_examples 'expected (dateIssued)' do |solr_field_sym, mods_field_val, exp_val|
|
|
25
24
|
it "#{exp_val} for #{mods_field_val}" do
|
|
26
25
|
m = mods_origin_info_start_str +
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: gdor-indexer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.0
|
|
4
|
+
version: 0.7.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Naomi Dushay
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2016-08-
|
|
13
|
+
date: 2016-08-30 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: harvestdor-indexer
|