search_solr_tools 5.0.1 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +3 -3
- data/bin/search_solr_tools +58 -2
- data/lib/search_solr_tools.rb +3 -0
- data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +4 -1
- data/lib/search_solr_tools/harvesters/base.rb +57 -9
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
- data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
- data/lib/search_solr_tools/version.rb +1 -1
- data/search_solr_tools.gemspec +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a961c7f8cdb3a126f31ba685db351337dda8816f64de94b82aad774f53b49a0c
|
4
|
+
data.tar.gz: bcadb76963b19f66567c3e5a233dd599a66d642073eb5c043b3fc5653ce2502f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a238ac9143e968252c4f37474c99ed38a6920160f9f3d8ba8753b72848dbf7152ba0bc0b142298141eae6693102481a34f20d7c461e869b1a1606b7a3fcb471
|
7
|
+
data.tar.gz: 0ca3f51c62c0683d58652928ea2bf8a09987cee68d8af691ee523186e1d3ce7dc0f4bec0e294cdb82206eb8ebb5d2b6ad6085bed651b6f0944f979cecb0875bb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## v5.1.0 (2020-07-23)
|
2
|
+
|
3
|
+
- Added a CLI method to "ping" the Solr and Source servers for a given
|
4
|
+
data center.
|
5
|
+
- Added a CLI method "errcode" to get information about the various
|
6
|
+
error codes that may be returned during harvest.
|
7
|
+
- Updated the CLI harvest to return more useful error codes on failure.
|
8
|
+
|
1
9
|
## v5.0.1 (2020-07-02)
|
2
10
|
|
3
11
|
- Bug fix: some requires weren't included that needed to be.
|
data/README.md
CHANGED
@@ -124,9 +124,9 @@ tagging, and publishing to RubyGems.
|
|
124
124
|
|---------------------------|-------------|
|
125
125
|
| `rake release:pre[false]` | Increase the current prerelease version number, push changes |
|
126
126
|
| `rake release:pre[true]` | Increase the current prerelease version number, publish release\* |
|
127
|
-
| `rake release:none` | Drop the prerelease version, publish release
|
128
|
-
| `rake release:minor` | Increase the minor version number, publish release
|
129
|
-
| `rake release:major` | Increase the major version number, publish release
|
127
|
+
| `rake release:none` | Drop the prerelease version, publish release\*, then `pre[false]` (does a patch release) |
|
128
|
+
| `rake release:minor` | Increase the minor version number, publish release\*, then `pre[false]` |
|
129
|
+
| `rake release:major` | Increase the major version number, publish release\*, then `pre[false]` |
|
130
130
|
|
131
131
|
\*"publish release" means each of the following occurs:
|
132
132
|
|
data/bin/search_solr_tools
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
require 'thor'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/AbcSize
|
7
8
|
class SolrHarvestCLI < Thor
|
8
9
|
map %w[--version -v] => :__print_version
|
9
10
|
|
@@ -12,6 +13,48 @@ class SolrHarvestCLI < Thor
|
|
12
13
|
puts SearchSolrTools::VERSION
|
13
14
|
end
|
14
15
|
|
16
|
+
desc 'errcode CODE', 'Print all exit codes bundled in CODE. Omit CODE to print all codes'
|
17
|
+
def errcode(code = -1)
|
18
|
+
codes = SearchSolrTools::Errors::HarvestError.describe_exit_code(code)
|
19
|
+
|
20
|
+
puts 'CODE | DESCRIPTION'
|
21
|
+
puts '-----+------------'
|
22
|
+
codes.each do |c, text|
|
23
|
+
puts format('%4<code>d | %<text>s', code: c, text: text)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
desc 'ping', 'Ping the solr and harvesting endpoints related to the specified data center(s)'
|
28
|
+
option :data_center, type: :array, required: true
|
29
|
+
option :environment, required: true
|
30
|
+
def ping
|
31
|
+
solr_success = true
|
32
|
+
source_success = true
|
33
|
+
options[:data_center].each do |target|
|
34
|
+
begin
|
35
|
+
harvest_class = get_harvester_class(target)
|
36
|
+
harvester = harvest_class.new(options[:environment])
|
37
|
+
solr_status = harvester.ping_solr
|
38
|
+
source_status = harvester.ping_source
|
39
|
+
rescue StandardError => e
|
40
|
+
solr_status = false
|
41
|
+
source_status = false
|
42
|
+
puts "Error trying to ping for #{target}: #{e}"
|
43
|
+
end
|
44
|
+
solr_success &&= solr_status
|
45
|
+
source_success &&= source_status
|
46
|
+
puts "Target: #{target}, Solr ping OK? #{solr_status}, data center ping OK? #{source_status}"
|
47
|
+
end
|
48
|
+
|
49
|
+
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
50
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
|
51
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
|
52
|
+
)
|
53
|
+
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
54
|
+
rescue SearchSolrTools::Errors::HarvestError => e
|
55
|
+
exit e.exit_code
|
56
|
+
end
|
57
|
+
|
15
58
|
desc 'harvest', 'Harvest from the specified data centers'
|
16
59
|
option :data_center, type: :array, required: true
|
17
60
|
option :environment, required: true
|
@@ -22,10 +65,21 @@ class SolrHarvestCLI < Thor
|
|
22
65
|
begin
|
23
66
|
harvest_class = get_harvester_class(target)
|
24
67
|
harvester = harvest_class.new(options[:environment], die_on_failure)
|
68
|
+
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
69
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
|
70
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
|
71
|
+
)
|
72
|
+
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
73
|
+
|
25
74
|
harvester.harvest_and_delete
|
75
|
+
rescue SearchSolrTools::Errors::HarvestError => e
|
76
|
+
puts "THERE WERE HARVEST STATUS ERRORS:\n#{e.message}"
|
77
|
+
exit e.exit_code
|
26
78
|
rescue StandardError => e
|
79
|
+
# If it gets here, there is an error that we aren't expecting.
|
27
80
|
puts "harvest failed for #{target}: #{e.message}"
|
28
|
-
|
81
|
+
puts e.backtrace
|
82
|
+
exit SearchSolrTools::Errors::HarvestError::ERRCODE_OTHER
|
29
83
|
end
|
30
84
|
end
|
31
85
|
end
|
@@ -85,10 +139,12 @@ class SolrHarvestCLI < Thor
|
|
85
139
|
|
86
140
|
def get_harvester_class(data_center_name)
|
87
141
|
name = data_center_name.downcase.to_s
|
88
|
-
raise("Invalid data center #{name}") unless harvester_map.key?(name)
|
142
|
+
raise SearchSolrTools::Errors::HarvestError.new(nil, "Invalid data center #{name}") unless harvester_map.key?(name)
|
89
143
|
|
90
144
|
harvester_map[name]
|
91
145
|
end
|
92
146
|
end
|
93
147
|
end
|
148
|
+
# rubocop:enable Metrics/AbcSize
|
149
|
+
|
94
150
|
SolrHarvestCLI.start(ARGV)
|
data/lib/search_solr_tools.rb
CHANGED
@@ -2,6 +2,9 @@ require_relative 'search_solr_tools/config/environments'
|
|
2
2
|
require_relative 'search_solr_tools/version'
|
3
3
|
|
4
4
|
require_relative 'search_solr_tools/helpers/selectors'
|
5
|
+
require_relative 'search_solr_tools/helpers/harvest_status'
|
6
|
+
require_relative 'search_solr_tools/errors/harvest_error'
|
7
|
+
|
5
8
|
%w( selectors harvesters translators ).each do |subdir|
|
6
9
|
Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
|
7
10
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module SearchSolrTools
  module Errors
    # Raised when a harvest, or the ping that precedes it, does not finish cleanly.
    #
    # An instance wraps either a Helpers::HarvestStatus or, when the status is
    # nil, a free-form message. The failure is translated into a process exit
    # code built from bit flags, so several problems can be reported at once.
    class HarvestError < StandardError
      ERRCODE_SOLR_PING = 1
      ERRCODE_SOURCE_PING = 2
      ERRCODE_SOURCE_NO_RESULTS = 4
      ERRCODE_SOURCE_HARVEST_ERROR = 8
      ERRCODE_DOCUMENT_INVALID = 16
      ERRCODE_INGEST_ERROR = 32
      ERRCODE_OTHER = 128

      ERRCODE_DESC = {
        ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
        ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
        ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
        ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
        ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
        ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
        ERRCODE_OTHER => 'General error code for non-harvest related issues'
      }.freeze

      PING_ERRCODE_MAP = {
        'ping_solr' => ERRCODE_SOLR_PING,
        'ping_source' => ERRCODE_SOURCE_PING
      }.freeze

      STATUS_ERRCODE_MAP = {
        Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
        Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
        Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
        Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
        Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
      }.freeze

      # The eight single-bit flag values, lowest first. 64 has no description
      # in ERRCODE_DESC and is reported as an invalid code number.
      EXIT_CODE_BITS = [1, 2, 4, 8, 16, 32, 64, 128].freeze
      private_constant :EXIT_CODE_BITS

      # Break an exit code into the individual flags it contains.
      #
      # code - anything responding to to_i. -1 (the default) means "list every
      #        known code". Other negative values and 0 yield an empty hash.
      #
      # Returns a Hash of flag value => description, ordered from the lowest
      # flag to the highest. A set flag without a description maps to
      # 'INVALID CODE NUMBER'.
      #
      # Flags are decoded with a bit test rather than by repeated subtraction,
      # so a value above 255 is no longer reported as "every flag set"; only
      # the flags actually present in its low eight bits are returned.
      def self.describe_exit_code(code = -1)
        code = code.to_i
        show_all = code == -1

        EXIT_CODE_BITS.each_with_object({}) do |flag, codes|
          if show_all
            # Listing everything: leave out flags that have no description.
            codes[flag] = ERRCODE_DESC[flag] if ERRCODE_DESC.key?(flag)
          elsif code.positive? && (code & flag) != 0
            codes[flag] = ERRCODE_DESC.fetch(flag, 'INVALID CODE NUMBER')
          end
        end
      end

      # status  - a Helpers::HarvestStatus describing what went wrong, or nil
      #           when the failure is not tied to a harvest status.
      # message - optional free-form text, used when status is nil.
      def initialize(status, message = nil)
        # Hand the text to StandardError so that to_s (and therefore string
        # interpolation of the exception) shows it instead of the class name.
        super(message)
        @status_data = status
        @other_message = message
      end

      # Compute the process exit code for this error and print the details
      # behind it to stdout.
      #
      # Returns ERRCODE_OTHER when there is no status, or when the status
      # carries none of the specific failures; otherwise the combination of
      # the flags for each failure found.
      def exit_code
        if @status_data.nil?
          puts "OTHER ERROR REPORTED: #{@other_message}"
        else
          puts "EXIT CODE STATUS:\n#{@status_data.status}"
        end

        compute_exit_code
      end

      # Human-readable description of the failure: the description of every
      # flag in the exit code, one per line. Uses the non-printing calculation,
      # so asking for the message does not also dump the status to stdout.
      def message
        self.class.describe_exit_code(compute_exit_code).values.join("\n")
      end

      private

      # Side-effect-free exit code calculation shared by exit_code and message.
      def compute_exit_code
        return ERRCODE_OTHER if @status_data.nil?

        counts = @status_data.status

        code = 0
        code |= ERRCODE_SOLR_PING unless @status_data.ping_solr
        code |= ERRCODE_SOURCE_PING unless @status_data.ping_source
        code |= ERRCODE_SOURCE_NO_RESULTS if counts[Helpers::HarvestStatus::HARVEST_NO_DOCS] > 0
        code |= ERRCODE_SOURCE_HARVEST_ERROR if counts[Helpers::HarvestStatus::HARVEST_FAILURE] > 0
        code |= ERRCODE_DOCUMENT_INVALID if counts[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC] > 0
        code |= ERRCODE_INGEST_ERROR if counts[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR] > 0

        # Nothing specific was flagged, yet an error was raised: report it as "other".
        code.zero? ? ERRCODE_OTHER : code
      end
    end
  end
end
|
@@ -46,8 +46,11 @@ module SearchSolrTools
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def add_documents_to_solr(add_docs)
|
49
|
-
|
49
|
+
status = insert_solr_doc add_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
|
50
|
+
|
51
|
+
if status == Helpers::HarvestStatus::INGEST_OK
|
50
52
|
puts "Added #{add_docs.size} auto suggest documents in one commit"
|
53
|
+
return Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
|
51
54
|
else
|
52
55
|
puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
|
53
56
|
new_add_docs = []
|
@@ -39,10 +39,40 @@ module SearchSolrTools
|
|
39
39
|
url
|
40
40
|
end
|
41
41
|
|
42
|
+
# Ping the Solr instance to ensure that it's running.
|
43
|
+
# The ping query is specified to manually check the title, as it's possible
|
44
|
+
# there is no "default" query in the solr instance.
|
45
|
+
def ping_solr(core = SolrEnvironments[@environment][:collection_name])
|
46
|
+
url = solr_url + "/#{core}/admin/ping?df=title"
|
47
|
+
success = false
|
48
|
+
|
49
|
+
# A request that raises (e.g. connection refused or timeout) is reported and counts as a failed ping
|
50
|
+
begin
|
51
|
+
RestClient.get(url) do |response, _request, _result|
|
52
|
+
success = response.code == 200
|
53
|
+
puts "Error in ping request: #{response.body}" unless success
|
54
|
+
end
|
55
|
+
rescue => e
|
56
|
+
puts "Rest exception while pinging Solr: #{e}"
|
57
|
+
end
|
58
|
+
success
|
59
|
+
end
|
60
|
+
|
61
|
+
# This should be overridden by child classes to implement the ability
|
62
|
+
# to "ping" the data center. Returns true if the ping is successful (or, as
|
63
|
+
# in this default, no ping method was defined)
|
64
|
+
def ping_source
|
65
|
+
puts "Harvester does not have ping method defined, assuming true"
|
66
|
+
true
|
67
|
+
end
|
68
|
+
|
42
69
|
def harvest_and_delete(harvest_method, delete_constraints, solr_core = SolrEnvironments[@environment][:collection_name])
|
43
70
|
start_time = Time.now.utc.iso8601
|
44
|
-
|
71
|
+
|
72
|
+
harvest_status = harvest_method.call
|
45
73
|
delete_old_documents start_time, delete_constraints, solr_core
|
74
|
+
|
75
|
+
harvest_status
|
46
76
|
end
|
47
77
|
|
48
78
|
def delete_old_documents(timestamp, constraints, solr_core, force = false)
|
@@ -83,21 +113,31 @@ module SearchSolrTools
|
|
83
113
|
def insert_solr_docs(docs, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
|
84
114
|
success = 0
|
85
115
|
failure = 0
|
116
|
+
|
117
|
+
status = Helpers::HarvestStatus.new
|
118
|
+
|
86
119
|
docs.each do |doc|
|
87
|
-
insert_solr_doc(doc, content_type, core)
|
120
|
+
doc_status = insert_solr_doc(doc, content_type, core)
|
121
|
+
status.record_status doc_status
|
122
|
+
doc_status == Helpers::HarvestStatus::INGEST_OK ? success += 1 : failure += 1
|
88
123
|
end
|
89
124
|
puts "#{success} document#{success == 1 ? '' : 's'} successfully added to Solr."
|
90
125
|
puts "#{failure} document#{failure == 1 ? '' : 's'} not added to Solr."
|
91
|
-
|
126
|
+
|
127
|
+
status
|
92
128
|
end
|
93
129
|
|
130
|
+
# TODO Need to return a specific type of failure:
|
131
|
+
# - Bad record content identified and no ingest attempted
|
132
|
+
# - Solr tries to ingest document and fails (bad content not detected prior to ingest)
|
133
|
+
# - Solr cannot insert document for reasons other than the document structure and content.
|
94
134
|
def insert_solr_doc(doc, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
|
95
135
|
url = solr_url + "/#{core}/update?commit=true"
|
96
|
-
|
136
|
+
status = Helpers::HarvestStatus::INGEST_OK
|
97
137
|
|
98
138
|
# Some of the docs will cause Solr to crash - CPU goes to 195% with `top` and it
|
99
139
|
# doesn't seem to recover.
|
100
|
-
return
|
140
|
+
return Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC if content_type == XML_CONTENT_TYPE && !doc_valid?(doc)
|
101
141
|
|
102
142
|
doc_serialized = get_serialized_doc(doc, content_type)
|
103
143
|
|
@@ -105,13 +145,18 @@ module SearchSolrTools
|
|
105
145
|
begin
|
106
146
|
RestClient.post(url, doc_serialized, content_type: content_type) do |response, _request, _result|
|
107
147
|
success = response.code == 200
|
108
|
-
|
148
|
+
unless success
|
149
|
+
puts "Error for #{doc_serialized}\n\n response: #{response.body}"
|
150
|
+
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
151
|
+
end
|
109
152
|
end
|
110
153
|
rescue => e
|
154
|
+
# TODO Need to provide more detail re: this failure so we know whether to
|
155
|
+
# exit the job with a status != 0
|
111
156
|
puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
|
157
|
+
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
112
158
|
end
|
113
|
-
|
114
|
-
success
|
159
|
+
status
|
115
160
|
end
|
116
161
|
|
117
162
|
def get_serialized_doc(doc, content_type)
|
@@ -124,7 +169,7 @@ module SearchSolrTools
|
|
124
169
|
end
|
125
170
|
end
|
126
171
|
|
127
|
-
# Get results from
|
172
|
+
# Get results from an end point specified in the request_url
|
128
173
|
def get_results(request_url, metadata_path, content_type = 'application/xml')
|
129
174
|
timeout = 300
|
130
175
|
retries_left = 3
|
@@ -140,6 +185,9 @@ module SearchSolrTools
|
|
140
185
|
|
141
186
|
retry if retries_left > 0
|
142
187
|
|
188
|
+
# TODO - Do we really need this "die_on_failure" anymore? The empty return
|
189
|
+
# will cause the "No Documents" error to be thrown in the harvester class
|
190
|
+
# now, so it will pretty much always "die on failure"
|
143
191
|
raise e if @die_on_failure
|
144
192
|
return
|
145
193
|
end
|
@@ -3,6 +3,7 @@ require 'rest-client'
|
|
3
3
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
|
6
|
+
|
6
7
|
module SearchSolrTools
|
7
8
|
module Harvesters
|
8
9
|
# Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
|
@@ -13,6 +14,17 @@ module SearchSolrTools
|
|
13
14
|
Helpers::FacetConfiguration.import_bin_configuration(env)
|
14
15
|
end
|
15
16
|
|
17
|
+
def ping_source
|
18
|
+
begin
|
19
|
+
RestClient.options(nsidc_json_url) do |response, _request, _result|
|
20
|
+
return response.code == 200
|
21
|
+
end
|
22
|
+
rescue => e
|
23
|
+
puts "Error trying to get options for #{nsidc_json_url} (ping)"
|
24
|
+
end
|
25
|
+
false
|
26
|
+
end
|
27
|
+
|
16
28
|
def harvest_and_delete
|
17
29
|
puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
|
18
30
|
super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
|
@@ -22,8 +34,22 @@ module SearchSolrTools
|
|
22
34
|
# this is the main entry point for the class
|
23
35
|
def harvest_nsidc_json_into_solr
|
24
36
|
result = docs_with_translated_entries_from_nsidc
|
25
|
-
|
26
|
-
|
37
|
+
|
38
|
+
status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
39
|
+
|
40
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
|
41
|
+
|
42
|
+
# Record the number of harvest failures; note that if this is 0, that's OK, the status will stay at 0
|
43
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
|
44
|
+
|
45
|
+
raise Errors::HarvestError, status unless status.ok?
|
46
|
+
rescue Errors::HarvestError => e
|
47
|
+
raise e
|
48
|
+
rescue StandardError => e
|
49
|
+
puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
|
50
|
+
puts e.backtrace
|
51
|
+
status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
|
52
|
+
raise Errors::HarvestError, status
|
27
53
|
end
|
28
54
|
|
29
55
|
def nsidc_json_url
|
@@ -33,7 +59,7 @@ module SearchSolrTools
|
|
33
59
|
def result_ids_from_nsidc
|
34
60
|
url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
|
35
61
|
SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
|
36
|
-
get_results
|
62
|
+
get_results(url, '//xmlns:identifier') || []
|
37
63
|
end
|
38
64
|
|
39
65
|
# Fetch a JSON representation of a dataset's metadata
|
@@ -48,7 +74,8 @@ module SearchSolrTools
|
|
48
74
|
docs = []
|
49
75
|
failure_ids = []
|
50
76
|
|
51
|
-
|
77
|
+
all_docs = result_ids_from_nsidc
|
78
|
+
all_docs.each do |r|
|
52
79
|
# Each result looks like:
|
53
80
|
# oai:nsidc.org/AE_L2A
|
54
81
|
id = r.text.split('/').last
|
@@ -60,7 +87,7 @@ module SearchSolrTools
|
|
60
87
|
end
|
61
88
|
end
|
62
89
|
|
63
|
-
{ add_docs: docs, failure_ids: failure_ids }
|
90
|
+
{ num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
|
64
91
|
end
|
65
92
|
end
|
66
93
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module SearchSolrTools
  module Helpers
    # Tallies the outcome of a harvest run: one entry per status key, plus the
    # result of pinging Solr and the source being harvested.
    class HarvestStatus
      INGEST_OK = :ok
      HARVEST_NO_DOCS = :harvest_none
      HARVEST_FAILURE = :harvest_fail
      INGEST_ERR_INVALID_DOC = :invalid
      INGEST_ERR_SOLR_ERROR = :solr_error
      OTHER_ERROR = :other
      PING_SOLR = :ping_solr     # used for initialize only
      PING_SOURCE = :ping_source # used for initialize only

      # Status keys that make the run count as failed when their entry is not 0.
      ERROR_STATUS = [
        HARVEST_NO_DOCS,
        HARVEST_FAILURE,
        INGEST_ERR_INVALID_DOC,
        INGEST_ERR_SOLR_ERROR,
        OTHER_ERROR
      ].freeze

      attr_reader :status
      attr_accessor :ping_solr, :ping_source

      # init_info - optional Hash. Entries keyed by a status constant replace
      #             the initial 0 for that status (the value is stored as
      #             given); PING_SOLR / PING_SOURCE entries set the ping
      #             results, which default to true. Any other key is ignored.
      def initialize(init_info = {})
        @status = { INGEST_OK => 0 }
        ERROR_STATUS.each { |s| @status[s] = 0 }

        init_info.each do |key, count|
          @status[key] = count if @status.key?(key)
        end

        @ping_solr = init_info.fetch(PING_SOLR, true)
        @ping_source = init_info.fetch(PING_SOURCE, true)
      end

      # Add count (default 1) to the tally for status.
      #
      # Returns the new tally for that status.
      # Raises ArgumentError when status is not one of the known status keys,
      # rather than failing with a NoMethodError on nil.
      def record_status(status, count = 1)
        raise ArgumentError, "Unknown harvest status: #{status.inspect}" unless @status.key?(status)

        @status[status] += count
      end

      # Truthy only when every error tally is still 0 and both pings succeeded.
      def ok?
        ERROR_STATUS.all? { |s| @status[s] == 0 } && @ping_solr && @ping_source
      end
    end
  end
end
|
data/search_solr_tools.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
metadata into a working solr instance.
|
17
17
|
EOF
|
18
18
|
spec.homepage = 'https://github.com/nsidc/search-solr-tools'
|
19
|
-
spec.license = '
|
19
|
+
spec.license = 'GPL-3.0-or-later'
|
20
20
|
|
21
21
|
spec.files = `git ls-files -z #{gem_files}`.split("\x0")
|
22
22
|
spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0
|
4
|
+
version: 5.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -14,7 +14,7 @@ authors:
|
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date: 2020-07-
|
17
|
+
date: 2020-07-23 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: ffi-geos
|
@@ -292,6 +292,7 @@ files:
|
|
292
292
|
- lib/search_solr_tools.rb
|
293
293
|
- lib/search_solr_tools/config/environments.rb
|
294
294
|
- lib/search_solr_tools/config/environments.yaml
|
295
|
+
- lib/search_solr_tools/errors/harvest_error.rb
|
295
296
|
- lib/search_solr_tools/harvesters/adc.rb
|
296
297
|
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
297
298
|
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
@@ -317,6 +318,7 @@ files:
|
|
317
318
|
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
318
319
|
- lib/search_solr_tools/helpers/data_one_format.rb
|
319
320
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
321
|
+
- lib/search_solr_tools/helpers/harvest_status.rb
|
320
322
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
321
323
|
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
322
324
|
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
@@ -349,7 +351,7 @@ files:
|
|
349
351
|
- search_solr_tools.gemspec
|
350
352
|
homepage: https://github.com/nsidc/search-solr-tools
|
351
353
|
licenses:
|
352
|
-
-
|
354
|
+
- GPL-3.0-or-later
|
353
355
|
metadata: {}
|
354
356
|
post_install_message:
|
355
357
|
rdoc_options: []
|