search_solr_tools 5.0.0 → 5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -1
- data/README.md +3 -3
- data/bin/search_solr_tools +58 -2
- data/lib/search_solr_tools/config/environments.yaml +1 -1
- data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +4 -1
- data/lib/search_solr_tools/harvesters/base.rb +63 -9
- data/lib/search_solr_tools/harvesters/ices.rb +2 -0
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +2 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +1 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
- data/lib/search_solr_tools/harvesters/oai.rb +1 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +1 -0
- data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +4 -0
- data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
- data/lib/search_solr_tools/version.rb +1 -1
- data/lib/search_solr_tools.rb +3 -0
- data/search_solr_tools.gemspec +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b3fc4d94ab6cb6b394ebff1a2c50fb36908d8201199e0702d613ead1cc91621
|
4
|
+
data.tar.gz: 8c8e9a764304e2c09e05ee88f28e4f388a846d7e2e61a23b12b123167affdc1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e812f59ba6e2776db5b52ecff4a38121b815f645d632812a88010b57f3c34013d00596b8305f9e5e29464f33259ee072b62c4fc94b084ed63ca1e13f4d59b8a9
|
7
|
+
data.tar.gz: 65570df8008e4fd9ea82c492a88cfa0ec9805ff28b688b43e222ca588e969b87d8264be33c3a2cf5034af0788cd496615698fc1ea863df0ba6fef65032e2e755
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,22 @@
|
|
1
|
-
##
|
1
|
+
## v5.2.0 (2022-08-31)
|
2
|
+
|
3
|
+
- Updated the call for identifiers for the json harvester to use the
|
4
|
+
proper "metadataPrefix" parameter, and request the dif identifiers
|
5
|
+
instead of iso.
|
6
|
+
|
7
|
+
## v5.1.0 (2020-07-23)
|
8
|
+
|
9
|
+
- Added a CLI method to "ping" the Solr and Source servers for a given
|
10
|
+
data center.
|
11
|
+
- Added a CLI method "errcode" to get information about the various
|
12
|
+
error codes that may be returned during harvest.
|
13
|
+
- Updated the CLI harvest to return more useful error codes on failure.
|
14
|
+
|
15
|
+
## v5.0.1 (2020-07-02)
|
16
|
+
|
17
|
+
- Bug fix: added `require` statements that were needed but missing.
|
18
|
+
|
19
|
+
## v5.0.0 (2020-07-02)
|
2
20
|
|
3
21
|
- Update Ruby to 2.6.5, update gem dependencies to more recent version.
|
4
22
|
- Updates to correspond with an update to Solr 8.5.2
|
data/README.md
CHANGED
@@ -124,9 +124,9 @@ tagging, and publishing to RubyGems.
|
|
124
124
|
|---------------------------|-------------|
|
125
125
|
| `rake release:pre[false]` | Increase the current prerelease version number, push changes |
|
126
126
|
| `rake release:pre[true]` | Increase the current prerelease version number, publish release\* |
|
127
|
-
| `rake release:none` | Drop the prerelease version, publish release
|
128
|
-
| `rake release:minor` | Increase the minor version number, publish release
|
129
|
-
| `rake release:major` | Increase the major version number, publish release
|
127
|
+
| `rake release:none` | Drop the prerelease version, publish release\*, then `pre[false]` (does a patch release) |
|
128
|
+
| `rake release:minor` | Increase the minor version number, publish release\*, then `pre[false]` |
|
129
|
+
| `rake release:major` | Increase the major version number, publish release\*, then `pre[false]` |
|
130
130
|
|
131
131
|
\*"publish release" means each of the following occurs:
|
132
132
|
|
data/bin/search_solr_tools
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
require 'thor'
|
6
6
|
|
7
|
+
# rubocop:disable Metrics/AbcSize
|
7
8
|
class SolrHarvestCLI < Thor
|
8
9
|
map %w[--version -v] => :__print_version
|
9
10
|
|
@@ -12,6 +13,48 @@ class SolrHarvestCLI < Thor
|
|
12
13
|
puts SearchSolrTools::VERSION
|
13
14
|
end
|
14
15
|
|
16
|
+
desc 'errcode CODE', 'Print all exit codes bundled in CODE. Omit CODE to print all codes'
|
17
|
+
def errcode(code = -1)
|
18
|
+
codes = SearchSolrTools::Errors::HarvestError.describe_exit_code(code)
|
19
|
+
|
20
|
+
puts 'CODE | DESCRIPTION'
|
21
|
+
puts '-----+------------'
|
22
|
+
codes.each do |c, text|
|
23
|
+
puts format('%4<code>d | %<text>s', code: c, text: text)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
desc 'ping', 'Ping the solr and harvesting endpoints related to the specified data center(s)'
|
28
|
+
option :data_center, type: :array, required: true
|
29
|
+
option :environment, required: true
|
30
|
+
def ping
|
31
|
+
solr_success = true
|
32
|
+
source_success = true
|
33
|
+
options[:data_center].each do |target|
|
34
|
+
begin
|
35
|
+
harvest_class = get_harvester_class(target)
|
36
|
+
harvester = harvest_class.new(options[:environment])
|
37
|
+
solr_status = harvester.ping_solr
|
38
|
+
source_status = harvester.ping_source
|
39
|
+
rescue StandardError => e
|
40
|
+
solr_status = false
|
41
|
+
source_status = false
|
42
|
+
puts "Error trying to ping for #{target}: #{e}"
|
43
|
+
end
|
44
|
+
solr_success &&= solr_status
|
45
|
+
source_success &&= source_status
|
46
|
+
puts "Target: #{target}, Solr ping OK? #{solr_status}, data center ping OK? #{source_status}"
|
47
|
+
end
|
48
|
+
|
49
|
+
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
50
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
|
51
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
|
52
|
+
)
|
53
|
+
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
54
|
+
rescue SearchSolrTools::Errors::HarvestError => e
|
55
|
+
exit e.exit_code
|
56
|
+
end
|
57
|
+
|
15
58
|
desc 'harvest', 'Harvest from the specified data centers'
|
16
59
|
option :data_center, type: :array, required: true
|
17
60
|
option :environment, required: true
|
@@ -22,10 +65,21 @@ class SolrHarvestCLI < Thor
|
|
22
65
|
begin
|
23
66
|
harvest_class = get_harvester_class(target)
|
24
67
|
harvester = harvest_class.new(options[:environment], die_on_failure)
|
68
|
+
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
69
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
|
70
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
|
71
|
+
)
|
72
|
+
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
73
|
+
|
25
74
|
harvester.harvest_and_delete
|
75
|
+
rescue SearchSolrTools::Errors::HarvestError => e
|
76
|
+
puts "THERE WERE HARVEST STATUS ERRORS:\n#{e.message}"
|
77
|
+
exit e.exit_code
|
26
78
|
rescue StandardError => e
|
79
|
+
# If it gets here, there is an error that we aren't expecting.
|
27
80
|
puts "harvest failed for #{target}: #{e.message}"
|
28
|
-
|
81
|
+
puts e.backtrace
|
82
|
+
exit SearchSolrTools::Errors::HarvestError::ERRCODE_OTHER
|
29
83
|
end
|
30
84
|
end
|
31
85
|
end
|
@@ -85,10 +139,12 @@ class SolrHarvestCLI < Thor
|
|
85
139
|
|
86
140
|
def get_harvester_class(data_center_name)
|
87
141
|
name = data_center_name.downcase.to_s
|
88
|
-
raise("Invalid data center #{name}") unless harvester_map.key?(name)
|
142
|
+
raise SearchSolrTools::Errors::HarvestError.new(nil, "Invalid data center #{name}") unless harvester_map.key?(name)
|
89
143
|
|
90
144
|
harvester_map[name]
|
91
145
|
end
|
92
146
|
end
|
93
147
|
end
|
148
|
+
# rubocop:enable Metrics/AbcSize
|
149
|
+
|
94
150
|
SolrHarvestCLI.start(ARGV)
|
@@ -39,7 +39,7 @@
|
|
39
39
|
# Not using DCS API v2 here because not all retired datasets have their "retired"
|
40
40
|
# flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
|
41
41
|
# should be. GLA01.018 will show up if we use DCS API v2.
|
42
|
-
:nsidc_oai_identifiers_url: oai?verb=ListIdentifiers&
|
42
|
+
:nsidc_oai_identifiers_url: oai?verb=ListIdentifiers&metadataPrefix=dif&retired=false
|
43
43
|
|
44
44
|
:local:
|
45
45
|
:host: localhost
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Errors
|
3
|
+
class HarvestError < StandardError
|
4
|
+
ERRCODE_SOLR_PING = 1
|
5
|
+
ERRCODE_SOURCE_PING = 2
|
6
|
+
ERRCODE_SOURCE_NO_RESULTS = 4
|
7
|
+
ERRCODE_SOURCE_HARVEST_ERROR = 8
|
8
|
+
ERRCODE_DOCUMENT_INVALID = 16
|
9
|
+
ERRCODE_INGEST_ERROR = 32
|
10
|
+
ERRCODE_OTHER = 128
|
11
|
+
|
12
|
+
ERRCODE_DESC = {
|
13
|
+
ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
|
14
|
+
ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
|
15
|
+
ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
|
16
|
+
ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
|
17
|
+
ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
|
18
|
+
ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
|
19
|
+
ERRCODE_OTHER => 'General error code for non-harvest related issues'
|
20
|
+
}.freeze
|
21
|
+
|
22
|
+
PING_ERRCODE_MAP = {
|
23
|
+
'ping_solr' => ERRCODE_SOLR_PING,
|
24
|
+
'ping_source' => ERRCODE_SOURCE_PING,
|
25
|
+
}
|
26
|
+
|
27
|
+
STATUS_ERRCODE_MAP = {
|
28
|
+
Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
|
29
|
+
Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
|
30
|
+
Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
|
31
|
+
Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
|
32
|
+
Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
|
33
|
+
}.freeze
|
34
|
+
|
35
|
+
# If code is -1, it means display all error codes
|
36
|
+
def self.describe_exit_code(code = -1)
|
37
|
+
code = code.to_i
|
38
|
+
code_list = []
|
39
|
+
|
40
|
+
# Loop through all bit-flag values
|
41
|
+
[128, 64, 32, 16, 8, 4, 2, 1].each do |k|
|
42
|
+
if code >= k || code == -1
|
43
|
+
code_list.prepend k
|
44
|
+
code -= k unless code == -1
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
codes = {}
|
49
|
+
code_list.each do |k|
|
50
|
+
next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
|
51
|
+
codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
|
52
|
+
end
|
53
|
+
|
54
|
+
codes
|
55
|
+
end
|
56
|
+
|
57
|
+
def initialize(status, message=nil)
|
58
|
+
@status_data = status
|
59
|
+
@other_message = message
|
60
|
+
end
|
61
|
+
|
62
|
+
def exit_code
|
63
|
+
if @status_data.nil?
|
64
|
+
puts "OTHER ERROR REPORTED: #{@other_message}"
|
65
|
+
return ERRCODE_OTHER
|
66
|
+
end
|
67
|
+
|
68
|
+
puts "EXIT CODE STATUS:\n#{@status_data.status}"
|
69
|
+
|
70
|
+
code = 0
|
71
|
+
code += ERRCODE_SOLR_PING unless @status_data.ping_solr
|
72
|
+
code += ERRCODE_SOURCE_PING unless @status_data.ping_source
|
73
|
+
code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS] > 0
|
74
|
+
code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE] > 0
|
75
|
+
code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC] > 0
|
76
|
+
code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR] > 0
|
77
|
+
|
78
|
+
code = ERRCODE_OTHER if code == 0
|
79
|
+
|
80
|
+
code
|
81
|
+
end
|
82
|
+
|
83
|
+
def message
|
84
|
+
self.class.describe_exit_code(exit_code).map{|c,v| v}.join("\n")
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -46,8 +46,11 @@ module SearchSolrTools
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def add_documents_to_solr(add_docs)
|
49
|
-
|
49
|
+
status = insert_solr_doc add_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
|
50
|
+
|
51
|
+
if status == Helpers::HarvestStatus::INGEST_OK
|
50
52
|
puts "Added #{add_docs.size} auto suggest documents in one commit"
|
53
|
+
return Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
|
51
54
|
else
|
52
55
|
puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
|
53
56
|
new_add_docs = []
|
@@ -5,6 +5,12 @@ require 'rest-client'
|
|
5
5
|
require 'rsolr'
|
6
6
|
require 'time'
|
7
7
|
|
8
|
+
require 'search_solr_tools'
|
9
|
+
require_relative '../helpers/iso_namespaces'
|
10
|
+
require_relative '../helpers/solr_format'
|
11
|
+
require_relative '../helpers/iso_to_solr'
|
12
|
+
|
13
|
+
|
8
14
|
module SearchSolrTools
|
9
15
|
module Harvesters
|
10
16
|
# base class for solr harvesters
|
@@ -33,10 +39,40 @@ module SearchSolrTools
|
|
33
39
|
url
|
34
40
|
end
|
35
41
|
|
42
|
+
# Ping the Solr instance to ensure that it's running.
|
43
|
+
# The ping query is specified to manually check the title, as it's possible
|
44
|
+
# there is no "default" query in the solr instance.
|
45
|
+
def ping_solr(core = SolrEnvironments[@environment][:collection_name])
|
46
|
+
url = solr_url + "/#{core}/admin/ping?df=title"
|
47
|
+
success = false
|
48
|
+
|
49
|
+
# The ping GET may fail or time out; any exception is reported and treated as a failed ping
|
50
|
+
begin
|
51
|
+
RestClient.get(url) do |response, _request, _result|
|
52
|
+
success = response.code == 200
|
53
|
+
puts "Error in ping request: #{response.body}" unless success
|
54
|
+
end
|
55
|
+
rescue => e
|
56
|
+
puts "Rest exception while pinging Solr: #{e}"
|
57
|
+
end
|
58
|
+
success
|
59
|
+
end
|
60
|
+
|
61
|
+
# This should be overridden by child classes to implement the ability
|
62
|
+
# to "ping" the data center. Returns true if the ping is successful (or, as
|
63
|
+
# in this default, no ping method was defined)
|
64
|
+
def ping_source
|
65
|
+
puts "Harvester does not have ping method defined, assuming true"
|
66
|
+
true
|
67
|
+
end
|
68
|
+
|
36
69
|
def harvest_and_delete(harvest_method, delete_constraints, solr_core = SolrEnvironments[@environment][:collection_name])
|
37
70
|
start_time = Time.now.utc.iso8601
|
38
|
-
|
71
|
+
|
72
|
+
harvest_status = harvest_method.call
|
39
73
|
delete_old_documents start_time, delete_constraints, solr_core
|
74
|
+
|
75
|
+
harvest_status
|
40
76
|
end
|
41
77
|
|
42
78
|
def delete_old_documents(timestamp, constraints, solr_core, force = false)
|
@@ -77,21 +113,31 @@ module SearchSolrTools
|
|
77
113
|
def insert_solr_docs(docs, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
|
78
114
|
success = 0
|
79
115
|
failure = 0
|
116
|
+
|
117
|
+
status = Helpers::HarvestStatus.new
|
118
|
+
|
80
119
|
docs.each do |doc|
|
81
|
-
insert_solr_doc(doc, content_type, core)
|
120
|
+
doc_status = insert_solr_doc(doc, content_type, core)
|
121
|
+
status.record_status doc_status
|
122
|
+
doc_status == Helpers::HarvestStatus::INGEST_OK ? success += 1 : failure += 1
|
82
123
|
end
|
83
124
|
puts "#{success} document#{success == 1 ? '' : 's'} successfully added to Solr."
|
84
125
|
puts "#{failure} document#{failure == 1 ? '' : 's'} not added to Solr."
|
85
|
-
|
126
|
+
|
127
|
+
status
|
86
128
|
end
|
87
129
|
|
130
|
+
# TODO Need to return a specific type of failure:
|
131
|
+
# - Bad record content identified and no ingest attempted
|
132
|
+
# - Solr tries to ingest document and fails (bad content not detected prior to ingest)
|
133
|
+
# - Solr cannot insert document for reasons other than the document structure and content.
|
88
134
|
def insert_solr_doc(doc, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
|
89
135
|
url = solr_url + "/#{core}/update?commit=true"
|
90
|
-
|
136
|
+
status = Helpers::HarvestStatus::INGEST_OK
|
91
137
|
|
92
138
|
# Some of the docs will cause Solr to crash - CPU goes to 195% with `top` and it
|
93
139
|
# doesn't seem to recover.
|
94
|
-
return
|
140
|
+
return Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC if content_type == XML_CONTENT_TYPE && !doc_valid?(doc)
|
95
141
|
|
96
142
|
doc_serialized = get_serialized_doc(doc, content_type)
|
97
143
|
|
@@ -99,13 +145,18 @@ module SearchSolrTools
|
|
99
145
|
begin
|
100
146
|
RestClient.post(url, doc_serialized, content_type: content_type) do |response, _request, _result|
|
101
147
|
success = response.code == 200
|
102
|
-
|
148
|
+
unless success
|
149
|
+
puts "Error for #{doc_serialized}\n\n response: #{response.body}"
|
150
|
+
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
151
|
+
end
|
103
152
|
end
|
104
153
|
rescue => e
|
154
|
+
# TODO Need to provide more detail re: this failure so we know whether to
|
155
|
+
# exit the job with a status != 0
|
105
156
|
puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
|
157
|
+
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
106
158
|
end
|
107
|
-
|
108
|
-
success
|
159
|
+
status
|
109
160
|
end
|
110
161
|
|
111
162
|
def get_serialized_doc(doc, content_type)
|
@@ -118,7 +169,7 @@ module SearchSolrTools
|
|
118
169
|
end
|
119
170
|
end
|
120
171
|
|
121
|
-
# Get results from
|
172
|
+
# Get results from an end point specified in the request_url
|
122
173
|
def get_results(request_url, metadata_path, content_type = 'application/xml')
|
123
174
|
timeout = 300
|
124
175
|
retries_left = 3
|
@@ -134,6 +185,9 @@ module SearchSolrTools
|
|
134
185
|
|
135
186
|
retry if retries_left > 0
|
136
187
|
|
188
|
+
# TODO - Do we really need this "die_on_failure" anymore? The empty return
|
189
|
+
# will cause the "No Documents" error to be thrown in the harvester class
|
190
|
+
# now, so it will pretty much always "die on failure"
|
137
191
|
raise e if @die_on_failure
|
138
192
|
return
|
139
193
|
end
|
@@ -3,6 +3,7 @@ require 'rest-client'
|
|
3
3
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
|
6
|
+
|
6
7
|
module SearchSolrTools
|
7
8
|
module Harvesters
|
8
9
|
# Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
|
@@ -13,6 +14,17 @@ module SearchSolrTools
|
|
13
14
|
Helpers::FacetConfiguration.import_bin_configuration(env)
|
14
15
|
end
|
15
16
|
|
17
|
+
def ping_source
|
18
|
+
begin
|
19
|
+
RestClient.options(nsidc_json_url) do |response, _request, _result|
|
20
|
+
return response.code == 200
|
21
|
+
end
|
22
|
+
rescue => e
|
23
|
+
puts "Error trying to get options for #{nsidc_json_url} (ping)"
|
24
|
+
end
|
25
|
+
false
|
26
|
+
end
|
27
|
+
|
16
28
|
def harvest_and_delete
|
17
29
|
puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
|
18
30
|
super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
|
@@ -22,8 +34,22 @@ module SearchSolrTools
|
|
22
34
|
# this is the main entry point for the class
|
23
35
|
def harvest_nsidc_json_into_solr
|
24
36
|
result = docs_with_translated_entries_from_nsidc
|
25
|
-
|
26
|
-
|
37
|
+
|
38
|
+
status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
39
|
+
|
40
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
|
41
|
+
|
42
|
+
# Record the number of harvest failures; note that if this is 0, that's OK, the status will stay at 0
|
43
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
|
44
|
+
|
45
|
+
raise Errors::HarvestError, status unless status.ok?
|
46
|
+
rescue Errors::HarvestError => e
|
47
|
+
raise e
|
48
|
+
rescue StandardError => e
|
49
|
+
puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
|
50
|
+
puts e.backtrace
|
51
|
+
status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
|
52
|
+
raise Errors::HarvestError, status
|
27
53
|
end
|
28
54
|
|
29
55
|
def nsidc_json_url
|
@@ -33,7 +59,7 @@ module SearchSolrTools
|
|
33
59
|
def result_ids_from_nsidc
|
34
60
|
url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
|
35
61
|
SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
|
36
|
-
get_results
|
62
|
+
get_results(url, '//xmlns:identifier') || []
|
37
63
|
end
|
38
64
|
|
39
65
|
# Fetch a JSON representation of a dataset's metadata
|
@@ -48,7 +74,8 @@ module SearchSolrTools
|
|
48
74
|
docs = []
|
49
75
|
failure_ids = []
|
50
76
|
|
51
|
-
|
77
|
+
all_docs = result_ids_from_nsidc
|
78
|
+
all_docs.each do |r|
|
52
79
|
# Each result looks like:
|
53
80
|
# oai:nsidc.org/AE_L2A
|
54
81
|
id = r.text.split('/').last
|
@@ -60,7 +87,7 @@ module SearchSolrTools
|
|
60
87
|
end
|
61
88
|
end
|
62
89
|
|
63
|
-
{ add_docs: docs, failure_ids: failure_ids }
|
90
|
+
{ num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
|
64
91
|
end
|
65
92
|
end
|
66
93
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Helpers
|
3
|
+
class HarvestStatus
|
4
|
+
INGEST_OK = :ok
|
5
|
+
HARVEST_NO_DOCS = :harvest_none
|
6
|
+
HARVEST_FAILURE = :harvest_fail
|
7
|
+
INGEST_ERR_INVALID_DOC = :invalid
|
8
|
+
INGEST_ERR_SOLR_ERROR = :solr_error
|
9
|
+
OTHER_ERROR = :other
|
10
|
+
PING_SOLR = :ping_solr # used for initialize only
|
11
|
+
PING_SOURCE = :ping_source # used for initialize only
|
12
|
+
|
13
|
+
ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
|
14
|
+
|
15
|
+
attr_reader :status, :ping_solr, :ping_source
|
16
|
+
attr_writer :ping_solr, :ping_source
|
17
|
+
|
18
|
+
# init_info is an optional hash that contains the various status keys and the documents to
|
19
|
+
# associate with them
|
20
|
+
def initialize(init_info={})
|
21
|
+
@status = { INGEST_OK => 0 }
|
22
|
+
@ping_solr = true
|
23
|
+
@ping_source = true
|
24
|
+
ERROR_STATUS.each { |s| @status[s] = 0 }
|
25
|
+
|
26
|
+
init_info.each do |key, count|
|
27
|
+
@status[key] = count if @status.include? key
|
28
|
+
end
|
29
|
+
|
30
|
+
@ping_solr = init_info[PING_SOLR] if init_info.include? PING_SOLR
|
31
|
+
@ping_source = init_info[PING_SOURCE] if init_info.include? PING_SOURCE
|
32
|
+
end
|
33
|
+
|
34
|
+
def record_status(status, count = 1)
|
35
|
+
@status[status] += count
|
36
|
+
end
|
37
|
+
|
38
|
+
def ok?
|
39
|
+
ERROR_STATUS.each { |s| return false unless @status[s] == 0 }
|
40
|
+
@ping_solr && @ping_source
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -4,6 +4,9 @@ require 'rgeo/geo_json'
|
|
4
4
|
require 'rgeo/wkrep/wkt_parser'
|
5
5
|
|
6
6
|
require 'search_solr_tools'
|
7
|
+
require_relative '../helpers/solr_format'
|
8
|
+
require_relative '../helpers/translate_temporal_coverage'
|
9
|
+
require_relative '../helpers/translate_spatial_coverage'
|
7
10
|
|
8
11
|
module SearchSolrTools
|
9
12
|
module Translators
|
@@ -1,4 +1,8 @@
|
|
1
1
|
require 'search_solr_tools'
|
2
|
+
require_relative '../helpers/solr_format'
|
3
|
+
require_relative '../helpers/translate_temporal_coverage'
|
4
|
+
require_relative '../helpers/translate_spatial_coverage'
|
5
|
+
require_relative '../helpers/bounding_box_util'
|
2
6
|
|
3
7
|
module SearchSolrTools
|
4
8
|
module Translators
|
@@ -3,6 +3,9 @@ require 'rest-client'
|
|
3
3
|
require 'rgeo/geo_json'
|
4
4
|
|
5
5
|
require 'search_solr_tools'
|
6
|
+
require_relative '../helpers/solr_format'
|
7
|
+
require_relative '../helpers/translate_temporal_coverage'
|
8
|
+
require_relative '../helpers/translate_spatial_coverage'
|
6
9
|
|
7
10
|
module SearchSolrTools
|
8
11
|
module Translators
|
@@ -2,6 +2,9 @@
|
|
2
2
|
require 'rgeo/geo_json'
|
3
3
|
|
4
4
|
require 'search_solr_tools'
|
5
|
+
require_relative '../helpers/solr_format'
|
6
|
+
require_relative '../helpers/translate_temporal_coverage'
|
7
|
+
require_relative '../helpers/translate_spatial_coverage'
|
5
8
|
|
6
9
|
module SearchSolrTools
|
7
10
|
module Translators
|
data/lib/search_solr_tools.rb
CHANGED
@@ -2,6 +2,9 @@ require_relative 'search_solr_tools/config/environments'
|
|
2
2
|
require_relative 'search_solr_tools/version'
|
3
3
|
|
4
4
|
require_relative 'search_solr_tools/helpers/selectors'
|
5
|
+
require_relative 'search_solr_tools/helpers/harvest_status'
|
6
|
+
require_relative 'search_solr_tools/errors/harvest_error'
|
7
|
+
|
5
8
|
%w( selectors harvesters translators ).each do |subdir|
|
6
9
|
Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
|
7
10
|
end
|
data/search_solr_tools.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
metadata into a working solr instance.
|
17
17
|
EOF
|
18
18
|
spec.homepage = 'https://github.com/nsidc/search-solr-tools'
|
19
|
-
spec.license = '
|
19
|
+
spec.license = 'GPL-3.0-or-later'
|
20
20
|
|
21
21
|
spec.files = `git ls-files -z #{gem_files}`.split("\x0")
|
22
22
|
spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -14,7 +14,7 @@ authors:
|
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date:
|
17
|
+
date: 2022-08-31 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: ffi-geos
|
@@ -292,6 +292,7 @@ files:
|
|
292
292
|
- lib/search_solr_tools.rb
|
293
293
|
- lib/search_solr_tools/config/environments.rb
|
294
294
|
- lib/search_solr_tools/config/environments.yaml
|
295
|
+
- lib/search_solr_tools/errors/harvest_error.rb
|
295
296
|
- lib/search_solr_tools/harvesters/adc.rb
|
296
297
|
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
297
298
|
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
@@ -317,6 +318,7 @@ files:
|
|
317
318
|
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
318
319
|
- lib/search_solr_tools/helpers/data_one_format.rb
|
319
320
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
321
|
+
- lib/search_solr_tools/helpers/harvest_status.rb
|
320
322
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
321
323
|
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
322
324
|
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
@@ -349,7 +351,7 @@ files:
|
|
349
351
|
- search_solr_tools.gemspec
|
350
352
|
homepage: https://github.com/nsidc/search-solr-tools
|
351
353
|
licenses:
|
352
|
-
-
|
354
|
+
- GPL-3.0-or-later
|
353
355
|
metadata: {}
|
354
356
|
post_install_message:
|
355
357
|
rdoc_options: []
|