search_solr_tools 8.0.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/bin/search_solr_tools +13 -4
- data/lib/search_solr_tools/config/environments.yaml +7 -8
- data/lib/search_solr_tools/errors/harvest_error.rb +0 -2
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +3 -1
- data/lib/search_solr_tools/harvesters/base.rb +18 -7
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +2 -1
- data/lib/search_solr_tools/helpers/facet_configuration.rb +5 -1
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b3ea7011c8dec193a548d8fd55041aa3eeb7ebe8cf9898a76b2181ae1b2f751b
|
|
4
|
+
data.tar.gz: 7ab1619de2ad05a715e7ceaff6530d2ea3c8d40fb0b25acc9f2f5af26ab6d74d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f36385c78277a840c2b0ece657c933a0c2ae6cdd2b05f3090c022fad58e0a26f2aca5f5140709a514b730d0c28ce2984e0edb387004e9203683612088b82beee
|
|
7
|
+
data.tar.gz: 652b5ffc34e1ba397808d2e5bdc6738c694552e4287963b73caa877184ceb0d7e178d8176ce128a158f9f397c69c5f330933bce9240061fb407c52305623fd5c
|
data/CHANGELOG.md
CHANGED
data/bin/search_solr_tools
CHANGED
|
@@ -85,6 +85,15 @@ class SolrHarvestCLI < Thor
|
|
|
85
85
|
rescue StandardError => e
|
|
86
86
|
# If it gets here, there is an error that we aren't expecting.
|
|
87
87
|
logger.error "harvest failed for #{target}: #{e.message}"
|
|
88
|
+
if e.response
|
|
89
|
+
logger.error " Status Code: #{e.response[:status]}"
|
|
90
|
+
logger.error " Error body: #{e.response[:body]}"
|
|
91
|
+
end
|
|
92
|
+
if e.respond_to?(:cause) && e.cause
|
|
93
|
+
logger.error " Underlying System Error: #{e.cause.class} - #{e.cause.message}"
|
|
94
|
+
else
|
|
95
|
+
logger.error ' Underlying System Error: Cause unknown...'
|
|
96
|
+
end
|
|
88
97
|
logger.error e.backtrace
|
|
89
98
|
exit SearchSolrTools::Errors::HarvestError::ERRCODE_OTHER
|
|
90
99
|
end
|
|
@@ -101,8 +110,8 @@ class SolrHarvestCLI < Thor
|
|
|
101
110
|
def delete_all
|
|
102
111
|
env = SearchSolrTools::SolrEnvironments[options[:environment]]
|
|
103
112
|
logger.info('DELETE ALL started')
|
|
104
|
-
`curl '
|
|
105
|
-
`curl '
|
|
113
|
+
`curl -k 'https://#{env[:host]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
|
|
114
|
+
`curl -k 'https://#{env[:host]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
|
|
106
115
|
logger.info('DELETE ALL complete')
|
|
107
116
|
end
|
|
108
117
|
|
|
@@ -111,8 +120,8 @@ class SolrHarvestCLI < Thor
|
|
|
111
120
|
def delete_all_auto_suggest
|
|
112
121
|
env = SearchSolrTools::SolrEnvironments[options[:environment]]
|
|
113
122
|
logger.info('DELETE ALL AUTO_SUGGEST started')
|
|
114
|
-
`curl '
|
|
115
|
-
`curl '
|
|
123
|
+
`curl -k 'https://#{env[:host]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
|
|
124
|
+
`curl -k 'https://#{env[:host]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
|
|
116
125
|
logger.info('DELETE ALL AUTO_SUGGEST complete')
|
|
117
126
|
end
|
|
118
127
|
|
|
data/lib/search_solr_tools/config/environments.yaml
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
:auto_suggest_collection_name: auto_suggest
|
|
3
3
|
:collection_name: nsidc_oai
|
|
4
4
|
:collection_path: solr
|
|
5
|
-
:port: 8983
|
|
6
5
|
|
|
7
6
|
# Not using DCS API v2 here because not all retired datasets have their "retired"
|
|
8
7
|
# flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
|
|
@@ -17,7 +16,7 @@
|
|
|
17
16
|
:local:
|
|
18
17
|
<<: *common
|
|
19
18
|
:host: localhost
|
|
20
|
-
:nsidc_dataset_metadata_url:
|
|
19
|
+
:nsidc_dataset_metadata_url: https://integration.nsidc.org/api/dataset/metadata/
|
|
21
20
|
|
|
22
21
|
:dev: &dev
|
|
23
22
|
<<: *common
|
|
@@ -25,7 +24,7 @@
|
|
|
25
24
|
:host: dev.search-solr.USERNAME.dev.int.nsidc.org
|
|
26
25
|
## For the metadata content, either set up your own instance of dataset-catalog-services
|
|
27
26
|
## or change the URL below to point to integration
|
|
28
|
-
:nsidc_dataset_metadata_url:
|
|
27
|
+
:nsidc_dataset_metadata_url: https://dev.dcs.USERNAME.dev.int.nsidc.org:11580/api/dataset/metadata/
|
|
29
28
|
|
|
30
29
|
:development:
|
|
31
30
|
<<: *dev
|
|
@@ -33,24 +32,24 @@
|
|
|
33
32
|
:integration:
|
|
34
33
|
<<: *common
|
|
35
34
|
:host: integration.search-solr.apps.int.nsidc.org
|
|
36
|
-
:nsidc_dataset_metadata_url:
|
|
35
|
+
:nsidc_dataset_metadata_url: https://integration.nsidc.org/api/dataset/metadata/
|
|
37
36
|
|
|
38
37
|
:qa:
|
|
39
38
|
<<: *common
|
|
40
39
|
:host: qa.search-solr.apps.int.nsidc.org
|
|
41
|
-
:nsidc_dataset_metadata_url:
|
|
40
|
+
:nsidc_dataset_metadata_url: https://qa.nsidc.org/api/dataset/metadata/
|
|
42
41
|
|
|
43
42
|
:staging:
|
|
44
43
|
<<: *common
|
|
45
44
|
:host: staging.search-solr.apps.int.nsidc.org
|
|
46
|
-
:nsidc_dataset_metadata_url:
|
|
45
|
+
:nsidc_dataset_metadata_url: https://staging.nsidc.org/api/dataset/metadata/
|
|
47
46
|
|
|
48
47
|
:blue:
|
|
49
48
|
<<: *common
|
|
50
49
|
:host: blue.search-solr.apps.int.nsidc.org
|
|
51
|
-
:nsidc_dataset_metadata_url:
|
|
50
|
+
:nsidc_dataset_metadata_url: https://nsidc.org/api/dataset/metadata/
|
|
52
51
|
|
|
53
52
|
:production:
|
|
54
53
|
<<: *common
|
|
55
54
|
:host: search-solr.apps.int.nsidc.org
|
|
56
|
-
:nsidc_dataset_metadata_url:
|
|
55
|
+
:nsidc_dataset_metadata_url: https://nsidc.org/api/dataset/metadata/
|
|
data/lib/search_solr_tools/errors/harvest_error.rb
CHANGED
|
@@ -74,7 +74,6 @@ module SearchSolrTools
|
|
|
74
74
|
super(message)
|
|
75
75
|
end
|
|
76
76
|
|
|
77
|
-
# rubocop:disable Metrics/AbcSize
|
|
78
77
|
def exit_code
|
|
79
78
|
if @status_data.nil?
|
|
80
79
|
logger.error "OTHER ERROR REPORTED: #{@other_message}"
|
|
@@ -95,7 +94,6 @@ module SearchSolrTools
|
|
|
95
94
|
|
|
96
95
|
code
|
|
97
96
|
end
|
|
98
|
-
# rubocop:enable Metrics/AbcSize
|
|
99
97
|
|
|
100
98
|
def message
|
|
101
99
|
self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
|
|
data/lib/search_solr_tools/harvesters/auto_suggest.rb
CHANGED
|
@@ -32,7 +32,9 @@ module SearchSolrTools
|
|
|
32
32
|
url += "&facet.field=#{name}"
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
-
serialized_facet_response = RestClient.
|
|
35
|
+
serialized_facet_response = RestClient::Request.execute(
|
|
36
|
+
method: :get, url: url, verify_ssl: OpenSSL::SSL::VERIFY_NONE
|
|
37
|
+
)
|
|
36
38
|
JSON.parse(serialized_facet_response)
|
|
37
39
|
end
|
|
38
40
|
|
|
data/lib/search_solr_tools/harvesters/base.rb
CHANGED
|
@@ -5,6 +5,7 @@ require 'nokogiri'
|
|
|
5
5
|
require 'open-uri'
|
|
6
6
|
require 'rest-client'
|
|
7
7
|
require 'rsolr'
|
|
8
|
+
require 'faraday'
|
|
8
9
|
require 'time'
|
|
9
10
|
|
|
10
11
|
require 'search_solr_tools'
|
|
@@ -30,7 +31,7 @@ module SearchSolrTools
|
|
|
30
31
|
|
|
31
32
|
def solr_url
|
|
32
33
|
env = SolrEnvironments[@environment]
|
|
33
|
-
"
|
|
34
|
+
"https://#{env[:host]}/#{env[:collection_path]}"
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
# Some data providers require encoding (such as URI.encode),
|
|
@@ -50,12 +51,12 @@ module SearchSolrTools
|
|
|
50
51
|
|
|
51
52
|
# Some docs will cause solr to time out during the POST
|
|
52
53
|
begin
|
|
53
|
-
RestClient.get
|
|
54
|
-
success = response.code
|
|
54
|
+
RestClient::Request.execute(method: :get, url: url, verify_ssl: OpenSSL::SSL::VERIFY_NONE) do |response, _request, _result|
|
|
55
|
+
success = (200..299).include?(response.code)
|
|
55
56
|
logger.error "Error in ping request: #{response.body}" unless success
|
|
56
57
|
end
|
|
57
58
|
rescue StandardError => e
|
|
58
|
-
logger.error "Rest exception while pinging Solr: #{e}"
|
|
59
|
+
logger.error "Rest exception while pinging Solr at #{url}: #{e}"
|
|
59
60
|
end
|
|
60
61
|
success
|
|
61
62
|
end
|
|
@@ -80,7 +81,15 @@ module SearchSolrTools
|
|
|
80
81
|
def delete_old_documents(timestamp, constraints, solr_core, force: false)
|
|
81
82
|
constraints = sanitize_data_centers_constraints(constraints)
|
|
82
83
|
delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
|
|
83
|
-
|
|
84
|
+
full_solr_url = "#{solr_url}/#{solr_core}"
|
|
85
|
+
|
|
86
|
+
faraday_connection = Faraday.new(url: full_solr_url, ssl: { verify: false }) do |conn|
|
|
87
|
+
conn.request :url_encoded
|
|
88
|
+
conn.adapter Faraday.default_adapter
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
solr = RSolr.connect(faraday_connection, url: full_solr_url)
|
|
92
|
+
|
|
84
93
|
unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
|
|
85
94
|
if unchanged_count.zero?
|
|
86
95
|
logger.info "All documents were updated after #{timestamp}, nothing to delete"
|
|
@@ -145,8 +154,10 @@ module SearchSolrTools
|
|
|
145
154
|
|
|
146
155
|
# Some docs will cause solr to time out during the POST
|
|
147
156
|
begin
|
|
148
|
-
RestClient.
|
|
149
|
-
|
|
157
|
+
RestClient::Request.execute(
|
|
158
|
+
method: :post, url: url, payload: doc_serialized, headers: { content_type: }, verify_ssl: OpenSSL::SSL::VERIFY_NONE
|
|
159
|
+
) do |response, _request, _result|
|
|
160
|
+
success = (200..299).include?(response.code)
|
|
150
161
|
unless success
|
|
151
162
|
logger.error "Error for #{doc_serialized}\n\n response: #{response.body}"
|
|
152
163
|
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
|
data/lib/search_solr_tools/harvesters/nsidc_json.rb
CHANGED
|
@@ -18,7 +18,7 @@ module SearchSolrTools
|
|
|
18
18
|
def ping_source
|
|
19
19
|
begin
|
|
20
20
|
RestClient.options(nsidc_json_url) do |response, _request, _result|
|
|
21
|
-
return response.code
|
|
21
|
+
return (200..299).include? response.code
|
|
22
22
|
end
|
|
23
23
|
rescue StandardError
|
|
24
24
|
logger.error "Error trying to get options for #{nsidc_json_url} (ping)"
|
|
@@ -45,6 +45,7 @@ module SearchSolrTools
|
|
|
45
45
|
|
|
46
46
|
raise Errors::HarvestError, status unless status.ok?
|
|
47
47
|
rescue Errors::HarvestError => e
|
|
48
|
+
logger.error 'A HarvestError occurred'
|
|
48
49
|
raise e
|
|
49
50
|
rescue StandardError => e
|
|
50
51
|
logger.error "An unexpected exception occurred while trying to harvest or insert: #{e}"
|
|
data/lib/search_solr_tools/helpers/facet_configuration.rb
CHANGED
|
@@ -11,7 +11,11 @@ module SearchSolrTools
|
|
|
11
11
|
include Singleton
|
|
12
12
|
|
|
13
13
|
def self.import_bin_configuration(env)
|
|
14
|
-
|
|
14
|
+
return unless @bin_configuration.nil?
|
|
15
|
+
|
|
16
|
+
@bin_configuration = JSON.parse(
|
|
17
|
+
RestClient::Request.execute(method: :get, url: "#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration", verify_ssl: OpenSSL::SSL::VERIFY_NONE)
|
|
18
|
+
)
|
|
15
19
|
end
|
|
16
20
|
|
|
17
21
|
def self.get_facet_bin(facet_name)
|