search_solr_tools 6.2.0 → 6.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -3
- data/bin/search_solr_tools +4 -4
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -19
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -30
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -9
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz:
- data.tar.gz:
+ metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
+ data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
  SHA512:
- metadata.gz:
- data.tar.gz:
+ metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
+ data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,17 @@
- ## v6.
+ ## v6.3.0 (2023-07-24)
+
+ - Update Rubocop configuration to actually run against files, and make
+   necessary corrections to comply with Rubocop styling.
+
+ ## v6.2.0 (2023-07-18)

  - Remove deprecated harvesters and associated tests, helpers, etc.

- ## v6.1.0 (
+ ## v6.1.0 (2023-07-14)

  - Updated a few other dependencies that weren't at the newest versions.

- ## v6.0.0 (
+ ## v6.0.0 (2023-07-14)

  - Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
data/bin/search_solr_tools
CHANGED
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
  end

  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
  )
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
  puts "Target: #{target}"
  begin
  harvest_class = get_harvester_class(target)
- harvester = harvest_class.new(options[:environment], die_on_failure)
+ harvester = harvest_class.new(options[:environment], die_on_failure:)
  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
  )
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -120,7 +120,7 @@ class SolrHarvestCLI < Thor
  no_tasks do
  def harvester_map
  {
- 'nsidc'
+ 'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
  'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
  }
  end
data/lib/search_solr_tools/config/environments.rb
CHANGED
@@ -1,9 +1,11 @@
+ # frozen_string_literal: true
+
  require 'yaml'

  module SearchSolrTools
  # configuration to work with solr locally, or on integration/qa/staging/prod
  module SolrEnvironments
- YAML_ENVS = YAML.load_file(File.expand_path('
+ YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))

  def self.[](env = :development)
  YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
data/lib/search_solr_tools/errors/harvest_error.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module SearchSolrTools
  module Errors
  class HarvestError < StandardError
@@ -10,34 +12,47 @@ module SearchSolrTools
  ERRCODE_OTHER = 128

  ERRCODE_DESC = {
-
-
-
-
-
-
-
+ ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
+ ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
+ ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
+ ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
+ ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
+ ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
+ ERRCODE_OTHER => 'General error code for non-harvest related issues'
  }.freeze

  PING_ERRCODE_MAP = {
- 'ping_solr'
- 'ping_source' => ERRCODE_SOURCE_PING
- }
+ 'ping_solr' => ERRCODE_SOLR_PING,
+ 'ping_source' => ERRCODE_SOURCE_PING
+ }.freeze

  STATUS_ERRCODE_MAP = {
-
-
-
-
-
+ Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
+ Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
+ Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
+ Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
+ Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
  }.freeze

  # If code is -1, it means display all error codes
  def self.describe_exit_code(code = -1)
+ code_list = code_to_list(code)
+
+ codes = {}
+ code_list.each do |k|
+ next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
+
+ codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
+ end
+
+ codes
+ end
+
+ # Loop through all bit-flag values to produce a list of integers
+ def self.code_to_list(code)
  code = code.to_i
  code_list = []

- # Loop through all bit-flag values
  [128, 64, 32, 16, 8, 4, 2, 1].each do |k|
  if code >= k || code == -1
  code_list.prepend k
@@ -45,20 +60,17 @@ module SearchSolrTools
  end
  end

-
- code_list.each do |k|
- next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
- codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
- end
-
- codes
+ code_list
  end

- def initialize(status, message=nil)
+ def initialize(status, message = nil)
  @status_data = status
  @other_message = message
+
+ super message
  end

+ # rubocop:disable Metrics/AbcSize
  def exit_code
  if @status_data.nil?
  puts "OTHER ERROR REPORTED: #{@other_message}"
@@ -70,19 +82,20 @@ module SearchSolrTools
  code = 0
  code += ERRCODE_SOLR_PING unless @status_data.ping_solr
  code += ERRCODE_SOURCE_PING unless @status_data.ping_source
- code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS]
- code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE]
- code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC]
- code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR]
+ code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
+ code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
+ code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
+ code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?

- code = ERRCODE_OTHER if code
+ code = ERRCODE_OTHER if code.zero?

  code
  end
+ # rubocop:enable Metrics/AbcSize

  def message
- self.class.describe_exit_code(exit_code).map{|
+ self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
  end
  end
  end
- end
+ end
data/lib/search_solr_tools/harvesters/auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require_relative 'base'
  require 'json'
  require 'rest-client'
@@ -6,8 +8,8 @@ module SearchSolrTools
  module Harvesters
  # Use the nsidc_oai core to populate the auto_suggest core
  class AutoSuggest < Base
- def initialize(env = 'development', die_on_failure
- super
+ def initialize(env = 'development', die_on_failure: false)
+ super
  @env_settings = SolrEnvironments[@environment] # super sets @environment.
  end

@@ -50,7 +52,7 @@ module SearchSolrTools

  if status == Helpers::HarvestStatus::INGEST_OK
  puts "Added #{add_docs.size} auto suggest documents in one commit"
-
+ Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
  else
  puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
  new_add_docs = []
data/lib/search_solr_tools/harvesters/base.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require 'multi_json'
  require 'nokogiri'
  require 'open-uri'
@@ -9,7 +11,6 @@ require 'search_solr_tools'
  require_relative '../helpers/iso_namespaces'
  require_relative '../helpers/solr_format'

-
  module SearchSolrTools
  module Harvesters
  # base class for solr harvesters
@@ -20,7 +21,7 @@ module SearchSolrTools
  XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
  JSON_CONTENT_TYPE = 'application/json; charset=utf-8'

- def initialize(env = 'development', die_on_failure
+ def initialize(env = 'development', die_on_failure: false)
  @environment = env
  @die_on_failure = die_on_failure
  end
@@ -51,7 +52,7 @@ module SearchSolrTools
  success = response.code == 200
  puts "Error in ping request: #{response.body}" unless success
  end
- rescue => e
+ rescue StandardError => e
  puts "Rest exception while pinging Solr: #{e}"
  end
  success
@@ -61,7 +62,7 @@ module SearchSolrTools
  # to "ping" the data center. Returns true if the ping is successful (or, as
  # in this default, no ping method was defined)
  def ping_source
- puts
+ puts 'Harvester does not have ping method defined, assuming true'
  true
  end

@@ -74,12 +75,12 @@ module SearchSolrTools
  harvest_status
  end

- def delete_old_documents(timestamp, constraints, solr_core, force
+ def delete_old_documents(timestamp, constraints, solr_core, force: false)
  constraints = sanitize_data_centers_constraints(constraints)
  delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
  solr = RSolr.connect url: solr_url + "/#{solr_core}"
  unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
- if unchanged_count
+ if unchanged_count.zero?
  puts "All documents were updated after #{timestamp}, nothing to delete"
  else
  puts "Begin removing documents older than #{timestamp}"
@@ -90,8 +91,8 @@ module SearchSolrTools
  def sanitize_data_centers_constraints(query_string)
  # Remove lucene special characters, preserve the query parameter and compress whitespace
  query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
- query_string.gsub!(
- query_string.gsub!(
+ query_string.gsub!('data_centers ', 'data_centers:')
+ query_string.gsub!('source ', 'source:')
  query_string.squeeze(' ').strip
  end

@@ -126,7 +127,7 @@ module SearchSolrTools
  status
  end

- # TODO Need to return a specific type of failure:
+ # TODO: Need to return a specific type of failure:
  # - Bad record content identified and no ingest attempted
  # - Solr tries to ingest document and fails (bad content not detected prior to ingest)
  # - Solr cannot insert document for reasons other than the document structure and content.
@@ -142,15 +143,15 @@ module SearchSolrTools

  # Some docs will cause solr to time out during the POST
  begin
- RestClient.post(url, doc_serialized, content_type:
+ RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
  success = response.code == 200
  unless success
  puts "Error for #{doc_serialized}\n\n response: #{response.body}"
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
  end
  end
- rescue => e
- # TODO Need to provide more detail re: this failure so we know whether to
+ rescue StandardError => e
+ # TODO: Need to provide more detail re: this failure so we know whether to
  # exit the job with a status != 0
  puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
@@ -160,11 +161,11 @@ module SearchSolrTools

  def get_serialized_doc(doc, content_type)
  if content_type.eql?(XML_CONTENT_TYPE)
-
+ doc.respond_to?(:to_xml) ? doc.to_xml : doc
  elsif content_type.eql?(JSON_CONTENT_TYPE)
-
+ MultiJson.dump(doc)
  else
-
+ doc
  end
  end

@@ -177,17 +178,18 @@ module SearchSolrTools

  begin
  puts "Request: #{request_url}"
- response = URI.open(
+ response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
  rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
  retries_left -= 1
  puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."

- retry if retries_left
+ retry if retries_left.positive?

- # TODO
+ # TODO: Do we really need this "die_on_failure" anymore? The empty return
  # will cause the "No Documents" error to be thrown in the harvester class
  # now, so it will pretty much always "die on failure"
  raise e if @die_on_failure
+
  return
  end
  doc = Nokogiri.XML(response)
@@ -215,7 +217,7 @@ module SearchSolrTools
  spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
  return true if spatial_coverages.nil?

- spatial_coverages = spatial_coverages.text.split
+ spatial_coverages = spatial_coverages.text.split

  # We've only seen the failure with 4 spatial coverage values
  return true if spatial_coverages.size < 4
data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require_relative 'auto_suggest'

  module SearchSolrTools
@@ -16,11 +18,11 @@ module SearchSolrTools
  def fields
  {
  'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
- 'full_title'
- 'copy_parameters'
- 'full_platforms'
- 'full_sensors'
- 'full_authors'
+ 'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
+ 'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
+ 'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+ 'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+ 'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
  }
  end
data/lib/search_solr_tools/harvesters/nsidc_json.rb
CHANGED
@@ -1,15 +1,16 @@
+ # frozen_string_literal: true
+
  require 'json'
  require 'rest-client'

  require 'search_solr_tools'

-
  module SearchSolrTools
  module Harvesters
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
  class NsidcJson < Base
- def initialize(env = 'development', die_on_failure
- super
+ def initialize(env = 'development', die_on_failure: false)
+ super
  @translator = Translators::NsidcJsonToSolr.new
  Helpers::FacetConfiguration.import_bin_configuration(env)
  end
@@ -19,7 +20,7 @@ module SearchSolrTools
  RestClient.options(nsidc_json_url) do |response, _request, _result|
  return response.code == 200
  end
- rescue
+ rescue StandardError
  puts "Error trying to get options for #{nsidc_json_url} (ping)"
  end
  false
@@ -37,7 +38,7 @@ module SearchSolrTools

  status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE

- status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs]
+ status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?

  # Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
@@ -66,7 +67,7 @@ module SearchSolrTools
  # @param id [String] NSIDC authoritative ID for the dataset
  # @return [Hash] Parsed version of the JSON response
  def fetch_json_from_nsidc(id)
- json_response = RestClient.get(nsidc_json_url
+ json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
  JSON.parse(json_response)
  end

@@ -81,13 +82,13 @@ module SearchSolrTools
  id = r.text.split('/').last
  begin
  docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
- rescue => e
+ rescue StandardError => e
  puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
  failure_ids << id
  end
  end

- { num_docs: all_docs.size, add_docs: docs, failure_ids:
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: }
  end
  end
  end
data/lib/search_solr_tools/helpers/bounding_box_util.rb
CHANGED
@@ -1,4 +1,6 @@
-
+ # frozen_string_literal: true
+
+ require_relative 'iso_namespaces'

  module SearchSolrTools
  module Helpers
@@ -8,12 +10,10 @@ module SearchSolrTools
  NORTHERN_GLOBAL_BOUNDARY = 85.0

  def self.bounding_box_hash_from_geo_json(geometry)
- if geometry_is_point?(geometry)
-
-
-
- return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
- end
+ return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
+
+ bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
+ { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
  end

  def self.geometry_is_point?(geometry)
@@ -30,7 +30,7 @@ module SearchSolrTools
  end

  def self.box_invalid?(box)
- [
+ %i[north south east west].any? { |d| box[d].to_s.empty? }
  end
  end
  end
data/lib/search_solr_tools/helpers/facet_configuration.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require 'json'
  require 'rest_client'
  require 'singleton'
@@ -8,7 +10,7 @@ module SearchSolrTools
  class FacetConfiguration
  include Singleton
  def self.import_bin_configuration(env)
- @bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url]
+ @bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
  end

  def self.get_facet_bin(facet_name)
data/lib/search_solr_tools/helpers/harvest_status.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module SearchSolrTools
  module Helpers
  class HarvestStatus
@@ -7,17 +9,17 @@ module SearchSolrTools
  INGEST_ERR_INVALID_DOC = :invalid
  INGEST_ERR_SOLR_ERROR = :solr_error
  OTHER_ERROR = :other
- PING_SOLR = :ping_solr
- PING_SOURCE = :ping_source
+ PING_SOLR = :ping_solr # used for initialize only
+ PING_SOURCE = :ping_source # used for initialize only

- ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
+ ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze

-
-
+ attr_accessor :ping_solr, :ping_source
+ attr_reader :status

  # init_info is an optional hash that contains the various status keys and the documents to
  # associate with them
- def initialize(init_info={})
+ def initialize(init_info = {})
  @status = { INGEST_OK => 0 }
  @ping_solr = true
  @ping_source = true
@@ -36,9 +38,9 @@ module SearchSolrTools
  end

  def ok?
- ERROR_STATUS.each { |s| return false unless @status[s]
+ ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
  @ping_solr && @ping_source
  end
  end
  end
- end
+ end
data/lib/search_solr_tools/helpers/iso_namespaces.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module SearchSolrTools
  module Helpers
  # Helper class to provide default namespaces for XML document parsing.
@@ -25,7 +27,7 @@ module SearchSolrTools
  'srv' => 'http://www.isotc211.org/2005/srv',
  'xlink' => 'http://www.w3.org/1999/xlink',
  'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
- }
+ }.freeze
  end
  end
  end
data/lib/search_solr_tools/helpers/solr_format.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require 'date'
  require 'iso8601'

@@ -7,15 +9,14 @@ require_relative 'facet_configuration'
  module SearchSolrTools
  module Helpers
  # Methods for generating formatted values that can be indexed by SOLR
- # rubocop:disable Metrics/ModuleLength
  module SolrFormat
  DATA_CENTER_NAMES = {
- NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
- }
+ NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
+ }.freeze

  NOT_SPECIFIED = 'Not specified'

- TEMPORAL_RESOLUTION_FACET_VALUES = %w
+ TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
  SUBHOURLY_INDEX = 0
  HOURLY_INDEX = 1
  SUBDAILY_INDEX = 2
@@ -27,7 +28,7 @@ module SearchSolrTools
  YEARLY_INDEX = 8
  MULTIYEARLY_INDEX = 9

- SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
+ SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
  SPATIAL_0_500_INDEX = 0
  SPATIAL_501_1_INDEX = 1
  SPATIAL_2_5_INDEX = 2
@@ -44,7 +45,7 @@ module SearchSolrTools
  end

  def self.temporal_display_str(date_range)
- temporal_str =
+ temporal_str = (date_range[:start]).to_s
  temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
  temporal_str
  end
@@ -67,6 +68,7 @@ module SearchSolrTools

  def self.get_temporal_duration_facet(duration)
  return NOT_SPECIFIED if duration.nil?
+
  years = duration.to_i / 365
  temporal_duration_range(years)
  end
@@ -86,31 +88,28 @@ module SearchSolrTools
  def self.facet_binning(type, format_string)
  binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
  if binned_facet.nil?
-
+ format_string
  elsif binned_facet.eql?('exclude')
-
+ nil
  else
-
+ binned_facet
  end
-
- nil
  end

  def self.parameter_binning(parameter_string)
  binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
  # use variable_level_1 if no mapping exists
-
-
-
-
- return binned_parameter
- end
+ return binned_parameter unless binned_parameter.nil?
+
+ parts = parameter_string.split '>'
+ return parts[3].strip if parts.length >= 4

  nil
  end

  def self.resolution_value(resolution, find_index_method, resolution_values)
- return NOT_SPECIFIED if
+ return NOT_SPECIFIED if resolution_not_specified? resolution
+
  if resolution['type'] == 'single'
  i = send(find_index_method, resolution['resolution'])
  return resolution_values[i]
@@ -120,12 +119,12 @@ module SearchSolrTools
  j = send(find_index_method, resolution['max_resolution'])
  return resolution_values[i..j]
  end
-
+ raise "Invalid resolution #{resolution['type']}"
  end

  def self.resolution_not_specified?(resolution)
  return true if resolution.to_s.empty?
- return true unless %w
+ return true unless %w[single range].include? resolution['type']
  return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
  return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
  end
@@ -140,6 +139,7 @@ module SearchSolrTools
  else
  facet = 'Between 1 and 170 degrees of latitude change | Regional'
  end
+
  facet
  end

@@ -152,8 +152,6 @@ module SearchSolrTools
  "#{d.iso8601[0..-7]}Z" unless d.nil?
  end

- private
-
  MIN_DATE = '00010101'
  MAX_DATE = Time.now.strftime('%Y%m%d')

@@ -166,7 +164,6 @@ module SearchSolrTools
  nil
  end

- # rubocop:disable CyclomaticComplexity
  def self.find_index_for_single_temporal_resolution_value(string_duration)
  iso8601_duration = ISO8601::Duration.new(string_duration)

@@ -186,10 +183,9 @@ module SearchSolrTools
  MULTIYEARLY_INDEX
  end
  end
- # rubocop:enable CyclomaticComplexity

  def self.find_index_for_single_spatial_resolution_value(string_duration)
- value, units = string_duration.split
+ value, units = string_duration.split

  if units == 'deg'
  spatial_resolution_index_degrees(value)
@@ -234,11 +230,10 @@ module SearchSolrTools
  end

  def self.date?(date)
-
-
-
-
- valid_date
+ return false unless date.is_a? String
+
+ d = DateTime.parse(date.strip) rescue false
+ DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
  end

  def self.format_date_for_index(date_str, default)
data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb
CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require 'rgeo/geo_json'

  require_relative 'bounding_box_util'
@@ -42,7 +44,7 @@ module SearchSolrTools

  def self.geojson_to_spatial_area(spatial_coverage_geom)
  spatial_areas = spatial_coverage_geom.map do |geo_json|
- if %w
+ if %w[point].include?(geo_json.geometry_type.to_s.downcase)
  0.0
  else
  bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
@@ -50,11 +52,13 @@ module SearchSolrTools
  end
  end
  return nil if spatial_areas.empty?
-
+
+ spatial_areas.max
  end

  def self.geojson_to_global_facet(spatial_coverage_geom)
  return nil if spatial_coverage_geom.nil?
+
  spatial_coverage_geom.each do |geo_json|
  bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
  return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
@@ -63,13 +67,13 @@ module SearchSolrTools
  end

  def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
-
-
-
-
-
-
- end
+ return if spatial_coverage_geom.nil?
+
+ spatial_coverage_geom.map do |geo_json|
+ bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
+ scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
+ scope unless scope.nil?
+ end.uniq
  end
  end
  end
data/lib/search_solr_tools/translators/nsidc_json.rb
CHANGED
@@ -1,4 +1,5 @@
- #
+ # frozen_string_literal: true
+
  require 'rgeo/geo_json'

  require 'search_solr_tools'
@@ -10,50 +11,50 @@ module SearchSolrTools
  module Translators
  # Translates NSIDC JSON format to Solr JSON add format
  class NsidcJsonToSolr
- PARAMETER_PARTS = %w
+ PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze

  # rubocop:disable Metrics/MethodLength
  # rubocop:disable Metrics/AbcSize
  def translate(json_doc)
- copy_keys = %w
+ copy_keys = %w[title summary keywords brokered]
  temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
  spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])

  solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
  solr_add_hash.merge!(
- 'authoritative_id'
- 'dataset_version'
- 'data_centers'
- 'facet_data_center'
- 'authors'
- 'topics'
- 'parameters'
- 'full_parameters'
- 'facet_parameter'
- 'platforms'
- 'sensors'
- 'facet_sensor'
- 'published_date'
- 'spatial_coverages'
- 'spatial'
- 'spatial_area'
- 'facet_spatial_coverage'
- 'facet_spatial_scope'
- 'temporal_coverages'
- 'temporal_duration'
- 'temporal'
- 'facet_temporal_duration'
- 'last_revision_date'
- 'dataset_url'
- 'distribution_formats'
- 'facet_format'
- 'source'
- 'popularity'
- 'data_access_urls'
- 'facet_sponsored_program'
+ 'authoritative_id' => json_doc['authoritativeId'],
+ 'dataset_version' => json_doc['majorVersion']['version'],
+ 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
+ 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
+ 'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
+ 'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
+ 'parameters' => translate_parameters(json_doc['parameters']),
+ 'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
+ 'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
+ 'platforms' => translate_json_string(json_doc['platforms']),
+ 'sensors' => translate_json_string(json_doc['instruments']),
+ 'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
+ 'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
+ 'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
+ 'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
+ 'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
+ 'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
+ 'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
+ 'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
+ 'temporal_duration' => temporal_coverage_values['temporal_duration'],
+ 'temporal' => temporal_coverage_values['temporal'],
+ 'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
+ 'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
+ 'dataset_url' => json_doc['datasetUrl'],
+ 'distribution_formats' => json_doc['distributionFormats'],
+ 'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
+ 'source' => %w[NSIDC ADE],
+ 'popularity' => json_doc['popularity'],
+ 'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
+ 'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
  'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
- 'facet_spatial_resolution'
- 'sponsored_programs'
+ 'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
+ 'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
  )
  end
  # rubocop:enable Metrics/MethodLength
@@ -70,13 +71,14 @@ module SearchSolrTools
  def translate_sensor_to_facet_sensor(json)
  facet_values = []
  return facet_values if json.nil?
+
  json.each do |json_entry|
  sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
- if sensor_bin.eql? json_entry['shortName']
-
-
-
-
+ facet_values << if sensor_bin.eql? json_entry['shortName']
+ "#{json_entry['longName']} | #{json_entry['shortName']}"
+ else
+ " | #{sensor_bin}"
+ end
  end
  facet_values
  end
@@ -100,12 +102,13 @@ module SearchSolrTools
  end

  def translate_iso_topic_categories(iso_topic_categories_json)
- iso_topic_categories_json
+ iso_topic_categories_json&.map { |t| t['name'] }
  end

  def translate_data_access_urls(json)
  values = []
  return values if json.nil?
+
  json.each do |json_entry|
  link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
  link_type = json_entry['type'].nil? ? '' : json_entry['type']
@@ -120,6 +123,7 @@ module SearchSolrTools
  def translate_internal_datacenters(json)
  values = []
  return values if json.nil?
+
  json.each do |json_entry|
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
  values << short_name
@@ -130,6 +134,7 @@ module SearchSolrTools
  def translate_short_long_names_to_facet_value(json)
  facet_values = []
  return facet_values if json.nil?
+
  json.each do |json_entry|
  long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
@@ -169,6 +174,7 @@ module SearchSolrTools
  def translate_parameters_to_facet_parameters(parameters_json)
  parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
  return [] if parameters_strings.nil?
+
  facet_params = []
  parameters_strings.each do |str|
  facet_params << Helpers::SolrFormat.parameter_binning(str)
@@ -199,8 +205,7 @@ module SearchSolrTools
  end

  def generate_data_citation_creators(data_citation)
- data_citation.nil? ?
- creators
+ data_citation.nil? ? [] : data_citation['creators']
  end

  def generate_part_array(json, limit_values = nil)
@@ -214,6 +219,5 @@ module SearchSolrTools
  parts
  end
  end
- # rubocop:enable Metrics/ClassLength
  end
  end
data/lib/search_solr_tools.rb
CHANGED
@@ -1,9 +1,11 @@
+ # frozen_string_literal: true
+
  require_relative 'search_solr_tools/config/environments'
  require_relative 'search_solr_tools/version'

  require_relative 'search_solr_tools/helpers/harvest_status'
  require_relative 'search_solr_tools/errors/harvest_error'

- %w
+ %w[harvesters translators].each do |subdir|
  Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
  end
metadata
CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: search_solr_tools
  version: !ruby/object:Gem::Version
- version: 6.
+ version: 6.3.0
  platform: ruby
  authors:
  - Chris Chalstrom
@@ -14,7 +14,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2023-07-
+ date: 2023-07-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ffi-geos
@@ -354,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.4.
+ rubygems_version: 3.4.17
  signing_key:
  specification_version: 4
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.