search_solr_tools 6.2.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -3
- data/bin/search_solr_tools +4 -4
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -19
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -30
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -9
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
+  data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
+  data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,17 @@
-## v6.
+## v6.3.0 (2023-07-24)
+
+- Update Rubocop configuration to actually run against files, and make
+  necessary corrections to comply with Rubocop styling.
+
+## v6.2.0 (2023-07-18)
 
 - Remove deprecated harvesters and associated tests, helpers, etc.
 
-## v6.1.0 (
+## v6.1.0 (2023-07-14)
 
 - Updated a few other dependencies that weren't at the newest versions.
 
-## v6.0.0 (
+## v6.0.0 (2023-07-14)
 
 - Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
 
data/bin/search_solr_tools
CHANGED
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
     end
 
     ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
-      SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+      SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
       SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
     )
     raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
      puts "Target: #{target}"
      begin
        harvest_class = get_harvester_class(target)
-        harvester = harvest_class.new(options[:environment], die_on_failure)
+        harvester = harvest_class.new(options[:environment], die_on_failure:)
        ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
-          SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+          SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
          SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
        )
        raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -120,7 +120,7 @@ class SolrHarvestCLI < Thor
   no_tasks do
     def harvester_map
       {
-        'nsidc'
+        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
         'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
       }
     end
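The two hunks above are the CLI's health check: build a `HarvestStatus` from the Solr and source ping results, then raise a `HarvestError` if either failed. A minimal sketch of that flow outside Thor (the environment name and the rescue handling are illustrative, not the gem's exact CLI code):

```ruby
require 'search_solr_tools'

# Illustrative ping-then-raise flow mirroring the CLI hunks above.
harvester = SearchSolrTools::Harvesters::NsidcJson.new('development', die_on_failure: false)

ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
  SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
)

begin
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
rescue SearchSolrTools::Errors::HarvestError => e
  warn e.message
  exit e.exit_code
end
```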
data/lib/search_solr_tools/config/environments.rb
CHANGED
@@ -1,9 +1,11 @@
+# frozen_string_literal: true
+
 require 'yaml'
 
 module SearchSolrTools
   # configuration to work with solr locally, or on integration/qa/staging/prod
   module SolrEnvironments
-    YAML_ENVS = YAML.load_file(File.expand_path('
+    YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))
 
     def self.[](env = :development)
       YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
data/lib/search_solr_tools/errors/harvest_error.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Errors
     class HarvestError < StandardError
@@ -10,34 +12,47 @@ module SearchSolrTools
       ERRCODE_OTHER = 128
 
       ERRCODE_DESC = {
-
-
-
-
-
-
-
+        ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
+        ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
+        ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
+        ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
+        ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
+        ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
+        ERRCODE_OTHER => 'General error code for non-harvest related issues'
       }.freeze
 
       PING_ERRCODE_MAP = {
-        'ping_solr'
-        'ping_source' => ERRCODE_SOURCE_PING
-      }
+        'ping_solr' => ERRCODE_SOLR_PING,
+        'ping_source' => ERRCODE_SOURCE_PING
+      }.freeze
 
       STATUS_ERRCODE_MAP = {
-
-
-
-
-
+        Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
+        Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
+        Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
+        Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
+        Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
      }.freeze
 
      # If code is -1, it means display all error codes
      def self.describe_exit_code(code = -1)
+        code_list = code_to_list(code)
+
+        codes = {}
+        code_list.each do |k|
+          next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
+
+          codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
+        end
+
+        codes
+      end
+
+      # Loop through all bit-flag values to produce a list of integers
+      def self.code_to_list(code)
        code = code.to_i
        code_list = []
 
-        # Loop through all bit-flag values
        [128, 64, 32, 16, 8, 4, 2, 1].each do |k|
          if code >= k || code == -1
            code_list.prepend k
@@ -45,20 +60,17 @@ module SearchSolrTools
          end
        end
 
-
-        code_list.each do |k|
-          next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
-          codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
-        end
-
-        codes
+        code_list
      end
 
-      def initialize(status, message=nil)
+      def initialize(status, message = nil)
        @status_data = status
        @other_message = message
+
+        super message
      end
 
+      # rubocop:disable Metrics/AbcSize
      def exit_code
        if @status_data.nil?
          puts "OTHER ERROR REPORTED: #{@other_message}"
@@ -70,19 +82,20 @@ module SearchSolrTools
        code = 0
        code += ERRCODE_SOLR_PING unless @status_data.ping_solr
        code += ERRCODE_SOURCE_PING unless @status_data.ping_source
-        code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS]
-        code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE]
-        code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC]
-        code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR]
+        code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
+        code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
+        code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
+        code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?
 
-        code = ERRCODE_OTHER if code
+        code = ERRCODE_OTHER if code.zero?
 
        code
      end
+      # rubocop:enable Metrics/AbcSize
 
      def message
-        self.class.describe_exit_code(exit_code).map{|
+        self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
      end
    end
  end
-end
+end
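The exit code built above is a bit mask: each failure category contributes its own flag (`ERRCODE_OTHER` is 128, and `code_to_list` walks `[128, 64, ..., 1]` to split a combined code back into its parts), and `describe_exit_code` maps each flag back to its `ERRCODE_DESC` text. A wrapper script can print the full legend:

```ruby
require 'search_solr_tools'

# With the default argument of -1, every defined error code is listed with its
# description; pass a real exit status to see only the flags present in it.
SearchSolrTools::Errors::HarvestError.describe_exit_code.each do |code, desc|
  puts format('%3d  %s', code, desc)
end
```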
data/lib/search_solr_tools/harvesters/auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require_relative 'base'
 require 'json'
 require 'rest-client'
@@ -6,8 +8,8 @@ module SearchSolrTools
   module Harvesters
     # Use the nsidc_oai core to populate the auto_suggest core
     class AutoSuggest < Base
-      def initialize(env = 'development', die_on_failure
-        super
+      def initialize(env = 'development', die_on_failure: false)
+        super
         @env_settings = SolrEnvironments[@environment] # super sets @environment.
       end
 
@@ -50,7 +52,7 @@ module SearchSolrTools
 
        if status == Helpers::HarvestStatus::INGEST_OK
          puts "Added #{add_docs.size} auto suggest documents in one commit"
-
+          Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
        else
          puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
          new_add_docs = []
data/lib/search_solr_tools/harvesters/base.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'multi_json'
 require 'nokogiri'
 require 'open-uri'
@@ -9,7 +11,6 @@ require 'search_solr_tools'
 require_relative '../helpers/iso_namespaces'
 require_relative '../helpers/solr_format'
 
-
 module SearchSolrTools
   module Harvesters
     # base class for solr harvesters
@@ -20,7 +21,7 @@ module SearchSolrTools
       XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
       JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
 
-      def initialize(env = 'development', die_on_failure
+      def initialize(env = 'development', die_on_failure: false)
         @environment = env
         @die_on_failure = die_on_failure
       end
@@ -51,7 +52,7 @@ module SearchSolrTools
          success = response.code == 200
          puts "Error in ping request: #{response.body}" unless success
        end
-      rescue => e
+      rescue StandardError => e
        puts "Rest exception while pinging Solr: #{e}"
      end
      success
@@ -61,7 +62,7 @@ module SearchSolrTools
      # to "ping" the data center. Returns true if the ping is successful (or, as
      # in this default, no ping method was defined)
      def ping_source
-        puts
+        puts 'Harvester does not have ping method defined, assuming true'
        true
      end
 
@@ -74,12 +75,12 @@ module SearchSolrTools
        harvest_status
      end
 
-      def delete_old_documents(timestamp, constraints, solr_core, force
+      def delete_old_documents(timestamp, constraints, solr_core, force: false)
        constraints = sanitize_data_centers_constraints(constraints)
        delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
        solr = RSolr.connect url: solr_url + "/#{solr_core}"
        unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
-        if unchanged_count
+        if unchanged_count.zero?
          puts "All documents were updated after #{timestamp}, nothing to delete"
        else
          puts "Begin removing documents older than #{timestamp}"
@@ -90,8 +91,8 @@ module SearchSolrTools
      def sanitize_data_centers_constraints(query_string)
        # Remove lucene special characters, preserve the query parameter and compress whitespace
        query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
-        query_string.gsub!(
-        query_string.gsub!(
+        query_string.gsub!('data_centers ', 'data_centers:')
+        query_string.gsub!('source ', 'source:')
        query_string.squeeze(' ').strip
      end
 
@@ -126,7 +127,7 @@ module SearchSolrTools
        status
      end
 
-      # TODO Need to return a specific type of failure:
+      # TODO: Need to return a specific type of failure:
      # - Bad record content identified and no ingest attempted
      # - Solr tries to ingest document and fails (bad content not detected prior to ingest)
      # - Solr cannot insert document for reasons other than the document structure and content.
@@ -142,15 +143,15 @@ module SearchSolrTools
 
        # Some docs will cause solr to time out during the POST
        begin
-          RestClient.post(url, doc_serialized, content_type:
+          RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
            success = response.code == 200
            unless success
              puts "Error for #{doc_serialized}\n\n response: #{response.body}"
              status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
            end
          end
-        rescue => e
-          # TODO Need to provide more detail re: this failure so we know whether to
+        rescue StandardError => e
+          # TODO: Need to provide more detail re: this failure so we know whether to
          # exit the job with a status != 0
          puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
          status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
@@ -160,11 +161,11 @@ module SearchSolrTools
 
      def get_serialized_doc(doc, content_type)
        if content_type.eql?(XML_CONTENT_TYPE)
-
+          doc.respond_to?(:to_xml) ? doc.to_xml : doc
        elsif content_type.eql?(JSON_CONTENT_TYPE)
-
+          MultiJson.dump(doc)
        else
-
+          doc
        end
      end
 
@@ -177,17 +178,18 @@ module SearchSolrTools
 
        begin
          puts "Request: #{request_url}"
-          response = URI.open(
+          response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
        rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
          retries_left -= 1
          puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
 
-          retry if retries_left
+          retry if retries_left.positive?
 
-          # TODO
+          # TODO: Do we really need this "die_on_failure" anymore? The empty return
          # will cause the "No Documents" error to be thrown in the harvester class
          # now, so it will pretty much always "die on failure"
          raise e if @die_on_failure
+
          return
        end
        doc = Nokogiri.XML(response)
@@ -215,7 +217,7 @@ module SearchSolrTools
        spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
        return true if spatial_coverages.nil?
 
-        spatial_coverages = spatial_coverages.text.split
+        spatial_coverages = spatial_coverages.text.split
 
        # We've only seen the failure with 4 spatial coverage values
        return true if spatial_coverages.size < 4
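Several hunks above (`die_on_failure:`, `content_type:`, and `failure_ids:` in the file after next) rely on Ruby 3.1's shorthand hash/keyword value omission: when a local variable has the same name as the key or keyword, the value can be left out. A standalone illustration (names are made up, not the gem's API):

```ruby
# Shorthand value omission, Ruby >= 3.1.
def post(body, content_type: 'text/plain')
  { body:, content_type: }            # same as { body: body, content_type: content_type }
end

content_type = 'application/json; charset=utf-8'
p post('{}', content_type:)           # same as post('{}', content_type: content_type)
# => {:body=>"{}", :content_type=>"application/json; charset=utf-8"}
```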
data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require_relative 'auto_suggest'
 
 module SearchSolrTools
@@ -16,11 +18,11 @@ module SearchSolrTools
       def fields
         {
           'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
-          'full_title'
-          'copy_parameters'
-          'full_platforms'
-          'full_sensors'
-          'full_authors'
+          'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
+          'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
+          'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+          'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+          'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
         }
       end
 
data/lib/search_solr_tools/harvesters/nsidc_json.rb
CHANGED
@@ -1,15 +1,16 @@
+# frozen_string_literal: true
+
 require 'json'
 require 'rest-client'
 
 require 'search_solr_tools'
 
-
 module SearchSolrTools
   module Harvesters
     # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
     class NsidcJson < Base
-      def initialize(env = 'development', die_on_failure
-        super
+      def initialize(env = 'development', die_on_failure: false)
+        super
         @translator = Translators::NsidcJsonToSolr.new
         Helpers::FacetConfiguration.import_bin_configuration(env)
       end
@@ -19,7 +20,7 @@ module SearchSolrTools
        RestClient.options(nsidc_json_url) do |response, _request, _result|
          return response.code == 200
        end
-      rescue
+      rescue StandardError
        puts "Error trying to get options for #{nsidc_json_url} (ping)"
      end
      false
@@ -37,7 +38,7 @@ module SearchSolrTools
 
        status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
 
-        status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs]
+        status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?
 
        # Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
        status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
@@ -66,7 +67,7 @@ module SearchSolrTools
      # @param id [String] NSIDC authoritative ID for the dataset
      # @return [Hash] Parsed version of the JSON response
      def fetch_json_from_nsidc(id)
-        json_response = RestClient.get(nsidc_json_url
+        json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
        JSON.parse(json_response)
      end
 
@@ -81,13 +82,13 @@ module SearchSolrTools
          id = r.text.split('/').last
          begin
            docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
-          rescue => e
+          rescue StandardError => e
            puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
            failure_ids << id
          end
        end
 
-        { num_docs: all_docs.size, add_docs: docs, failure_ids:
+        { num_docs: all_docs.size, add_docs: docs, failure_ids: }
      end
    end
  end
data/lib/search_solr_tools/helpers/bounding_box_util.rb
CHANGED
@@ -1,4 +1,6 @@
-
+# frozen_string_literal: true
+
+require_relative 'iso_namespaces'
 
 module SearchSolrTools
   module Helpers
@@ -8,12 +10,10 @@ module SearchSolrTools
       NORTHERN_GLOBAL_BOUNDARY = 85.0
 
       def self.bounding_box_hash_from_geo_json(geometry)
-        if geometry_is_point?(geometry)
-
-
-
-          return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
-        end
+        return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
+
+        bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
+        { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
       end
 
       def self.geometry_is_point?(geometry)
@@ -30,7 +30,7 @@ module SearchSolrTools
       end
 
       def self.box_invalid?(box)
-        [
+        %i[north south east west].any? { |d| box[d].to_s.empty? }
       end
     end
   end
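The rewritten `bounding_box_hash_from_geo_json` short-circuits for point geometries, returning a degenerate box whose four edges are the point's own coordinates. A hedged usage sketch (the coordinates are invented; assumes the gem and rgeo are installed and loadable):

```ruby
require 'rgeo/geo_json'
require 'search_solr_tools'

# A GeoJSON Point decodes to an RGeo point; the helper returns west/east from
# x and south/north from y, all as strings.
point = RGeo::GeoJSON.decode({ 'type' => 'Point', 'coordinates' => [-105.0, 40.0] })
p SearchSolrTools::Helpers::BoundingBoxUtil.bounding_box_hash_from_geo_json(point)
# => {:west=>"-105.0", :south=>"40.0", :east=>"-105.0", :north=>"40.0"}
```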
data/lib/search_solr_tools/helpers/facet_configuration.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'json'
 require 'rest_client'
 require 'singleton'
@@ -8,7 +10,7 @@ module SearchSolrTools
     class FacetConfiguration
       include Singleton
       def self.import_bin_configuration(env)
-        @bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url]
+        @bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
       end
 
       def self.get_facet_bin(facet_name)
data/lib/search_solr_tools/helpers/harvest_status.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Helpers
     class HarvestStatus
@@ -7,17 +9,17 @@ module SearchSolrTools
       INGEST_ERR_INVALID_DOC = :invalid
       INGEST_ERR_SOLR_ERROR = :solr_error
       OTHER_ERROR = :other
-      PING_SOLR = :ping_solr
-      PING_SOURCE = :ping_source
+      PING_SOLR = :ping_solr # used for initialize only
+      PING_SOURCE = :ping_source # used for initialize only
 
-      ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
+      ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze
 
-
-
+      attr_accessor :ping_solr, :ping_source
+      attr_reader :status
 
      # init_info is an optional hash that contains the various status keys and the documents to
      # associate with them
-      def initialize(init_info={})
+      def initialize(init_info = {})
        @status = { INGEST_OK => 0 }
        @ping_solr = true
        @ping_source = true
@@ -36,9 +38,9 @@ module SearchSolrTools
      end
 
      def ok?
-        ERROR_STATUS.each { |s| return false unless @status[s]
+        ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
        @ping_solr && @ping_source
      end
    end
  end
-end
+end
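`HarvestStatus` now exposes the ping flags as accessors and the per-category counts via `attr_reader :status`; `ok?` passes only when both pings succeeded and every error-category count is zero. A rough sketch of driving it directly (the counts are illustrative, and it assumes the unshown constructor starts each error category at zero, as the CLI's immediate `ok?` call implies):

```ruby
require 'search_solr_tools'

status = SearchSolrTools::Helpers::HarvestStatus.new
puts status.ok?    # => true (pings default to true, nothing recorded yet)

status.record_status(SearchSolrTools::Helpers::HarvestStatus::HARVEST_FAILURE, 3)
puts status.ok?    # => false
p status.status    # per-category counts, via the new attr_reader
```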
data/lib/search_solr_tools/helpers/iso_namespaces.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Helpers
     # Helper class to provide default namespaces for XML document parsing.
@@ -25,7 +27,7 @@ module SearchSolrTools
         'srv' => 'http://www.isotc211.org/2005/srv',
         'xlink' => 'http://www.w3.org/1999/xlink',
         'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
-      }
+      }.freeze
     end
   end
 end
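The only change here, as with several constants above, is freezing the literal, which Rubocop's Style/MutableConstant cop calls for: a frozen hash or array assigned to a constant can no longer be mutated behind the constant's back. For example:

```ruby
NAMESPACES = {
  'xlink' => 'http://www.w3.org/1999/xlink'
}.freeze

NAMESPACES['gmd'] = 'http://www.isotc211.org/2005/gmd'
# => raises FrozenError (can't modify frozen Hash)
```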
data/lib/search_solr_tools/helpers/solr_format.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'date'
 require 'iso8601'
 
@@ -7,15 +9,14 @@ require_relative 'facet_configuration'
 module SearchSolrTools
   module Helpers
     # Methods for generating formatted values that can be indexed by SOLR
-    # rubocop:disable Metrics/ModuleLength
     module SolrFormat
       DATA_CENTER_NAMES = {
-        NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
-      }
+        NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
+      }.freeze
 
       NOT_SPECIFIED = 'Not specified'
 
-      TEMPORAL_RESOLUTION_FACET_VALUES = %w
+      TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
       SUBHOURLY_INDEX = 0
       HOURLY_INDEX = 1
       SUBDAILY_INDEX = 2
@@ -27,7 +28,7 @@ module SearchSolrTools
       YEARLY_INDEX = 8
       MULTIYEARLY_INDEX = 9
 
-      SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
+      SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
       SPATIAL_0_500_INDEX = 0
       SPATIAL_501_1_INDEX = 1
       SPATIAL_2_5_INDEX = 2
@@ -44,7 +45,7 @@ module SearchSolrTools
      end
 
      def self.temporal_display_str(date_range)
-        temporal_str =
+        temporal_str = (date_range[:start]).to_s
        temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
        temporal_str
      end
@@ -67,6 +68,7 @@ module SearchSolrTools
 
      def self.get_temporal_duration_facet(duration)
        return NOT_SPECIFIED if duration.nil?
+
        years = duration.to_i / 365
        temporal_duration_range(years)
      end
@@ -86,31 +88,28 @@ module SearchSolrTools
      def self.facet_binning(type, format_string)
        binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
        if binned_facet.nil?
-
+          format_string
        elsif binned_facet.eql?('exclude')
-
+          nil
        else
-
+          binned_facet
        end
-
-        nil
      end
 
      def self.parameter_binning(parameter_string)
        binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
        # use variable_level_1 if no mapping exists
-
-
-
-
-          return binned_parameter
-        end
+        return binned_parameter unless binned_parameter.nil?
+
+        parts = parameter_string.split '>'
+        return parts[3].strip if parts.length >= 4
 
        nil
      end
 
      def self.resolution_value(resolution, find_index_method, resolution_values)
-        return NOT_SPECIFIED if
+        return NOT_SPECIFIED if resolution_not_specified? resolution
+
        if resolution['type'] == 'single'
          i = send(find_index_method, resolution['resolution'])
          return resolution_values[i]
@@ -120,12 +119,12 @@ module SearchSolrTools
          j = send(find_index_method, resolution['max_resolution'])
          return resolution_values[i..j]
        end
-
+        raise "Invalid resolution #{resolution['type']}"
      end
 
      def self.resolution_not_specified?(resolution)
        return true if resolution.to_s.empty?
-        return true unless %w
+        return true unless %w[single range].include? resolution['type']
        return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
        return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
      end
@@ -140,6 +139,7 @@ module SearchSolrTools
        else
          facet = 'Between 1 and 170 degrees of latitude change | Regional'
        end
+
        facet
      end
 
@@ -152,8 +152,6 @@ module SearchSolrTools
        "#{d.iso8601[0..-7]}Z" unless d.nil?
      end
 
-      private
-
      MIN_DATE = '00010101'
      MAX_DATE = Time.now.strftime('%Y%m%d')
 
@@ -166,7 +164,6 @@ module SearchSolrTools
        nil
      end
 
-      # rubocop:disable CyclomaticComplexity
      def self.find_index_for_single_temporal_resolution_value(string_duration)
        iso8601_duration = ISO8601::Duration.new(string_duration)
 
@@ -186,10 +183,9 @@ module SearchSolrTools
          MULTIYEARLY_INDEX
        end
      end
-      # rubocop:enable CyclomaticComplexity
 
      def self.find_index_for_single_spatial_resolution_value(string_duration)
-        value, units = string_duration.split
+        value, units = string_duration.split
 
        if units == 'deg'
          spatial_resolution_index_degrees(value)
@@ -234,11 +230,10 @@ module SearchSolrTools
      end
 
      def self.date?(date)
-
-
-
-
-        valid_date
+        return false unless date.is_a? String
+
+        d = DateTime.parse(date.strip) rescue false
+        DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
      end
 
      def self.format_date_for_index(date_str, default)
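Most of the churn in this file is Rubocop housekeeping (`.freeze`, `%w[...]`, `.zero?`/`.positive?`), and the small helpers stay simple; for instance `temporal_display_str` renders a range as `start` or `start,end` (the dates below are illustrative):

```ruby
require 'search_solr_tools'

solr_format = SearchSolrTools::Helpers::SolrFormat

puts solr_format.temporal_display_str({ start: '2010-01-01', end: '2020-12-31' })
# => 2010-01-01,2020-12-31
puts solr_format.temporal_display_str({ start: '2010-01-01', end: nil })
# => 2010-01-01
```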
data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'rgeo/geo_json'
 
 require_relative 'bounding_box_util'
@@ -42,7 +44,7 @@ module SearchSolrTools
 
       def self.geojson_to_spatial_area(spatial_coverage_geom)
         spatial_areas = spatial_coverage_geom.map do |geo_json|
-          if %w
+          if %w[point].include?(geo_json.geometry_type.to_s.downcase)
             0.0
           else
             bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
@@ -50,11 +52,13 @@ module SearchSolrTools
          end
        end
        return nil if spatial_areas.empty?
-
+
+        spatial_areas.max
      end
 
      def self.geojson_to_global_facet(spatial_coverage_geom)
        return nil if spatial_coverage_geom.nil?
+
        spatial_coverage_geom.each do |geo_json|
          bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
          return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
@@ -63,13 +67,13 @@ module SearchSolrTools
      end
 
      def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
-
-
-
-
-
-
-        end
+        return if spatial_coverage_geom.nil?
+
+        spatial_coverage_geom.map do |geo_json|
+          bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
+          scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
+          scope unless scope.nil?
+        end.uniq
      end
    end
  end
data/lib/search_solr_tools/translators/nsidc_json.rb
CHANGED
@@ -1,4 +1,5 @@
-#
+# frozen_string_literal: true
+
 require 'rgeo/geo_json'
 
 require 'search_solr_tools'
@@ -10,50 +11,50 @@ module SearchSolrTools
   module Translators
     # Translates NSIDC JSON format to Solr JSON add format
     class NsidcJsonToSolr
-      PARAMETER_PARTS = %w
+      PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze
 
       # rubocop:disable Metrics/MethodLength
       # rubocop:disable Metrics/AbcSize
       def translate(json_doc)
-        copy_keys = %w
+        copy_keys = %w[title summary keywords brokered]
         temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
         spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
 
         solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
         solr_add_hash.merge!(
-          'authoritative_id'
-          'dataset_version'
-          'data_centers'
-          'facet_data_center'
-          'authors'
-          'topics'
-          'parameters'
-          'full_parameters'
-          'facet_parameter'
-          'platforms'
-          'sensors'
-          'facet_sensor'
-          'published_date'
-          'spatial_coverages'
-          'spatial'
-          'spatial_area'
-          'facet_spatial_coverage'
-          'facet_spatial_scope'
-          'temporal_coverages'
-          'temporal_duration'
-          'temporal'
-          'facet_temporal_duration'
-          'last_revision_date'
-          'dataset_url'
-          'distribution_formats'
-          'facet_format'
-          'source'
-          'popularity'
-          'data_access_urls'
-          'facet_sponsored_program'
+          'authoritative_id' => json_doc['authoritativeId'],
+          'dataset_version' => json_doc['majorVersion']['version'],
+          'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
+          'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
+          'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
+          'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
+          'parameters' => translate_parameters(json_doc['parameters']),
+          'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
+          'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
+          'platforms' => translate_json_string(json_doc['platforms']),
+          'sensors' => translate_json_string(json_doc['instruments']),
+          'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
+          'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
+          'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
+          'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
+          'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
+          'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
+          'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
+          'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
+          'temporal_duration' => temporal_coverage_values['temporal_duration'],
+          'temporal' => temporal_coverage_values['temporal'],
+          'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
+          'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
+          'dataset_url' => json_doc['datasetUrl'],
+          'distribution_formats' => json_doc['distributionFormats'],
+          'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
+          'source' => %w[NSIDC ADE],
+          'popularity' => json_doc['popularity'],
+          'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
+          'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
          'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
-          'facet_spatial_resolution'
-          'sponsored_programs'
+          'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
+          'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
        )
      end
      # rubocop:enable Metrics/MethodLength
@@ -70,13 +71,14 @@ module SearchSolrTools
      def translate_sensor_to_facet_sensor(json)
        facet_values = []
        return facet_values if json.nil?
+
        json.each do |json_entry|
          sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
-          if sensor_bin.eql? json_entry['shortName']
-
-
-
-
+          facet_values << if sensor_bin.eql? json_entry['shortName']
+                            "#{json_entry['longName']} | #{json_entry['shortName']}"
+                          else
+                            " | #{sensor_bin}"
+                          end
        end
        facet_values
      end
@@ -100,12 +102,13 @@ module SearchSolrTools
      end
 
      def translate_iso_topic_categories(iso_topic_categories_json)
-        iso_topic_categories_json
+        iso_topic_categories_json&.map { |t| t['name'] }
      end
 
      def translate_data_access_urls(json)
        values = []
        return values if json.nil?
+
        json.each do |json_entry|
          link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
          link_type = json_entry['type'].nil? ? '' : json_entry['type']
@@ -120,6 +123,7 @@ module SearchSolrTools
      def translate_internal_datacenters(json)
        values = []
        return values if json.nil?
+
        json.each do |json_entry|
          short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
          values << short_name
@@ -130,6 +134,7 @@ module SearchSolrTools
      def translate_short_long_names_to_facet_value(json)
        facet_values = []
        return facet_values if json.nil?
+
        json.each do |json_entry|
          long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
          short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
@@ -169,6 +174,7 @@ module SearchSolrTools
      def translate_parameters_to_facet_parameters(parameters_json)
        parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
        return [] if parameters_strings.nil?
+
        facet_params = []
        parameters_strings.each do |str|
          facet_params << Helpers::SolrFormat.parameter_binning(str)
@@ -199,8 +205,7 @@ module SearchSolrTools
      end
 
      def generate_data_citation_creators(data_citation)
-        data_citation.nil? ?
-        creators
+        data_citation.nil? ? [] : data_citation['creators']
      end
 
      def generate_part_array(json, limit_values = nil)
@@ -214,6 +219,5 @@ module SearchSolrTools
        parts
      end
    end
-    # rubocop:enable Metrics/ClassLength
  end
 end
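One small behavioral nicety in the rewritten translator is the safe-navigation call in `translate_iso_topic_categories`: a nil `isoTopicCategories` value simply maps to nil rather than raising. The operator in isolation (values are illustrative):

```ruby
iso_topics = [{ 'name' => 'oceans' }, { 'name' => 'cryosphere' }]

p iso_topics&.map { |t| t['name'] }  # => ["oceans", "cryosphere"]
p nil&.map { |t| t['name'] }         # => nil, no NoMethodError
```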
data/lib/search_solr_tools.rb
CHANGED
@@ -1,9 +1,11 @@
+# frozen_string_literal: true
+
 require_relative 'search_solr_tools/config/environments'
 require_relative 'search_solr_tools/version'
 
 require_relative 'search_solr_tools/helpers/harvest_status'
 require_relative 'search_solr_tools/errors/harvest_error'
 
-%w
+%w[harvesters translators].each do |subdir|
   Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: search_solr_tools
 version: !ruby/object:Gem::Version
-  version: 6.
+  version: 6.3.0
 platform: ruby
 authors:
 - Chris Chalstrom
@@ -14,7 +14,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-07-
+date: 2023-07-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi-geos
@@ -354,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.4.
+rubygems_version: 3.4.17
 signing_key:
 specification_version: 4
 summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.