search_solr_tools 6.1.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
+  data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
+  data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
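These digests cover the metadata.gz and data.tar.gz entries packaged inside the .gem archive. If you want to confirm them locally, here is a rough Ruby sketch; the gem path is a placeholder and this is not part of the gem's own tooling:

```ruby
require 'digest'
require 'rubygems/package'

# Placeholder path; point this at the fetched gem file.
gem_path = 'search_solr_tools-6.3.0.gem'

# A .gem is a plain tar whose entries include metadata.gz and data.tar.gz.
tar = Gem::Package::TarReader.new(File.open(gem_path, 'rb'))
tar.each do |entry|
  next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)

  puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
end
tar.close
```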
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,17 @@
-## v6.
+## v6.3.0 (2023-07-24)
+
+- Update Rubocop configuration to actually run against files, and make
+  necessary corrections to comply with Rubocop styling.
+
+## v6.2.0 (2023-07-18)
+
+- Remove deprecated harvesters and associated tests, helpers, etc.
+
+## v6.1.0 (2023-07-14)
 
 - Updated a few other dependencies that weren't at the newest versions.
 
-## v6.0.0 (
+## v6.0.0 (2023-07-14)
 
 - Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
 
data/bin/search_solr_tools
CHANGED
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
     end
 
     ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
-      SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+      SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
       SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
     )
     raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
      puts "Target: #{target}"
      begin
        harvest_class = get_harvester_class(target)
-        harvester = harvest_class.new(options[:environment], die_on_failure)
+        harvester = harvest_class.new(options[:environment], die_on_failure:)
        ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
-          SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
+          SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
          SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
        )
        raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
  no_tasks do
    def harvester_map
      {
-        '
-        '
-        'echo' => SearchSolrTools::Harvesters::Echo,
-        'ices' => SearchSolrTools::Harvesters::Ices,
-        'nmi' => SearchSolrTools::Harvesters::Nmi,
-        'nodc' => SearchSolrTools::Harvesters::Nodc,
-        'r2r' => SearchSolrTools::Harvesters::R2R,
-        'rda' => SearchSolrTools::Harvesters::Rda,
-        'usgs' => SearchSolrTools::Harvesters::Usgs,
-        'tdar' => SearchSolrTools::Harvesters::Tdar,
-        'pdc' => SearchSolrTools::Harvesters::Pdc,
-        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
-        'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
-        'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
+        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
+        'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
      }
    end
 
data/lib/search_solr_tools/config/environments.rb
CHANGED
@@ -1,9 +1,11 @@
+# frozen_string_literal: true
+
 require 'yaml'
 
 module SearchSolrTools
   # configuration to work with solr locally, or on integration/qa/staging/prod
   module SolrEnvironments
-    YAML_ENVS = YAML.load_file(File.expand_path('
+    YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))
 
     def self.[](env = :development)
       YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
data/lib/search_solr_tools/config/environments.yaml
CHANGED
@@ -3,38 +3,6 @@
 :collection_name: nsidc_oai
 :collection_path: solr
 :port: 8983
-:bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
-:adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
-:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
-:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
-:gtnp:
-- http://www.gtnpdatabase.org/rest/boreholes/json
-- http://www.gtnpdatabase.org/rest/activelayers/json
-:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
-:ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
-:nmi_url: http://arcticdata.met.no/metamod/oai
-:nodc_url: https://data.nodc.noaa.gov/geoportal/csw
-:pdc_url: http://www.polardata.ca/oai/provider
-:rda_url: https://rda.ucar.edu/cgi-bin/oai
-:tdar_url: http://core.tdar.org/search/rss
-:usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
-:eol:
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
-- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
-:r2r_url: http://get.rvdata.us/services/cruise/
 
 # Not using DCS API v2 here because not all retired datasets have their "retired"
 # flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
data/lib/search_solr_tools/errors/harvest_error.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Errors
     class HarvestError < StandardError
@@ -10,34 +12,47 @@ module SearchSolrTools
       ERRCODE_OTHER = 128
 
       ERRCODE_DESC = {
-
-
-
-
-
-
-
+        ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
+        ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
+        ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
+        ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
+        ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
+        ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
+        ERRCODE_OTHER => 'General error code for non-harvest related issues'
       }.freeze
 
       PING_ERRCODE_MAP = {
-        'ping_solr'
-        'ping_source' => ERRCODE_SOURCE_PING
-      }
+        'ping_solr' => ERRCODE_SOLR_PING,
+        'ping_source' => ERRCODE_SOURCE_PING
+      }.freeze
 
       STATUS_ERRCODE_MAP = {
-
-
-
-
-
+        Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
+        Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
+        Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
+        Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
+        Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
      }.freeze
 
      # If code is -1, it means display all error codes
      def self.describe_exit_code(code = -1)
+        code_list = code_to_list(code)
+
+        codes = {}
+        code_list.each do |k|
+          next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
+
+          codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
+        end
+
+        codes
+      end
+
+      # Loop through all bit-flag values to produce a list of integers
+      def self.code_to_list(code)
        code = code.to_i
        code_list = []
 
-        # Loop through all bit-flag values
        [128, 64, 32, 16, 8, 4, 2, 1].each do |k|
          if code >= k || code == -1
            code_list.prepend k
@@ -45,20 +60,17 @@ module SearchSolrTools
          end
        end
 
-
-        code_list.each do |k|
-          next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
-          codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
-        end
-
-        codes
+        code_list
      end
 
-      def initialize(status, message=nil)
+      def initialize(status, message = nil)
        @status_data = status
        @other_message = message
+
+        super message
      end
 
+      # rubocop:disable Metrics/AbcSize
      def exit_code
        if @status_data.nil?
          puts "OTHER ERROR REPORTED: #{@other_message}"
@@ -70,19 +82,20 @@ module SearchSolrTools
        code = 0
        code += ERRCODE_SOLR_PING unless @status_data.ping_solr
        code += ERRCODE_SOURCE_PING unless @status_data.ping_source
-        code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS]
-        code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE]
-        code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC]
-        code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR]
+        code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
+        code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
+        code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
+        code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?
 
-        code = ERRCODE_OTHER if code
+        code = ERRCODE_OTHER if code.zero?
 
        code
      end
+      # rubocop:enable Metrics/AbcSize
 
      def message
-        self.class.describe_exit_code(exit_code).map{|
+        self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
      end
    end
  end
-end
+end
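The exit-code handling above composes one process exit code out of bit flags (only ERRCODE_OTHER = 128 is visible in this hunk; the other ERRCODE_* values are not shown). A small standalone sketch of the same compose/decompose idea, mirroring the largest-to-smallest loop in code_to_list; the flag values and wording below are assumptions for illustration, not the gem's actual constants:

```ruby
# Assumed flag values and descriptions, purely for illustration.
DESCRIPTIONS = {
  1   => 'Solr ping failed',
  2   => 'Source ping failed',
  4   => 'Source returned no documents',
  8   => 'Source harvest error',
  16  => 'Invalid document',
  32  => 'Solr ingest error',
  128 => 'Other error'
}.freeze

exit_code = 4 + 32 # a combined failure: no documents AND an ingest error

# Decompose by walking the flags from largest to smallest.
flags = []
[128, 64, 32, 16, 8, 4, 2, 1].each do |flag|
  next unless exit_code >= flag

  flags.prepend(flag)
  exit_code -= flag
end

flags.each { |f| puts "#{f}: #{DESCRIPTIONS.fetch(f, 'INVALID CODE NUMBER')}" }
# 4: Source returned no documents
# 32: Solr ingest error
```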
data/lib/search_solr_tools/harvesters/auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require_relative 'base'
 require 'json'
 require 'rest-client'
@@ -6,8 +8,8 @@ module SearchSolrTools
   module Harvesters
     # Use the nsidc_oai core to populate the auto_suggest core
     class AutoSuggest < Base
-      def initialize(env = 'development', die_on_failure
-        super
+      def initialize(env = 'development', die_on_failure: false)
+        super
         @env_settings = SolrEnvironments[@environment] # super sets @environment.
       end
 
@@ -50,7 +52,7 @@ module SearchSolrTools
 
       if status == Helpers::HarvestStatus::INGEST_OK
         puts "Added #{add_docs.size} auto suggest documents in one commit"
-
+        Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
       else
         puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
         new_add_docs = []
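The `die_on_failure: false` keyword parameter and the bare `super` in AutoSuggest#initialize (like the `die_on_failure:` call in the CLI hunk further up) lean on two Ruby features: a bare `super` re-passes the current method's arguments unchanged, and since Ruby 3.1 `die_on_failure:` with no value passes the local variable of the same name. A minimal self-contained sketch, not the gem's code; Parent and Child are stand-ins:

```ruby
class Parent
  def initialize(env = 'development', die_on_failure: false)
    @env = env
    @die_on_failure = die_on_failure
  end
end

class Child < Parent
  attr_reader :settings

  def initialize(env = 'development', die_on_failure: false)
    super # bare super forwards env and die_on_failure: to Parent
    @settings = { env: @env, die_on_failure: @die_on_failure }
  end
end

die_on_failure = true
# Ruby 3.1+ hash value omission: `die_on_failure:` expands to
# `die_on_failure: die_on_failure`.
child = Child.new('production', die_on_failure:)
puts child.settings.inspect # => {:env=>"production", :die_on_failure=>true}
```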
data/lib/search_solr_tools/harvesters/base.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'multi_json'
 require 'nokogiri'
 require 'open-uri'
@@ -8,8 +10,6 @@ require 'time'
 require 'search_solr_tools'
 require_relative '../helpers/iso_namespaces'
 require_relative '../helpers/solr_format'
-require_relative '../helpers/iso_to_solr'
-
 
 module SearchSolrTools
   module Harvesters
@@ -21,7 +21,7 @@ module SearchSolrTools
      XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
      JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
 
-      def initialize(env = 'development', die_on_failure
+      def initialize(env = 'development', die_on_failure: false)
        @environment = env
        @die_on_failure = die_on_failure
      end
@@ -52,7 +52,7 @@ module SearchSolrTools
          success = response.code == 200
          puts "Error in ping request: #{response.body}" unless success
        end
-      rescue => e
+      rescue StandardError => e
        puts "Rest exception while pinging Solr: #{e}"
      end
      success
@@ -62,7 +62,7 @@ module SearchSolrTools
      # to "ping" the data center. Returns true if the ping is successful (or, as
      # in this default, no ping method was defined)
      def ping_source
-        puts
+        puts 'Harvester does not have ping method defined, assuming true'
        true
      end
 
@@ -75,12 +75,12 @@ module SearchSolrTools
        harvest_status
      end
 
-      def delete_old_documents(timestamp, constraints, solr_core, force
+      def delete_old_documents(timestamp, constraints, solr_core, force: false)
        constraints = sanitize_data_centers_constraints(constraints)
        delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
        solr = RSolr.connect url: solr_url + "/#{solr_core}"
        unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
-        if unchanged_count
+        if unchanged_count.zero?
          puts "All documents were updated after #{timestamp}, nothing to delete"
        else
          puts "Begin removing documents older than #{timestamp}"
@@ -91,8 +91,8 @@ module SearchSolrTools
      def sanitize_data_centers_constraints(query_string)
        # Remove lucene special characters, preserve the query parameter and compress whitespace
        query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
-        query_string.gsub!(
-        query_string.gsub!(
+        query_string.gsub!('data_centers ', 'data_centers:')
+        query_string.gsub!('source ', 'source:')
        query_string.squeeze(' ').strip
      end
 
@@ -127,7 +127,7 @@ module SearchSolrTools
        status
      end
 
-      # TODO Need to return a specific type of failure:
+      # TODO: Need to return a specific type of failure:
      # - Bad record content identified and no ingest attempted
      # - Solr tries to ingest document and fails (bad content not detected prior to ingest)
      # - Solr cannot insert document for reasons other than the document structure and content.
@@ -143,15 +143,15 @@ module SearchSolrTools
 
        # Some docs will cause solr to time out during the POST
        begin
-          RestClient.post(url, doc_serialized, content_type:
+          RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
            success = response.code == 200
            unless success
              puts "Error for #{doc_serialized}\n\n response: #{response.body}"
              status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
            end
          end
-        rescue => e
-          # TODO Need to provide more detail re: this failure so we know whether to
+        rescue StandardError => e
+          # TODO: Need to provide more detail re: this failure so we know whether to
          # exit the job with a status != 0
          puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
          status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
@@ -161,11 +161,11 @@ module SearchSolrTools
 
      def get_serialized_doc(doc, content_type)
        if content_type.eql?(XML_CONTENT_TYPE)
-
+          doc.respond_to?(:to_xml) ? doc.to_xml : doc
        elsif content_type.eql?(JSON_CONTENT_TYPE)
-
+          MultiJson.dump(doc)
        else
-
+          doc
        end
      end
 
@@ -178,17 +178,18 @@ module SearchSolrTools
 
        begin
          puts "Request: #{request_url}"
-          response = URI.open(
+          response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
        rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
          retries_left -= 1
          puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
 
-          retry if retries_left
+          retry if retries_left.positive?
 
-          # TODO
+          # TODO: Do we really need this "die_on_failure" anymore? The empty return
          # will cause the "No Documents" error to be thrown in the harvester class
          # now, so it will pretty much always "die on failure"
          raise e if @die_on_failure
+
          return
        end
        doc = Nokogiri.XML(response)
@@ -216,7 +217,7 @@ module SearchSolrTools
      spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
      return true if spatial_coverages.nil?
 
-      spatial_coverages = spatial_coverages.text.split
+      spatial_coverages = spatial_coverages.text.split
 
      # We've only seen the failure with 4 spatial coverage values
      return true if spatial_coverages.size < 4
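The request change above swaps `URI.open` for `URI.parse(request_url).open` and guards the retry with `retries_left.positive?`. A rough standalone sketch of that open-uri retry pattern; the URL, timeout, and retry count are placeholders, not the gem's values:

```ruby
require 'open-uri'

request_url  = 'https://example.com/' # placeholder endpoint
retries_left = 3

begin
  # open-uri adds #open to URI objects; read_timeout caps how long we wait.
  response = URI.parse(request_url).open(read_timeout: 10)
  puts response.read[0, 60]
rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
  retries_left -= 1
  puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
  retry if retries_left.positive?
  raise
end
```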
data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require_relative 'auto_suggest'
 
 module SearchSolrTools
@@ -16,11 +18,11 @@ module SearchSolrTools
      def fields
        {
          'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
-          'full_title'
-          'copy_parameters'
-          'full_platforms'
-          'full_sensors'
-          'full_authors'
+          'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
+          'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
+          'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+          'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
+          'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
        }
      end
 
data/lib/search_solr_tools/harvesters/nsidc_json.rb
CHANGED
@@ -1,15 +1,16 @@
+# frozen_string_literal: true
+
 require 'json'
 require 'rest-client'
 
 require 'search_solr_tools'
 
-
 module SearchSolrTools
   module Harvesters
     # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
     class NsidcJson < Base
-      def initialize(env = 'development', die_on_failure
-        super
+      def initialize(env = 'development', die_on_failure: false)
+        super
         @translator = Translators::NsidcJsonToSolr.new
         Helpers::FacetConfiguration.import_bin_configuration(env)
       end
@@ -19,7 +20,7 @@ module SearchSolrTools
        RestClient.options(nsidc_json_url) do |response, _request, _result|
          return response.code == 200
        end
-      rescue
+      rescue StandardError
        puts "Error trying to get options for #{nsidc_json_url} (ping)"
      end
      false
@@ -37,7 +38,7 @@ module SearchSolrTools
 
        status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
 
-        status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs]
+        status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?
 
        # Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
        status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
@@ -66,7 +67,7 @@ module SearchSolrTools
      # @param id [String] NSIDC authoritative ID for the dataset
      # @return [Hash] Parsed version of the JSON response
      def fetch_json_from_nsidc(id)
-        json_response = RestClient.get(nsidc_json_url
+        json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
        JSON.parse(json_response)
      end
 
@@ -81,13 +82,13 @@ module SearchSolrTools
          id = r.text.split('/').last
          begin
            docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
-          rescue => e
+          rescue StandardError => e
            puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
            failure_ids << id
          end
        end
 
-        { num_docs: all_docs.size, add_docs: docs, failure_ids:
+        { num_docs: all_docs.size, add_docs: docs, failure_ids: }
      end
    end
  end
data/lib/search_solr_tools/helpers/bounding_box_util.rb
CHANGED
@@ -1,4 +1,6 @@
-
+# frozen_string_literal: true
+
+require_relative 'iso_namespaces'
 
 module SearchSolrTools
   module Helpers
@@ -8,12 +10,10 @@ module SearchSolrTools
      NORTHERN_GLOBAL_BOUNDARY = 85.0
 
      def self.bounding_box_hash_from_geo_json(geometry)
-        if geometry_is_point?(geometry)
-
-
-
-          return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
-        end
+        return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
+
+        bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
+        { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
      end
 
      def self.geometry_is_point?(geometry)
@@ -30,7 +30,7 @@ module SearchSolrTools
      end
 
      def self.box_invalid?(box)
-        [
+        %i[north south east west].any? { |d| box[d].to_s.empty? }
      end
    end
  end
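For context on the rewritten `bounding_box_hash_from_geo_json`: `RGeo::Cartesian::BoundingBox.create_from_geometry` (already used in the hunk above) computes the min/max extents of an RGeo geometry. A small sketch with a made-up line string, assuming the rgeo gem is installed; it is not taken from the gem's tests:

```ruby
require 'rgeo'

factory = RGeo::Cartesian.simple_factory
# Hypothetical two-point line string spanning part of the Arctic.
line = factory.line_string([factory.point(-150.0, 60.0), factory.point(-140.0, 72.5)])

bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(line)
puts({ west: bbox.min_x.to_s, south: bbox.min_y.to_s,
       east: bbox.max_x.to_s, north: bbox.max_y.to_s }.inspect)
# => {:west=>"-150.0", :south=>"60.0", :east=>"-140.0", :north=>"72.5"}
```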
data/lib/search_solr_tools/helpers/facet_configuration.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'json'
 require 'rest_client'
 require 'singleton'
@@ -8,7 +10,7 @@ module SearchSolrTools
    class FacetConfiguration
      include Singleton
      def self.import_bin_configuration(env)
-        @bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url]
+        @bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
      end
 
      def self.get_facet_bin(facet_name)
data/lib/search_solr_tools/helpers/harvest_status.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Helpers
     class HarvestStatus
@@ -7,17 +9,17 @@ module SearchSolrTools
      INGEST_ERR_INVALID_DOC = :invalid
      INGEST_ERR_SOLR_ERROR = :solr_error
      OTHER_ERROR = :other
-      PING_SOLR = :ping_solr
-      PING_SOURCE = :ping_source
+      PING_SOLR = :ping_solr # used for initialize only
+      PING_SOURCE = :ping_source # used for initialize only
 
-      ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
+      ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze
 
-
-
+      attr_accessor :ping_solr, :ping_source
+      attr_reader :status
 
      # init_info is an optional hash that contains the various status keys and the documents to
      # associate with them
-      def initialize(init_info={})
+      def initialize(init_info = {})
        @status = { INGEST_OK => 0 }
        @ping_solr = true
        @ping_source = true
@@ -36,9 +38,9 @@ module SearchSolrTools
      end
 
      def ok?
-        ERROR_STATUS.each { |s| return false unless @status[s]
+        ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
        @ping_solr && @ping_source
      end
    end
  end
-end
+end
data/lib/search_solr_tools/helpers/iso_namespaces.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module SearchSolrTools
   module Helpers
     # Helper class to provide default namespaces for XML document parsing.
@@ -25,7 +27,7 @@ module SearchSolrTools
        'srv' => 'http://www.isotc211.org/2005/srv',
        'xlink' => 'http://www.w3.org/1999/xlink',
        'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
-      }
+      }.freeze
      end
    end
  end
|