search_solr_tools 6.1.0 → 6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
|
4
|
+
data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
|
7
|
+
data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,17 @@
|
|
1
|
-
## v6.
|
1
|
+
## v6.3.0 (2023-07-24)
|
2
|
+
|
3
|
+
- Update Rubocop configuration to actually run against files, and make
|
4
|
+
necessary corrections to comply with Rubocop styling.
|
5
|
+
|
6
|
+
## v6.2.0 (2023-07-18)
|
7
|
+
|
8
|
+
- Remove deprecated harvesters and associated tests, helpers, etc.
|
9
|
+
|
10
|
+
## v6.1.0 (2023-07-14)
|
2
11
|
|
3
12
|
- Updated a few other dependencies that weren't at the newest versions.
|
4
13
|
|
5
|
-
## v6.0.0 (
|
14
|
+
## v6.0.0 (2023-07-14)
|
6
15
|
|
7
16
|
- Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
|
8
17
|
|
data/bin/search_solr_tools
CHANGED
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
|
|
47
47
|
end
|
48
48
|
|
49
49
|
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
50
|
-
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
|
50
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
|
51
51
|
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
|
52
52
|
)
|
53
53
|
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
|
|
64
64
|
puts "Target: #{target}"
|
65
65
|
begin
|
66
66
|
harvest_class = get_harvester_class(target)
|
67
|
-
harvester = harvest_class.new(options[:environment], die_on_failure)
|
67
|
+
harvester = harvest_class.new(options[:environment], die_on_failure:)
|
68
68
|
ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
|
69
|
-
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR
|
69
|
+
SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
|
70
70
|
SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
|
71
71
|
)
|
72
72
|
raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
|
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
|
|
120
120
|
no_tasks do
|
121
121
|
def harvester_map
|
122
122
|
{
|
123
|
-
'
|
124
|
-
'
|
125
|
-
'echo' => SearchSolrTools::Harvesters::Echo,
|
126
|
-
'ices' => SearchSolrTools::Harvesters::Ices,
|
127
|
-
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
128
|
-
'nodc' => SearchSolrTools::Harvesters::Nodc,
|
129
|
-
'r2r' => SearchSolrTools::Harvesters::R2R,
|
130
|
-
'rda' => SearchSolrTools::Harvesters::Rda,
|
131
|
-
'usgs' => SearchSolrTools::Harvesters::Usgs,
|
132
|
-
'tdar' => SearchSolrTools::Harvesters::Tdar,
|
133
|
-
'pdc' => SearchSolrTools::Harvesters::Pdc,
|
134
|
-
'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
|
135
|
-
'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
|
136
|
-
'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
|
123
|
+
'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
|
124
|
+
'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
|
137
125
|
}
|
138
126
|
end
|
139
127
|
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
|
3
5
|
module SearchSolrTools
|
4
6
|
# configuration to work with solr locally, or on integration/qa/staging/prod
|
5
7
|
module SolrEnvironments
|
6
|
-
YAML_ENVS = YAML.load_file(File.expand_path('
|
8
|
+
YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))
|
7
9
|
|
8
10
|
def self.[](env = :development)
|
9
11
|
YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
|
@@ -3,38 +3,6 @@
|
|
3
3
|
:collection_name: nsidc_oai
|
4
4
|
:collection_path: solr
|
5
5
|
:port: 8983
|
6
|
-
:bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
|
7
|
-
:adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
8
|
-
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
9
|
-
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
|
10
|
-
:gtnp:
|
11
|
-
- http://www.gtnpdatabase.org/rest/boreholes/json
|
12
|
-
- http://www.gtnpdatabase.org/rest/activelayers/json
|
13
|
-
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
14
|
-
:ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
|
15
|
-
:nmi_url: http://arcticdata.met.no/metamod/oai
|
16
|
-
:nodc_url: https://data.nodc.noaa.gov/geoportal/csw
|
17
|
-
:pdc_url: http://www.polardata.ca/oai/provider
|
18
|
-
:rda_url: https://rda.ucar.edu/cgi-bin/oai
|
19
|
-
:tdar_url: http://core.tdar.org/search/rss
|
20
|
-
:usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
|
21
|
-
:eol:
|
22
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
|
23
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
|
24
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
|
25
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
|
26
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
|
27
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
|
28
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
|
29
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
|
30
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
|
31
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
|
32
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
|
33
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
|
34
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
|
35
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
|
36
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
|
37
|
-
:r2r_url: http://get.rvdata.us/services/cruise/
|
38
6
|
|
39
7
|
# Not using DCS API v2 here because not all retired datasets have their "retired"
|
40
8
|
# flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module SearchSolrTools
|
2
4
|
module Errors
|
3
5
|
class HarvestError < StandardError
|
@@ -10,34 +12,47 @@ module SearchSolrTools
|
|
10
12
|
ERRCODE_OTHER = 128
|
11
13
|
|
12
14
|
ERRCODE_DESC = {
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
|
16
|
+
ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
|
17
|
+
ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
|
18
|
+
ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
|
19
|
+
ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
|
20
|
+
ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
|
21
|
+
ERRCODE_OTHER => 'General error code for non-harvest related issues'
|
20
22
|
}.freeze
|
21
23
|
|
22
24
|
PING_ERRCODE_MAP = {
|
23
|
-
'ping_solr'
|
24
|
-
'ping_source' => ERRCODE_SOURCE_PING
|
25
|
-
}
|
25
|
+
'ping_solr' => ERRCODE_SOLR_PING,
|
26
|
+
'ping_source' => ERRCODE_SOURCE_PING
|
27
|
+
}.freeze
|
26
28
|
|
27
29
|
STATUS_ERRCODE_MAP = {
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
|
31
|
+
Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
|
32
|
+
Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
|
33
|
+
Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
|
34
|
+
Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
|
33
35
|
}.freeze
|
34
36
|
|
35
37
|
# If code is -1, it means display all error codes
|
36
38
|
def self.describe_exit_code(code = -1)
|
39
|
+
code_list = code_to_list(code)
|
40
|
+
|
41
|
+
codes = {}
|
42
|
+
code_list.each do |k|
|
43
|
+
next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
|
44
|
+
|
45
|
+
codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
|
46
|
+
end
|
47
|
+
|
48
|
+
codes
|
49
|
+
end
|
50
|
+
|
51
|
+
# Loop through all bit-flag values to produce a list of integers
|
52
|
+
def self.code_to_list(code)
|
37
53
|
code = code.to_i
|
38
54
|
code_list = []
|
39
55
|
|
40
|
-
# Loop through all bit-flag values
|
41
56
|
[128, 64, 32, 16, 8, 4, 2, 1].each do |k|
|
42
57
|
if code >= k || code == -1
|
43
58
|
code_list.prepend k
|
@@ -45,20 +60,17 @@ module SearchSolrTools
|
|
45
60
|
end
|
46
61
|
end
|
47
62
|
|
48
|
-
|
49
|
-
code_list.each do |k|
|
50
|
-
next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
|
51
|
-
codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
|
52
|
-
end
|
53
|
-
|
54
|
-
codes
|
63
|
+
code_list
|
55
64
|
end
|
56
65
|
|
57
|
-
def initialize(status, message=nil)
|
66
|
+
def initialize(status, message = nil)
|
58
67
|
@status_data = status
|
59
68
|
@other_message = message
|
69
|
+
|
70
|
+
super message
|
60
71
|
end
|
61
72
|
|
73
|
+
# rubocop:disable Metrics/AbcSize
|
62
74
|
def exit_code
|
63
75
|
if @status_data.nil?
|
64
76
|
puts "OTHER ERROR REPORTED: #{@other_message}"
|
@@ -70,19 +82,20 @@ module SearchSolrTools
|
|
70
82
|
code = 0
|
71
83
|
code += ERRCODE_SOLR_PING unless @status_data.ping_solr
|
72
84
|
code += ERRCODE_SOURCE_PING unless @status_data.ping_source
|
73
|
-
code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS]
|
74
|
-
code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE]
|
75
|
-
code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC]
|
76
|
-
code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR]
|
85
|
+
code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
|
86
|
+
code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
|
87
|
+
code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
|
88
|
+
code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?
|
77
89
|
|
78
|
-
code = ERRCODE_OTHER if code
|
90
|
+
code = ERRCODE_OTHER if code.zero?
|
79
91
|
|
80
92
|
code
|
81
93
|
end
|
94
|
+
# rubocop:enable Metrics/AbcSize
|
82
95
|
|
83
96
|
def message
|
84
|
-
self.class.describe_exit_code(exit_code).map{|
|
97
|
+
self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
|
85
98
|
end
|
86
99
|
end
|
87
100
|
end
|
88
|
-
end
|
101
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require 'json'
|
3
5
|
require 'rest-client'
|
@@ -6,8 +8,8 @@ module SearchSolrTools
|
|
6
8
|
module Harvesters
|
7
9
|
# Use the nsidc_oai core to populate the auto_suggest core
|
8
10
|
class AutoSuggest < Base
|
9
|
-
def initialize(env = 'development', die_on_failure
|
10
|
-
super
|
11
|
+
def initialize(env = 'development', die_on_failure: false)
|
12
|
+
super
|
11
13
|
@env_settings = SolrEnvironments[@environment] # super sets @environment.
|
12
14
|
end
|
13
15
|
|
@@ -50,7 +52,7 @@ module SearchSolrTools
|
|
50
52
|
|
51
53
|
if status == Helpers::HarvestStatus::INGEST_OK
|
52
54
|
puts "Added #{add_docs.size} auto suggest documents in one commit"
|
53
|
-
|
55
|
+
Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
|
54
56
|
else
|
55
57
|
puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
|
56
58
|
new_add_docs = []
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'multi_json'
|
2
4
|
require 'nokogiri'
|
3
5
|
require 'open-uri'
|
@@ -8,8 +10,6 @@ require 'time'
|
|
8
10
|
require 'search_solr_tools'
|
9
11
|
require_relative '../helpers/iso_namespaces'
|
10
12
|
require_relative '../helpers/solr_format'
|
11
|
-
require_relative '../helpers/iso_to_solr'
|
12
|
-
|
13
13
|
|
14
14
|
module SearchSolrTools
|
15
15
|
module Harvesters
|
@@ -21,7 +21,7 @@ module SearchSolrTools
|
|
21
21
|
XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
|
22
22
|
JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
|
23
23
|
|
24
|
-
def initialize(env = 'development', die_on_failure
|
24
|
+
def initialize(env = 'development', die_on_failure: false)
|
25
25
|
@environment = env
|
26
26
|
@die_on_failure = die_on_failure
|
27
27
|
end
|
@@ -52,7 +52,7 @@ module SearchSolrTools
|
|
52
52
|
success = response.code == 200
|
53
53
|
puts "Error in ping request: #{response.body}" unless success
|
54
54
|
end
|
55
|
-
rescue => e
|
55
|
+
rescue StandardError => e
|
56
56
|
puts "Rest exception while pinging Solr: #{e}"
|
57
57
|
end
|
58
58
|
success
|
@@ -62,7 +62,7 @@ module SearchSolrTools
|
|
62
62
|
# to "ping" the data center. Returns true if the ping is successful (or, as
|
63
63
|
# in this default, no ping method was defined)
|
64
64
|
def ping_source
|
65
|
-
puts
|
65
|
+
puts 'Harvester does not have ping method defined, assuming true'
|
66
66
|
true
|
67
67
|
end
|
68
68
|
|
@@ -75,12 +75,12 @@ module SearchSolrTools
|
|
75
75
|
harvest_status
|
76
76
|
end
|
77
77
|
|
78
|
-
def delete_old_documents(timestamp, constraints, solr_core, force
|
78
|
+
def delete_old_documents(timestamp, constraints, solr_core, force: false)
|
79
79
|
constraints = sanitize_data_centers_constraints(constraints)
|
80
80
|
delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
|
81
81
|
solr = RSolr.connect url: solr_url + "/#{solr_core}"
|
82
82
|
unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
|
83
|
-
if unchanged_count
|
83
|
+
if unchanged_count.zero?
|
84
84
|
puts "All documents were updated after #{timestamp}, nothing to delete"
|
85
85
|
else
|
86
86
|
puts "Begin removing documents older than #{timestamp}"
|
@@ -91,8 +91,8 @@ module SearchSolrTools
|
|
91
91
|
def sanitize_data_centers_constraints(query_string)
|
92
92
|
# Remove lucene special characters, preserve the query parameter and compress whitespace
|
93
93
|
query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
|
94
|
-
query_string.gsub!(
|
95
|
-
query_string.gsub!(
|
94
|
+
query_string.gsub!('data_centers ', 'data_centers:')
|
95
|
+
query_string.gsub!('source ', 'source:')
|
96
96
|
query_string.squeeze(' ').strip
|
97
97
|
end
|
98
98
|
|
@@ -127,7 +127,7 @@ module SearchSolrTools
|
|
127
127
|
status
|
128
128
|
end
|
129
129
|
|
130
|
-
# TODO Need to return a specific type of failure:
|
130
|
+
# TODO: Need to return a specific type of failure:
|
131
131
|
# - Bad record content identified and no ingest attempted
|
132
132
|
# - Solr tries to ingest document and fails (bad content not detected prior to ingest)
|
133
133
|
# - Solr cannot insert document for reasons other than the document structure and content.
|
@@ -143,15 +143,15 @@ module SearchSolrTools
|
|
143
143
|
|
144
144
|
# Some docs will cause solr to time out during the POST
|
145
145
|
begin
|
146
|
-
RestClient.post(url, doc_serialized, content_type:
|
146
|
+
RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
|
147
147
|
success = response.code == 200
|
148
148
|
unless success
|
149
149
|
puts "Error for #{doc_serialized}\n\n response: #{response.body}"
|
150
150
|
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
151
151
|
end
|
152
152
|
end
|
153
|
-
rescue => e
|
154
|
-
# TODO Need to provide more detail re: this failure so we know whether to
|
153
|
+
rescue StandardError => e
|
154
|
+
# TODO: Need to provide more detail re: this failure so we know whether to
|
155
155
|
# exit the job with a status != 0
|
156
156
|
puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
|
157
157
|
status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
|
@@ -161,11 +161,11 @@ module SearchSolrTools
|
|
161
161
|
|
162
162
|
def get_serialized_doc(doc, content_type)
|
163
163
|
if content_type.eql?(XML_CONTENT_TYPE)
|
164
|
-
|
164
|
+
doc.respond_to?(:to_xml) ? doc.to_xml : doc
|
165
165
|
elsif content_type.eql?(JSON_CONTENT_TYPE)
|
166
|
-
|
166
|
+
MultiJson.dump(doc)
|
167
167
|
else
|
168
|
-
|
168
|
+
doc
|
169
169
|
end
|
170
170
|
end
|
171
171
|
|
@@ -178,17 +178,18 @@ module SearchSolrTools
|
|
178
178
|
|
179
179
|
begin
|
180
180
|
puts "Request: #{request_url}"
|
181
|
-
response = URI.open(
|
181
|
+
response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
|
182
182
|
rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
|
183
183
|
retries_left -= 1
|
184
184
|
puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
|
185
185
|
|
186
|
-
retry if retries_left
|
186
|
+
retry if retries_left.positive?
|
187
187
|
|
188
|
-
# TODO
|
188
|
+
# TODO: Do we really need this "die_on_failure" anymore? The empty return
|
189
189
|
# will cause the "No Documents" error to be thrown in the harvester class
|
190
190
|
# now, so it will pretty much always "die on failure"
|
191
191
|
raise e if @die_on_failure
|
192
|
+
|
192
193
|
return
|
193
194
|
end
|
194
195
|
doc = Nokogiri.XML(response)
|
@@ -216,7 +217,7 @@ module SearchSolrTools
|
|
216
217
|
spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
|
217
218
|
return true if spatial_coverages.nil?
|
218
219
|
|
219
|
-
spatial_coverages = spatial_coverages.text.split
|
220
|
+
spatial_coverages = spatial_coverages.text.split
|
220
221
|
|
221
222
|
# We've only seen the failure with 4 spatial coverage values
|
222
223
|
return true if spatial_coverages.size < 4
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'auto_suggest'
|
2
4
|
|
3
5
|
module SearchSolrTools
|
@@ -16,11 +18,11 @@ module SearchSolrTools
|
|
16
18
|
def fields
|
17
19
|
{
|
18
20
|
'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
|
19
|
-
'full_title'
|
20
|
-
'copy_parameters'
|
21
|
-
'full_platforms'
|
22
|
-
'full_sensors'
|
23
|
-
'full_authors'
|
21
|
+
'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
|
22
|
+
'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
|
23
|
+
'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
|
24
|
+
'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
|
25
|
+
'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
|
24
26
|
}
|
25
27
|
end
|
26
28
|
|
@@ -1,15 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'json'
|
2
4
|
require 'rest-client'
|
3
5
|
|
4
6
|
require 'search_solr_tools'
|
5
7
|
|
6
|
-
|
7
8
|
module SearchSolrTools
|
8
9
|
module Harvesters
|
9
10
|
# Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
|
10
11
|
class NsidcJson < Base
|
11
|
-
def initialize(env = 'development', die_on_failure
|
12
|
-
super
|
12
|
+
def initialize(env = 'development', die_on_failure: false)
|
13
|
+
super
|
13
14
|
@translator = Translators::NsidcJsonToSolr.new
|
14
15
|
Helpers::FacetConfiguration.import_bin_configuration(env)
|
15
16
|
end
|
@@ -19,7 +20,7 @@ module SearchSolrTools
|
|
19
20
|
RestClient.options(nsidc_json_url) do |response, _request, _result|
|
20
21
|
return response.code == 200
|
21
22
|
end
|
22
|
-
rescue
|
23
|
+
rescue StandardError
|
23
24
|
puts "Error trying to get options for #{nsidc_json_url} (ping)"
|
24
25
|
end
|
25
26
|
false
|
@@ -37,7 +38,7 @@ module SearchSolrTools
|
|
37
38
|
|
38
39
|
status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
39
40
|
|
40
|
-
status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs]
|
41
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?
|
41
42
|
|
42
43
|
# Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
|
43
44
|
status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
|
@@ -66,7 +67,7 @@ module SearchSolrTools
|
|
66
67
|
# @param id [String] NSIDC authoritative ID for the dataset
|
67
68
|
# @return [Hash] Parsed version of the JSON response
|
68
69
|
def fetch_json_from_nsidc(id)
|
69
|
-
json_response = RestClient.get(nsidc_json_url
|
70
|
+
json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
|
70
71
|
JSON.parse(json_response)
|
71
72
|
end
|
72
73
|
|
@@ -81,13 +82,13 @@ module SearchSolrTools
|
|
81
82
|
id = r.text.split('/').last
|
82
83
|
begin
|
83
84
|
docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
|
84
|
-
rescue => e
|
85
|
+
rescue StandardError => e
|
85
86
|
puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
|
86
87
|
failure_ids << id
|
87
88
|
end
|
88
89
|
end
|
89
90
|
|
90
|
-
{ num_docs: all_docs.size, add_docs: docs, failure_ids:
|
91
|
+
{ num_docs: all_docs.size, add_docs: docs, failure_ids: }
|
91
92
|
end
|
92
93
|
end
|
93
94
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'iso_namespaces'
|
2
4
|
|
3
5
|
module SearchSolrTools
|
4
6
|
module Helpers
|
@@ -8,12 +10,10 @@ module SearchSolrTools
|
|
8
10
|
NORTHERN_GLOBAL_BOUNDARY = 85.0
|
9
11
|
|
10
12
|
def self.bounding_box_hash_from_geo_json(geometry)
|
11
|
-
if geometry_is_point?(geometry)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
|
16
|
-
end
|
13
|
+
return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
|
14
|
+
|
15
|
+
bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
|
16
|
+
{ west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.geometry_is_point?(geometry)
|
@@ -30,7 +30,7 @@ module SearchSolrTools
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def self.box_invalid?(box)
|
33
|
-
[
|
33
|
+
%i[north south east west].any? { |d| box[d].to_s.empty? }
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'json'
|
2
4
|
require 'rest_client'
|
3
5
|
require 'singleton'
|
@@ -8,7 +10,7 @@ module SearchSolrTools
|
|
8
10
|
class FacetConfiguration
|
9
11
|
include Singleton
|
10
12
|
def self.import_bin_configuration(env)
|
11
|
-
@bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url]
|
13
|
+
@bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
|
12
14
|
end
|
13
15
|
|
14
16
|
def self.get_facet_bin(facet_name)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module SearchSolrTools
|
2
4
|
module Helpers
|
3
5
|
class HarvestStatus
|
@@ -7,17 +9,17 @@ module SearchSolrTools
|
|
7
9
|
INGEST_ERR_INVALID_DOC = :invalid
|
8
10
|
INGEST_ERR_SOLR_ERROR = :solr_error
|
9
11
|
OTHER_ERROR = :other
|
10
|
-
PING_SOLR = :ping_solr
|
11
|
-
PING_SOURCE = :ping_source
|
12
|
+
PING_SOLR = :ping_solr # used for initialize only
|
13
|
+
PING_SOURCE = :ping_source # used for initialize only
|
12
14
|
|
13
|
-
ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
|
15
|
+
ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
+
attr_accessor :ping_solr, :ping_source
|
18
|
+
attr_reader :status
|
17
19
|
|
18
20
|
# init_info is an optional hash that contains the various status keys and the documents to
|
19
21
|
# associate with them
|
20
|
-
def initialize(init_info={})
|
22
|
+
def initialize(init_info = {})
|
21
23
|
@status = { INGEST_OK => 0 }
|
22
24
|
@ping_solr = true
|
23
25
|
@ping_source = true
|
@@ -36,9 +38,9 @@ module SearchSolrTools
|
|
36
38
|
end
|
37
39
|
|
38
40
|
def ok?
|
39
|
-
ERROR_STATUS.each { |s| return false unless @status[s]
|
41
|
+
ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
|
40
42
|
@ping_solr && @ping_source
|
41
43
|
end
|
42
44
|
end
|
43
45
|
end
|
44
|
-
end
|
46
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module SearchSolrTools
|
2
4
|
module Helpers
|
3
5
|
# Helper class to provide default namespaces for XML document parsing.
|
@@ -25,7 +27,7 @@ module SearchSolrTools
|
|
25
27
|
'srv' => 'http://www.isotc211.org/2005/srv',
|
26
28
|
'xlink' => 'http://www.w3.org/1999/xlink',
|
27
29
|
'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
|
28
|
-
}
|
30
|
+
}.freeze
|
29
31
|
end
|
30
32
|
end
|
31
33
|
end
|