search_solr_tools 4.2.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/README.md +6 -6
- data/bin/search_solr_tools +58 -2
- data/lib/search_solr_tools.rb +9 -7
- data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
- data/lib/search_solr_tools/harvesters/adc.rb +2 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +2 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -1
- data/lib/search_solr_tools/harvesters/base.rb +65 -11
- data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -0
- data/lib/search_solr_tools/harvesters/data_one.rb +2 -0
- data/lib/search_solr_tools/harvesters/echo.rb +2 -0
- data/lib/search_solr_tools/harvesters/gtnp.rb +1 -0
- data/lib/search_solr_tools/harvesters/ices.rb +3 -0
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +3 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +2 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +3 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +2 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
- data/lib/search_solr_tools/harvesters/oai.rb +3 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +2 -0
- data/lib/search_solr_tools/harvesters/r2r.rb +2 -2
- data/lib/search_solr_tools/harvesters/rda.rb +2 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +2 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +3 -0
- data/lib/search_solr_tools/helpers/data_one_format.rb +3 -3
- data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -2
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +3 -3
- data/lib/search_solr_tools/helpers/r2r_format.rb +3 -3
- data/lib/search_solr_tools/helpers/selectors.rb +1 -2
- data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +3 -3
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +2 -2
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +1 -1
- data/lib/search_solr_tools/helpers/usgs_format.rb +2 -2
- data/lib/search_solr_tools/selectors/adc.rb +2 -1
- data/lib/search_solr_tools/selectors/data_one.rb +2 -1
- data/lib/search_solr_tools/selectors/echo_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/ices_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +2 -1
- data/lib/search_solr_tools/selectors/nmi.rb +2 -1
- data/lib/search_solr_tools/selectors/nodc_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/pdc_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/r2r.rb +3 -1
- data/lib/search_solr_tools/selectors/rda.rb +2 -1
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +3 -1
- data/lib/search_solr_tools/selectors/usgs_iso.rb +3 -1
- data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +6 -0
- data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
- data/lib/search_solr_tools/version.rb +1 -1
- data/search_solr_tools.gemspec +22 -23
- metadata +47 -55
@@ -3,6 +3,7 @@ require 'rest-client'
|
|
3
3
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
|
6
|
+
|
6
7
|
module SearchSolrTools
|
7
8
|
module Harvesters
|
8
9
|
# Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
|
@@ -13,6 +14,17 @@ module SearchSolrTools
|
|
13
14
|
Helpers::FacetConfiguration.import_bin_configuration(env)
|
14
15
|
end
|
15
16
|
|
17
|
+
def ping_source
|
18
|
+
begin
|
19
|
+
RestClient.options(nsidc_json_url) do |response, _request, _result|
|
20
|
+
return response.code == 200
|
21
|
+
end
|
22
|
+
rescue => e
|
23
|
+
puts "Error trying to get options for #{nsidc_json_url} (ping)"
|
24
|
+
end
|
25
|
+
false
|
26
|
+
end
|
27
|
+
|
16
28
|
def harvest_and_delete
|
17
29
|
puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
|
18
30
|
super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
|
@@ -22,8 +34,22 @@ module SearchSolrTools
|
|
22
34
|
# this is the main entry point for the class
|
23
35
|
def harvest_nsidc_json_into_solr
|
24
36
|
result = docs_with_translated_entries_from_nsidc
|
25
|
-
|
26
|
-
|
37
|
+
|
38
|
+
status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
39
|
+
|
40
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
|
41
|
+
|
42
|
+
# Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
|
43
|
+
status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
|
44
|
+
|
45
|
+
raise Errors::HarvestError, status unless status.ok?
|
46
|
+
rescue Errors::HarvestError => e
|
47
|
+
raise e
|
48
|
+
rescue StandardError => e
|
49
|
+
puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
|
50
|
+
puts e.backtrace
|
51
|
+
status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
|
52
|
+
raise Errors::HarvestError, status
|
27
53
|
end
|
28
54
|
|
29
55
|
def nsidc_json_url
|
@@ -33,7 +59,7 @@ module SearchSolrTools
|
|
33
59
|
def result_ids_from_nsidc
|
34
60
|
url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
|
35
61
|
SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
|
36
|
-
get_results
|
62
|
+
get_results(url, '//xmlns:identifier') || []
|
37
63
|
end
|
38
64
|
|
39
65
|
# Fetch a JSON representation of a dataset's metadata
|
@@ -48,7 +74,8 @@ module SearchSolrTools
|
|
48
74
|
docs = []
|
49
75
|
failure_ids = []
|
50
76
|
|
51
|
-
|
77
|
+
all_docs = result_ids_from_nsidc
|
78
|
+
all_docs.each do |r|
|
52
79
|
# Each result looks like:
|
53
80
|
# oai:nsidc.org/AE_L2A
|
54
81
|
id = r.text.split('/').last
|
@@ -60,7 +87,7 @@ module SearchSolrTools
|
|
60
87
|
end
|
61
88
|
end
|
62
89
|
|
63
|
-
{ add_docs: docs, failure_ids: failure_ids }
|
90
|
+
{ num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
|
64
91
|
end
|
65
92
|
end
|
66
93
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Helpers
|
3
|
+
class HarvestStatus
|
4
|
+
INGEST_OK = :ok
|
5
|
+
HARVEST_NO_DOCS = :harvest_none
|
6
|
+
HARVEST_FAILURE = :harvest_fail
|
7
|
+
INGEST_ERR_INVALID_DOC = :invalid
|
8
|
+
INGEST_ERR_SOLR_ERROR = :solr_error
|
9
|
+
OTHER_ERROR = :other
|
10
|
+
PING_SOLR = :ping_solr # used for initialize only
|
11
|
+
PING_SOURCE = :ping_source # used for initialize only
|
12
|
+
|
13
|
+
ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
|
14
|
+
|
15
|
+
attr_reader :status, :ping_solr, :ping_source
|
16
|
+
attr_writer :ping_solr, :ping_source
|
17
|
+
|
18
|
+
# init_info is an optional hash that contains the various status keys and the documents to
|
19
|
+
# associate with them
|
20
|
+
def initialize(init_info={})
|
21
|
+
@status = { INGEST_OK => 0 }
|
22
|
+
@ping_solr = true
|
23
|
+
@ping_source = true
|
24
|
+
ERROR_STATUS.each { |s| @status[s] = 0 }
|
25
|
+
|
26
|
+
init_info.each do |key, count|
|
27
|
+
@status[key] = count if @status.include? key
|
28
|
+
end
|
29
|
+
|
30
|
+
@ping_solr = init_info[PING_SOLR] if init_info.include? PING_SOLR
|
31
|
+
@ping_source = init_info[PING_SOURCE] if init_info.include? PING_SOURCE
|
32
|
+
end
|
33
|
+
|
34
|
+
def record_status(status, count = 1)
|
35
|
+
@status[status] += count
|
36
|
+
end
|
37
|
+
|
38
|
+
def ok?
|
39
|
+
ERROR_STATUS.each { |s| return false unless @status[s] == 0 }
|
40
|
+
@ping_solr && @ping_source
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'date'
|
2
2
|
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
3
|
+
require_relative 'iso_namespaces'
|
4
|
+
require_relative 'solr_format'
|
5
|
+
require_relative 'iso_to_solr_format'
|
6
6
|
|
7
7
|
module SearchSolrTools
|
8
8
|
module Helpers
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -4,6 +4,9 @@ require 'rgeo/geo_json'
|
|
4
4
|
require 'rgeo/wkrep/wkt_parser'
|
5
5
|
|
6
6
|
require 'search_solr_tools'
|
7
|
+
require_relative '../helpers/solr_format'
|
8
|
+
require_relative '../helpers/translate_temporal_coverage'
|
9
|
+
require_relative '../helpers/translate_spatial_coverage'
|
7
10
|
|
8
11
|
module SearchSolrTools
|
9
12
|
module Translators
|