search_solr_tools 4.2.0 → 5.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/README.md +6 -6
- data/bin/search_solr_tools +58 -2
- data/lib/search_solr_tools.rb +9 -7
- data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
- data/lib/search_solr_tools/harvesters/adc.rb +2 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +2 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -1
- data/lib/search_solr_tools/harvesters/base.rb +65 -11
- data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -0
- data/lib/search_solr_tools/harvesters/data_one.rb +2 -0
- data/lib/search_solr_tools/harvesters/echo.rb +2 -0
- data/lib/search_solr_tools/harvesters/gtnp.rb +1 -0
- data/lib/search_solr_tools/harvesters/ices.rb +3 -0
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +3 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +2 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +3 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +2 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
- data/lib/search_solr_tools/harvesters/oai.rb +3 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +2 -0
- data/lib/search_solr_tools/harvesters/r2r.rb +2 -2
- data/lib/search_solr_tools/harvesters/rda.rb +2 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +2 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +3 -0
- data/lib/search_solr_tools/helpers/data_one_format.rb +3 -3
- data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -2
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +3 -3
- data/lib/search_solr_tools/helpers/r2r_format.rb +3 -3
- data/lib/search_solr_tools/helpers/selectors.rb +1 -2
- data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +3 -3
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +2 -2
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +1 -1
- data/lib/search_solr_tools/helpers/usgs_format.rb +2 -2
- data/lib/search_solr_tools/selectors/adc.rb +2 -1
- data/lib/search_solr_tools/selectors/data_one.rb +2 -1
- data/lib/search_solr_tools/selectors/echo_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/ices_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +2 -1
- data/lib/search_solr_tools/selectors/nmi.rb +2 -1
- data/lib/search_solr_tools/selectors/nodc_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/pdc_iso.rb +2 -1
- data/lib/search_solr_tools/selectors/r2r.rb +3 -1
- data/lib/search_solr_tools/selectors/rda.rb +2 -1
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +3 -1
- data/lib/search_solr_tools/selectors/usgs_iso.rb +3 -1
- data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +6 -0
- data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
- data/lib/search_solr_tools/version.rb +1 -1
- data/search_solr_tools.gemspec +22 -23
- metadata +47 -55
@@ -3,6 +3,7 @@ require 'rest-client'
|
|
3
3
|
|
4
4
|
require 'search_solr_tools'
|
5
5
|
|
6
|
+
|
6
7
|
module SearchSolrTools
|
7
8
|
module Harvesters
|
8
9
|
# Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
|
@@ -13,6 +14,17 @@ module SearchSolrTools
|
|
13
14
|
Helpers::FacetConfiguration.import_bin_configuration(env)
|
14
15
|
end
|
15
16
|
|
17
|
+
# Checks whether the NSIDC JSON source answers an HTTP OPTIONS request.
#
# The request is sent to +nsidc_json_url+ through RestClient. Any
# StandardError raised while making the request is reported on stdout
# (including the error itself, so the cause of a failed ping is not lost)
# and treated as "source not reachable".
#
# @return [Boolean] true only when the response code is 200; false for any
#   other response code or when the request raises.
def ping_source
  RestClient.options(nsidc_json_url) do |response, _request, _result|
    # Returning from inside the block exits ping_source with the verdict.
    return response.code == 200
  end
  false
rescue StandardError => e
  puts "Error trying to get options for #{nsidc_json_url} (ping): #{e}"
  false
end
|
27
|
+
|
16
28
|
def harvest_and_delete
|
17
29
|
puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
|
18
30
|
super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
|
@@ -22,8 +34,22 @@ module SearchSolrTools
|
|
22
34
|
# this is the main entry point for the class
|
23
35
|
# Harvests the translated NSIDC documents and inserts them into Solr.
#
# The status object returned by +insert_solr_docs+ is updated with a
# HARVEST_NO_DOCS entry when the harvest reported zero documents, and with
# one HARVEST_FAILURE count per failed id.
#
# @return [nil] when the resulting status is ok
# @raise [Errors::HarvestError] when the status is not ok, or when any other
#   StandardError occurs (that error is printed, then wrapped in a new
#   status under OTHER_ERROR)
def harvest_nsidc_json_into_solr
  result = docs_with_translated_entries_from_nsidc

  status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE

  status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0

  # Record the number of harvest failures; note that if this is 0, that's OK, the status will stay at 0
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)

  raise Errors::HarvestError, status unless status.ok?
rescue Errors::HarvestError
  # Already the error type callers expect; pass it through untouched so the
  # generic handler below does not re-wrap it.
  raise
rescue StandardError => e
  puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
  puts e.backtrace
  status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
  raise Errors::HarvestError, status
end
|
28
54
|
|
29
55
|
def nsidc_json_url
|
@@ -33,7 +59,7 @@ module SearchSolrTools
|
|
33
59
|
def result_ids_from_nsidc
|
34
60
|
url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
|
35
61
|
SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
|
36
|
-
get_results
|
62
|
+
get_results(url, '//xmlns:identifier') || []
|
37
63
|
end
|
38
64
|
|
39
65
|
# Fetch a JSON representation of a dataset's metadata
|
@@ -48,7 +74,8 @@ module SearchSolrTools
|
|
48
74
|
docs = []
|
49
75
|
failure_ids = []
|
50
76
|
|
51
|
-
|
77
|
+
all_docs = result_ids_from_nsidc
|
78
|
+
all_docs.each do |r|
|
52
79
|
# Each result looks like:
|
53
80
|
# oai:nsidc.org/AE_L2A
|
54
81
|
id = r.text.split('/').last
|
@@ -60,7 +87,7 @@ module SearchSolrTools
|
|
60
87
|
end
|
61
88
|
end
|
62
89
|
|
63
|
-
{ add_docs: docs, failure_ids: failure_ids }
|
90
|
+
{ num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
|
64
91
|
end
|
65
92
|
end
|
66
93
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module SearchSolrTools
  module Helpers
    # Tallies the outcome of a harvest/ingest run.
    #
    # Keeps one entry per status key (INGEST_OK plus every key in
    # ERROR_STATUS) and two flags, +ping_solr+ and +ping_source+. The run
    # counts as successful (see #ok?) only when every error entry is 0 and
    # both ping flags are truthy.
    class HarvestStatus
      INGEST_OK = :ok
      HARVEST_NO_DOCS = :harvest_none
      HARVEST_FAILURE = :harvest_fail
      INGEST_ERR_INVALID_DOC = :invalid
      INGEST_ERR_SOLR_ERROR = :solr_error
      OTHER_ERROR = :other
      PING_SOLR = :ping_solr # used for initialize only
      PING_SOURCE = :ping_source # used for initialize only

      # Status keys that make #ok? return false when their entry is not 0.
      ERROR_STATUS = [
        HARVEST_NO_DOCS,
        HARVEST_FAILURE,
        INGEST_ERR_INVALID_DOC,
        INGEST_ERR_SOLR_ERROR,
        OTHER_ERROR
      ].freeze

      attr_reader :status
      attr_accessor :ping_solr, :ping_source

      # init_info is an optional hash that contains the various status keys and the documents to
      # associate with them
      #
      # Entries whose key is a known status key replace the initial 0 for that
      # key; the value is stored exactly as given. PING_SOLR / PING_SOURCE
      # entries override the ping flags, which default to true. Any other key
      # is ignored.
      #
      # @param init_info [Hash] optional initial values, keyed by the constants above
      def initialize(init_info = {})
        @status = { INGEST_OK => 0 }
        ERROR_STATUS.each { |s| @status[s] = 0 }
        @ping_solr = true
        @ping_source = true

        init_info.each do |key, count|
          @status[key] = count if @status.key?(key)
        end

        @ping_solr = init_info[PING_SOLR] if init_info.key?(PING_SOLR)
        @ping_source = init_info[PING_SOURCE] if init_info.key?(PING_SOURCE)
      end

      # Adds +count+ to the entry for +status+.
      #
      # @param status [Symbol] INGEST_OK or one of ERROR_STATUS
      # @param count [Integer] amount to add (defaults to 1)
      # @return the updated entry
      # @raise [ArgumentError] when +status+ is not a tracked status key
      def record_status(status, count = 1)
        # Fail with a clear message instead of a NoMethodError on nil.
        raise ArgumentError, "Unknown harvest status: #{status.inspect}" unless @status.key?(status)

        @status[status] += count
      end

      # @return [Boolean] truthy when every ERROR_STATUS entry equals 0 and
      #   both ping flags are truthy
      def ok?
        # Compare with == rather than zero?: initialize stores init_info
        # values as given, so an entry is not guaranteed to be numeric.
        ERROR_STATUS.all? { |s| @status[s] == 0 } && @ping_solr && @ping_source
      end
    end
  end
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'date'
|
2
2
|
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
3
|
+
require_relative 'iso_namespaces'
|
4
|
+
require_relative 'solr_format'
|
5
|
+
require_relative 'iso_to_solr_format'
|
6
6
|
|
7
7
|
module SearchSolrTools
|
8
8
|
module Helpers
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '
|
3
|
-
require_relative '
|
1
|
+
require_relative 'iso_namespaces'
|
2
|
+
require_relative 'iso_to_solr_format'
|
3
|
+
require_relative 'solr_format'
|
4
4
|
|
5
5
|
module SearchSolrTools
|
6
6
|
module Helpers
|
@@ -4,6 +4,9 @@ require 'rgeo/geo_json'
|
|
4
4
|
require 'rgeo/wkrep/wkt_parser'
|
5
5
|
|
6
6
|
require 'search_solr_tools'
|
7
|
+
require_relative '../helpers/solr_format'
|
8
|
+
require_relative '../helpers/translate_temporal_coverage'
|
9
|
+
require_relative '../helpers/translate_spatial_coverage'
|
7
10
|
|
8
11
|
module SearchSolrTools
|
9
12
|
module Translators
|