search_solr_tools 6.1.0 → 6.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/bin/search_solr_tools +1 -13
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/harvesters/base.rb +0 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
- data/lib/search_solr_tools/version.rb +1 -1
- data/lib/search_solr_tools.rb +1 -2
- metadata +2 -44
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,62 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from the NCDC Paleo catalog and inserts it into Solr after
    # it has been translated.
    #
    # Relies on helpers that are not defined in this file (get_results,
    # insert_solr_docs, create_new_solr_add_doc_with_child) -- presumably
    # inherited from Base; confirm there.
    class NcdcPaleo < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, harvest errors are re-raised
      #   instead of only being printed
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 50
        @translator = Helpers::IsoToSolr.new :ncdc_paleo
      end

      # Announces the harvest, then hands the harvest method and the Solr query
      # identifying this data center's documents to the parent implementation.
      def harvest_and_delete
        puts "Running harvest of NCDC Paleo catalog from #{ncdc_paleo_url}"
        super(method(:harvest_ncdc_paleo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]}\"")
      end

      # Pages through the catalog @page_size records at a time (CSW start
      # positions begin at 1) until a page comes back empty or falsy.
      def harvest_ncdc_paleo_into_solr
        start_index = 1
        while (entries = get_results_from_ncdc_paleo_url(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs get_docs_with_translated_entries_from_ncdc_paleo(entries)
          rescue => e
            # Best effort: report the failed page and move on unless asked to die.
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # @return the catalog URL configured for the current environment
      def ncdc_paleo_url
        SolrEnvironments[@environment][:ncdc_paleo_url]
      end

      # Fetches one page of csw:Record nodes starting at +start_index+.
      def get_results_from_ncdc_paleo_url(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//csw:Record'
      end

      # For each CSW record, fetches the full RDF description by document id,
      # translates it, and appends the authoritative id and dataset URL fields.
      #
      # @param entries [Enumerable] csw:Record nodes from one results page
      # @return [Array] one Solr add-document root per entry
      def get_docs_with_translated_entries_from_ncdc_paleo(entries)
        auth_ids = entries.map { |e| e.xpath("./dc:identifier[@scheme='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID']").text }

        auth_ids.map do |record|
          result_xml = get_results("https://gis.ncdc.noaa.gov/gptpaleo/csw?getxml=#{record}",
                                   '/rdf:RDF/rdf:Description').first
          solr_doc = create_new_solr_add_doc_with_child(@translator.translate(result_xml).root)
          insert_node = solr_doc.at_xpath('//doc')
          insert_node.add_child("<field name='authoritative_id'>#{record}</field>")
          # The fragment must be well-formed: the closing </field> tag was
          # previously missing and left to the XML parser's error recovery.
          insert_node.add_child("<field name='dataset_url'>https://gis.ncdc.noaa.gov/gptpaleo/catalog/search/resource/details.page?uuid=#{record}</field>")
          solr_doc.root
        end
      end

      # Builds the CSW GetRecords request URL for this catalog.
      def build_csw_request(resultType = 'results', maxRecords = '1000', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(ncdc_paleo_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition)
      end
    end
  end
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative 'oai'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # OAI harvester for the NMI feed, translated with the :nmi IsoToSolr rules.
    class Nmi < Oai
      def initialize(env = 'development', die_on_failure = false)
        super
        @translator = Helpers::IsoToSolr.new :nmi
        @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]
      end

      # URL of the NMI feed for the current environment.
      def metadata_url
        SolrEnvironments[@environment][:nmi_url]
      end

      # NMI's feed provides no resumption token and returns every record in a
      # single response, so the token is blanked up front to end the harvest
      # loop after this one request.
      def results
        @resumption_token = ''
        get_results(request_string, '//oai:ListRecords', '')
          .xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
      end

      private

      # Query parameters for the single ListRecords request.
      def request_params
        { verb: 'ListRecords', metadataPrefix: 'dif' }
      end
    end
  end
end
|
@@ -1,75 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from NODC and inserts it into Solr after it has been translated.
    #
    # Relies on helpers that are not defined in this file (get_results,
    # insert_solr_docs, create_new_solr_add_doc_with_child) -- presumably
    # inherited from Base; confirm there.
    class Nodc < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, harvest errors are re-raised
      #   instead of only being printed
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 50
        @translator = Helpers::IsoToSolr.new :nodc
      end

      # Announces the harvest, then hands the harvest method and the Solr query
      # identifying this data center's documents to the parent implementation.
      def harvest_and_delete
        puts "Running harvest of NODC catalog from #{nodc_url}"
        super(method(:harvest_nodc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]}\"")
      end

      # Get translated entries from NODC and add them to Solr; this is the main
      # entry point for the class. Pages @page_size records at a time until a
      # page comes back empty or falsy.
      def harvest_nodc_into_solr
        start_index = 1
        while (entries = get_results_from_nodc(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs get_docs_with_translated_entries_from_nodc(entries)
          rescue => e
            # Best effort: report the failed page and move on unless asked to die.
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # @return the catalog URL configured for the current environment
      def nodc_url
        SolrEnvironments[@environment][:nodc_url]
      end

      # Fetches one page of gmi:MI_Metadata nodes starting at +start_index+.
      def get_results_from_nodc(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//gmi:MI_Metadata'
      end

      # @param entries [Enumerable] metadata nodes from one results page
      # @return [Array] one Solr add-document per entry
      def get_docs_with_translated_entries_from_nodc(entries)
        entries.map do |entry|
          create_new_solr_add_doc_with_child(@translator.translate(entry).root)
        end
      end

      # Builds the CSW GetRecords request URL, restricted by #bbox_constraint.
      def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(nodc_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition,
                                                     'constraint' => bbox_constraint,
                                                     'outputSchema' => 'http://www.isotc211.org/2005/gmd')
      end

      # URL-escaped OGC filter limiting results to the bounding box
      # west -180, south 45, east 180, north 90.
      #
      # @return [String] the escaped <Filter> XML
      def bbox_constraint
        bbox = {
          west: '-180',
          south: '45',
          east: '180',
          north: '90'
        }

        filter = '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
                 'xmlns:gml="http://www.opengis.net/gml" ' \
                 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
                 '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
                 "<gml:lowerCorner>#{bbox[:west]} #{bbox[:south]}</gml:lowerCorner>" \
                 "<gml:upperCorner>#{bbox[:east]} #{bbox[:north]}</gml:upperCorner>" \
                 '</gml:Envelope></ogc:BBOX></Filter>'

        # URI.encode is obsolete and was removed in Ruby 3.0; the default
        # parser's #escape is the method it delegated to.
        URI::DEFAULT_PARSER.escape(filter)
      end
    end
  end
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/query_builder'
|
3
|
-
|
4
|
-
require 'json'
|
5
|
-
|
6
|
-
module SearchSolrTools
  module Harvesters
    # Base class for harvesting OAI feeds into Solr.
    #
    # Subclasses implement #results, #metadata_url and #request_params, and are
    # expected to set @translator and @data_centers. The harvest loop is driven
    # by @resumption_token: nil means no request has been made yet, and an
    # empty string means there is nothing more to fetch.
    class Oai < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, harvest errors are re-raised
      #   instead of only being printed
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        # Used in query string params (resumptionToken); updated during the
        # harvest based on the response from the server.
        @resumption_token = nil
      end

      # @param url [String] URL to escape
      # @return [String] the escaped URL
      def encode_data_provider_url(url)
        # URI.encode is obsolete and was removed in Ruby 3.0; the default
        # parser's #escape is the method it delegated to.
        URI::DEFAULT_PARSER.escape(url)
      end

      # Announces the harvest, then hands the harvest method and the Solr query
      # identifying this data center's documents to the parent implementation.
      def harvest_and_delete
        puts "Running #{self.class.name} at #{metadata_url}"
        super(method(:harvest), %(data_centers:"#{@data_centers}"))
      end

      # Requests pages until the resumption token comes back empty.
      #
      # A page that fails is reported and skipped unless @die_on_failure is
      # set. If the failure left the token unchanged, the next iteration would
      # repeat the identical request indefinitely, so the loop stops instead.
      def harvest
        while @resumption_token.nil? || !@resumption_token.empty?
          token_before_request = @resumption_token
          begin
            insert_solr_docs(translated_docs(results))
          rescue => e
            puts "ERROR: #{e.class} #{e}"
            raise e if @die_on_failure
            break if @resumption_token == token_before_request
          end
        end
      end

      # Subclasses fetch one page of records here and update @resumption_token.
      #
      # @raise [NotImplementedError] always, in this base class
      def results
        raise NotImplementedError
      end

      # Subclasses return the feed URL here.
      #
      # @raise [NotImplementedError] always, in this base class
      def metadata_url
        raise NotImplementedError
      end

      # @param entries [Enumerable] record nodes returned by #results
      # @return [Array] one Solr add-document per entry
      def translated_docs(entries)
        entries.map { |e| create_new_solr_add_doc_with_child(@translator.translate(e).root) }
      end

      private

      # Subclasses return the query parameters for the next request here.
      #
      # @raise [NotImplementedError] always, in this base class
      def request_params
        raise NotImplementedError
      end

      # @return [String] the feed URL followed by the built query string
      def request_string
        "#{metadata_url}#{Helpers::QueryBuilder.build(request_params)}"
      end
    end
  end
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
require_relative 'oai'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from the Polar Data Catalogue and inserts it into
    # Solr after it has been translated.
    class Pdc < Oai
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, harvest errors are re-raised
      #   instead of only being printed
      def initialize(env = 'development', die_on_failure = false)
        super
        @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]
        @translator = Helpers::IsoToSolr.new :pdc
      end

      # @return the feed URL configured for the current environment
      def metadata_url
        SolrEnvironments[@environment][:pdc_url]
      end

      # Fetches one page of records and stores the resumption token that
      # selects the next page.
      #
      # @return the oai:record nodes of the fetched page
      def results
        list_records_oai_response = get_results(request_string, '//oai:ListRecords', '')

        token_node = list_records_oai_response.xpath('.//oai:resumptionToken', Helpers::IsoNamespaces.namespaces).first
        # A response without a resumptionToken element has no further pages.
        # Treat it like an empty token so the harvest loop ends, instead of
        # calling #text on nil.
        @resumption_token = token_node ? token_node.text : ''

        list_records_oai_response.xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
      end

      private

      # Query parameters for the next ListRecords request.
      def request_params
        # If a 'resumptionToken' is supplied with any arguments other than 'verb',
        # the response from PDC gives a badArgument error, saying "The argument
        # 'resumptionToken' must be supplied without other arguments"
        {
          verb: 'ListRecords',
          metadataPrefix: @resumption_token.nil? ? 'iso' : nil,
          resumptionToken: @resumption_token
        }.delete_if { |_k, v| v.nil? }
      end
    end
  end
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'rest-client'
|
5
|
-
|
6
|
-
module SearchSolrTools
  module Harvesters
    # Harvests the R2R listing: reads the index page at the configured URL,
    # follows every link on it, and indexes the metadata found behind each.
    class R2R < Base
      def initialize(env = 'development', die_on_failure = false)
        super
        @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]
        @translator = Helpers::IsoToSolr.new :r2r
        @metadata_url = SolrEnvironments[@environment][:r2r_url]
      end

      # Announces the harvest, then delegates to the parent with the harvest
      # method and the Solr query identifying this data center's documents.
      def harvest_and_delete
        puts "Running #{self.class.name} at #{@metadata_url}"
        super(method(:harvest), %(data_centers:"#{@data_centers}"))
      end

      # rubocop: disable MethodLength
      # rubocop: disable AbcSize
      def harvest
        # The index page (e.g. http://get.rvdata.us/services/cruise/) lists the
        # available records; each linked document is read from its root
        # <gmi:MI_Metadata> tag.
        puts "Getting list of records from #{@data_centers}"
        RestClient.get(@metadata_url) do |resp, _req, _result, &_block|
          if resp.code != 200
            puts "Got code #{resp.code} from #{@metadata_url}, skipping R2R harvest."
            next
          end

          anchors = Nokogiri::HTML(resp.body).xpath('//a')
          record_urls = anchors.map { |anchor| "#{@metadata_url}#{anchor.attr('href')}" }

          # Work through the records in batches of 50.
          record_urls.each_slice(50) do |batch|
            # One Nokogiri node rooted at <gmi:MI_Metadata> per URL.
            metadata_nodes = batch.map { |record_url| get_results(record_url, '//gmi:MI_Metadata').first }

            begin
              solr_docs = metadata_nodes.map do |metadata|
                create_new_solr_add_doc_with_child(@translator.translate(metadata).root)
              end

              insert_solr_docs(solr_docs)
            rescue => e
              puts "ERROR: #{e}"
              raise e if @die_on_failure
            end
          end
        end
      end
    end
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative 'oai'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # OAI harvester for the RDA feed, translated with the :rda IsoToSolr rules.
    class Rda < Oai
      def initialize(env = 'development', die_on_failure = false)
        super
        @translator = Helpers::IsoToSolr.new :rda
        @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]
      end

      # URL of the RDA feed for the current environment.
      def metadata_url
        SolrEnvironments[@environment][:rda_url]
      end

      # RDA's feed provides no resumption token and returns every record in a
      # single response, so the token is blanked up front to end the harvest
      # loop after this one request.
      def results
        @resumption_token = ''
        get_results(request_string, '//oai:ListRecords', '')
          .xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
      end

      private

      # Query parameters for the single ListRecords request.
      def request_params
        { verb: 'ListRecords', metadataPrefix: 'dif' }
      end
    end
  end
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # Pulls dataset entries from the TDAR search feed, translates them and
    # loads them into Solr.
    class Tdar < Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @translator = Helpers::IsoToSolr.new :tdar
        @page_size = 100
      end

      # Announces the harvest, then delegates to the parent with the harvest
      # method and the Solr query identifying this data center's documents.
      def harvest_and_delete
        puts "Running harvest of TDAR catalog from #{tdar_url}"
        super(method(:harvest_tdar_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]}\"")
      end

      # Walks the feed one page at a time, stopping on an empty page or once
      # the advertised total has been reached.
      def harvest_tdar_into_solr
        offset = 0
        harvested_count = 0
        expected_count = total_results
        while (page = get_results_from_tdar(offset)) && (page.length > 0)
          begin
            insert_solr_docs(get_docs_with_translated_entries_from_tdar(page))
          rescue => e
            puts "ERROR: #{e}\n\n"
            raise e if @die_on_failure
          end

          # Requesting a page past the last record results in an error, so
          # stop as soon as everything expected has been seen.
          harvested_count += page.length
          break if harvested_count >= expected_count

          offset += @page_size
        end
      end

      # Feed URL for the current environment.
      def tdar_url
        SolrEnvironments[@environment][:tdar_url]
      end

      # One page of atom:entry nodes beginning at +start_record+.
      def get_results_from_tdar(start_record)
        request = build_request(@page_size, start_record)
        get_results(request, './/atom:entry', 'application/xml')
      end

      # Turns each feed entry into a Solr add-document.
      def get_docs_with_translated_entries_from_tdar(entries)
        entries.map { |entry| create_new_solr_add_doc_with_child(@translator.translate(entry).root) }
      end

      # Search URL for datasets inside the box longitude -180..180,
      # latitude 45..90, with the given page size and starting record.
      def build_request(max_records = '25', start_record = '0')
        query = [
          '_tDAR.searchType=ACADIS_RSS',
          'resourceTypes=DATASET',
          'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180',
          'groups[0].latitudeLongitudeBoxes[0].minimumLatitude=45',
          'groups[0].latitudeLongitudeBoxes[0].minimumLongitude=-180',
          'groups[0].latitudeLongitudeBoxes[0].maximumLatitude=90',
          'geoMode=ENVELOPE',
          "recordsPerPage=#{max_records}",
          "startRecord=#{start_record}"
        ].join('&')

        tdar_url + '?' + query
      end

      # Total number of matching records advertised by the feed.
      def total_results
        count_nodes = get_results(build_request(0, 0), './/opensearch:totalResults')
        count_nodes.text.to_i
      end
    end
  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from USGS and inserts it into Solr after it has been translated.
    #
    # Relies on helpers that are not defined in this file (get_results,
    # insert_solr_docs, create_new_solr_add_doc_with_child) -- presumably
    # inherited from Base; confirm there.
    class Usgs < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, harvest errors are re-raised
      #   instead of only being printed
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 100
        @translator = Helpers::IsoToSolr.new :usgs
      end

      # Announces the harvest, then hands the harvest method and the Solr query
      # identifying this data center's documents to the parent implementation.
      def harvest_and_delete
        puts "Running harvest of USGS catalog from #{usgs_url}"
        super(method(:harvest_usgs_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]}\"")
      end

      # Get translated entries from USGS and add them to Solr; this is the main
      # entry point for the class. Pages @page_size records at a time until a
      # page comes back empty or falsy.
      def harvest_usgs_into_solr
        start_index = 1
        while (entries = get_results_from_usgs(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs get_docs_with_translated_entries_from_usgs(entries)
          rescue => e
            # Best effort: report the failed page and move on unless asked to die.
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # @return the catalog URL configured for the current environment
      def usgs_url
        SolrEnvironments[@environment][:usgs_url]
      end

      # Fetches one page of gmd:MD_Metadata nodes starting at +start_index+.
      def get_results_from_usgs(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata', ''
      end

      # @param entries [Enumerable] metadata nodes from one results page
      # @return [Array] one Solr add-document per entry
      def get_docs_with_translated_entries_from_usgs(entries)
        entries.map do |entry|
          create_new_solr_add_doc_with_child(@translator.translate(entry).root)
        end
      end

      # Builds the CSW GetRecords request URL, restricted by #bbox_constraint.
      # 'TypeNames' is sent empty, overriding the query builder's default.
      def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(usgs_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition,
                                                     'TypeNames' => '',
                                                     'constraint' => bbox_constraint,
                                                     'outputSchema' => 'http://www.isotc211.org/2005/gmd')
      end

      # URL-escaped OGC filter limiting results to the bounding box
      # west -180, south 45, east 180, north 90.
      #
      # @return [String] the escaped <Filter> XML
      def bbox_constraint
        bbox = {
          west: '-180',
          south: '45',
          east: '180',
          north: '90'
        }

        filter = '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
                 'xmlns:gml="http://www.opengis.net/gml" ' \
                 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
                 '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
                 "<gml:lowerCorner>#{bbox[:west]} #{bbox[:south]}</gml:lowerCorner>" \
                 "<gml:upperCorner>#{bbox[:east]} #{bbox[:north]}</gml:upperCorner>" \
                 '</gml:Envelope></ogc:BBOX></Filter>'

        # URI.encode is obsolete and was removed in Ruby 3.0; the default
        # parser's #escape is the method it delegated to.
        URI::DEFAULT_PARSER.escape(filter)
      end
    end
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'search_solr_tools/helpers/query_builder'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Helpers
    # Constructs the string to query a CSW endpoint.
    class CswIsoQueryBuilder
      # Parameters sent with every request unless overridden by the caller.
      # Frozen so the shared defaults cannot be modified by accident.
      DEFAULT_PARAMS = {
        service: 'CSW',
        version: '2.0.2',
        request: 'GetRecords',
        'TypeNames' => 'gmd:MD_Metadata',
        'ElementSetName' => 'full',
        'resultType' => 'results',
        'outputFormat' => 'application/xml',
        'maxRecords' => '25',
        'startPosition' => '1'
      }.freeze

      # Builds the full request URL for a CSW endpoint.
      #
      # @param url [String] endpoint URL placed in front of the query string
      # @param query_params [Hash] overrides merged over DEFAULT_PARAMS
      # @return [String] +url+ followed by whatever QueryBuilder.build returns
      def self.get_query_string(url, query_params = {})
        all_params = query_params(query_params)
        # Build a new string rather than prepending in place, so the string
        # returned by QueryBuilder.build is never mutated (it may be frozen).
        "#{url}#{QueryBuilder.build(all_params)}"
      end

      # Merges caller-supplied parameters over the defaults.
      #
      # @param query_params [Hash] overrides; keys must match the default's key
      #   type (symbol or string) to replace it
      # @return [Hash] a new, unfrozen hash; DEFAULT_PARAMS is left untouched
      def self.query_params(query_params = {})
        DEFAULT_PARAMS.merge(query_params)
      end
    end
  end
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require_relative 'iso_namespaces'
|
2
|
-
require_relative 'iso_to_solr_format'
|
3
|
-
require_relative 'solr_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Helpers
    # Class-level formatters that read dates and bounding-box coordinates from
    # the <date name="..."> and <float name="..."> descendants of a node.
    class DataOneFormat < IsoToSolrFormat
      class << self
        # Start and end dates of the node, each passed through SolrFormat.date_str.
        def date_range(node)
          begin_text = node.xpath('.//date[@name="beginDate"]').text.strip
          start_date = SolrFormat.date_str(begin_text)
          end_text = node.xpath('.//date[@name="endDate"]').text.strip
          end_date = SolrFormat.date_str(end_text)

          { start: start_date, end: end_date }
        end

        # Stripped text of the four bounding coordinates, keyed by compass side.
        def bounding_box(node)
          coordinate = ->(field) { node.xpath(".//float[@name=\"#{field}\"]").text.strip }

          {
            north: coordinate.call('northBoundCoord'),
            south: coordinate.call('southBoundCoord'),
            east: coordinate.call('eastBoundCoord'),
            west: coordinate.call('westBoundCoord')
          }
        end

        # Coordinates joined as "south west north east".
        def spatial_display(node)
          bounding_box(node).values_at(:south, :west, :north, :east).join(' ')
        end

        # "west south" when the box collapses to a single point, otherwise
        # "west south east north".
        def spatial_index(node)
          bounds = bounding_box(node)
          single_point = bounds[:west] == bounds[:east] && bounds[:south] == bounds[:north]

          corners = single_point ? %i[west south] : %i[west south east north]
          bounds.values_at(*corners).join(' ')
        end

        # Difference between the north and south coordinates, as floats.
        def spatial_area(node)
          north, south = bounding_box(node).values_at(:north, :south)

          north.to_f - south.to_f
        end

        # Display string for the node's date range.
        def temporal_coverage(node)
          SolrFormat.temporal_display_str(date_range(node))
        end

        # Duration between the start date and the end date (or the current
        # time when no end date is given); nil when there is no start date.
        def temporal_duration(node)
          range = date_range(node)
          # The end time is resolved before the start-date check, as before.
          end_time = range[:end].to_s.empty? ? Time.now : Time.parse(range[:end])
          return if range[:start].to_s.empty?

          SolrFormat.get_temporal_duration(Time.parse(range[:start]), end_time)
        end

        # Index string for the node's date range.
        def temporal_index_string(node)
          SolrFormat.temporal_index_str(date_range(node))
        end

        # Spatial-scope facet computed from the node's bounding box.
        def facet_spatial_scope(node)
          SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(node))
        end

        # Duration facet computed from the node's temporal duration.
        def facet_temporal_duration(node)
          SolrFormat.get_temporal_duration_facet(temporal_duration(node))
        end
      end
    end
  end
end
|