search_solr_tools 6.1.0 → 6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative 'oai'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # OAI harvester specialised for the RDA feed
    class Rda < Oai
      # Forwards both arguments unchanged to the parent initializer, then
      # stores the RDA data center long name and an :rda translator.
      def initialize(env = 'development', die_on_failure = false)
        super
        rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]
        @data_centers = rda_names[:long_name]
        @translator = Helpers::IsoToSolr.new(:rda)
      end

      # URL of the RDA feed configured for the current environment
      def metadata_url
        environment_settings = SolrEnvironments[@environment]
        environment_settings[:rda_url]
      end

      # RDA's feed does not provide any resumption token and returns every
      # record in one response, so the token is blanked to stop the harvest loop.
      def results
        @resumption_token = ''
        list_records = get_results(request_string, '//oai:ListRecords', '')
        oai_namespaces = Helpers::IsoNamespaces.namespaces
        list_records.xpath('.//oai:record', oai_namespaces)
      end

      private

      # Query parameters sent with the ListRecords request
      def request_params
        { verb: 'ListRecords', metadataPrefix: 'dif' }
      end
    end
  end
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # Pulls dataset entries from the TDAR feed, translates them and inserts them into Solr
    class Tdar < Base
      def initialize(env = 'development', die_on_failure = false)
        super(env, die_on_failure)
        @page_size = 100
        @translator = Helpers::IsoToSolr.new(:tdar)
      end

      # Announces the harvest, then hands the page-by-page harvest method and
      # the TDAR data-center delete constraint to the parent implementation.
      def harvest_and_delete
        puts "Running harvest of TDAR catalog from #{tdar_url}"
        harvester = method(:harvest_tdar_into_solr)
        tdar_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]
        super(harvester, "data_centers:\"#{tdar_name}\"")
      end

      # Requests pages of @page_size entries until a page comes back empty or
      # the number of harvested entries reaches the total the feed reported.
      def harvest_tdar_into_solr
        offset = 0
        harvested = 0
        expected = total_results
        page = get_results_from_tdar(offset)

        while page && page.length > 0
          begin
            insert_solr_docs(get_docs_with_translated_entries_from_tdar(page))
          rescue StandardError => e
            puts "ERROR: #{e}\n\n"
            raise e if @die_on_failure
          end

          # once every expected record has been seen, stop: asking for one
          # more page would result in an error
          harvested += page.length
          break if harvested >= expected

          offset += @page_size
          page = get_results_from_tdar(offset)
        end
      end

      # URL of the TDAR feed configured for the current environment
      def tdar_url
        SolrEnvironments[@environment][:tdar_url]
      end

      # Fetches the atom entries of the page that starts at start_record
      def get_results_from_tdar(start_record)
        page_url = build_request(@page_size, start_record)
        get_results(page_url, './/atom:entry', 'application/xml')
      end

      # Translates each entry and wraps the translated root in a Solr add document
      def get_docs_with_translated_entries_from_tdar(entries)
        entries.map do |entry|
          translated_root = @translator.translate(entry).root
          create_new_solr_add_doc_with_child(translated_root)
        end
      end

      # Builds the feed URL for one page of results
      def build_request(max_records = '25', start_record = '0')
        fixed_part = [
          '?_tDAR.searchType=ACADIS_RSS',
          'resourceTypes=DATASET',
          'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180',
          'groups[0].latitudeLongitudeBoxes[0].minimumLatitude=45',
          'groups[0].latitudeLongitudeBoxes[0].minimumLongitude=-180',
          'groups[0].latitudeLongitudeBoxes[0].maximumLatitude=90',
          'geoMode=ENVELOPE'
        ].join('&')

        tdar_url + fixed_part + '&recordsPerPage=' + max_records.to_s + '&startRecord=' + start_record.to_s
      end

      # Total number of matching records as reported by the feed
      def total_results
        count_node = get_results(build_request(0, 0), './/opensearch:totalResults')
        count_node.text.to_i
      end
    end
  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from USGS and inserts it into Solr after it has been translated
    class Usgs < Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 100
        @translator = Helpers::IsoToSolr.new :usgs
      end

      # Announces the harvest, then hands the paging harvest method and the
      # USGS data-center delete constraint to the parent implementation.
      def harvest_and_delete
        puts "Running harvest of USGS catalog from #{usgs_url}"
        super(method(:harvest_usgs_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]}\"")
      end

      # get translated entries from USGS and add them to Solr
      # this is the main entry point for the class
      #
      # Pages through the catalog @page_size records at a time (positions
      # start at 1) until a request returns no entries. A failing page is
      # logged and skipped unless @die_on_failure is set.
      def harvest_usgs_into_solr
        start_index = 1
        while (entries = get_results_from_usgs(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs get_docs_with_translated_entries_from_usgs(entries)
          rescue StandardError => e
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # URL of the USGS endpoint configured for the current environment
      def usgs_url
        SolrEnvironments[@environment][:usgs_url]
      end

      # Fetches the gmd:MD_Metadata nodes of the page that starts at start_index
      def get_results_from_usgs(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata', ''
      end

      # Translates each entry and wraps the translated root in a Solr add document
      def get_docs_with_translated_entries_from_usgs(entries)
        entries.map do |entry|
          create_new_solr_add_doc_with_child(@translator.translate(entry).root)
        end
      end

      # Builds the CSW query URL for one page of results
      def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(usgs_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition,
                                                     'TypeNames' => '',
                                                     'constraint' => bbox_constraint,
                                                     'outputSchema' => 'http://www.isotc211.org/2005/gmd')
      end

      # Returns the percent-escaped OGC bounding-box filter (45N-90N, all
      # longitudes) that is sent as the CSW 'constraint' parameter.
      def bbox_constraint
        bbox = {
          west: '-180',
          south: '45',
          east: '180',
          north: '90'
        }

        filter = '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
                 'xmlns:gml="http://www.opengis.net/gml" ' \
                 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
                 '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
                 "<gml:lowerCorner>#{bbox[:west]} #{bbox[:south]}</gml:lowerCorner>" \
                 "<gml:upperCorner>#{bbox[:east]} #{bbox[:north]}</gml:upperCorner>" \
                 '</gml:Envelope></ogc:BBOX></Filter>'

        # URI.encode was removed in Ruby 3.0; the RFC 2396 parser's #escape is
        # the routine it delegated to and is available on old and new Rubies.
        URI::RFC2396_Parser.new.escape(filter)
      end
    end
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'search_solr_tools/helpers/query_builder'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Helpers
    # Assembles the query URL used to talk to a CSW endpoint
    class CswIsoQueryBuilder
      # Parameters sent with every request unless the caller overrides them
      DEFAULT_PARAMS = {
        service: 'CSW',
        version: '2.0.2',
        request: 'GetRecords',
        'TypeNames' => 'gmd:MD_Metadata',
        'ElementSetName' => 'full',
        'resultType' => 'results',
        'outputFormat' => 'application/xml',
        'maxRecords' => '25',
        'startPosition' => '1'
      }

      class << self
        # Returns url followed by the query string built from the defaults
        # merged with the caller's query_params.
        def get_query_string(url, query_params = {})
          merged = query_params(query_params)
          query = QueryBuilder.build(merged)
          query.prepend(url)
        end

        # Returns a new hash: DEFAULT_PARAMS with the given overrides applied
        def query_params(query_params = {})
          DEFAULT_PARAMS.merge(query_params)
        end
      end
    end
  end
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require_relative 'iso_namespaces'
|
2
|
-
require_relative 'iso_to_solr_format'
|
3
|
-
require_relative 'solr_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Helpers
    # Formatting helpers that read date and coordinate values from
    # <date name="..."> and <float name="..."> child elements of a node.
    class DataOneFormat < IsoToSolrFormat
      class << self
        # Start and end dates of the node, each passed through SolrFormat.date_str
        def date_range(node)
          begin_text = node.xpath('.//date[@name="beginDate"]').text.strip
          start_date = SolrFormat.date_str(begin_text)
          end_text = node.xpath('.//date[@name="endDate"]').text.strip
          end_date = SolrFormat.date_str(end_text)

          { start: start_date, end: end_date }
        end

        # The four bounding coordinates of the node as stripped strings
        def bounding_box(node)
          coordinate = ->(name) { node.xpath(".//float[@name=\"#{name}\"]").text.strip }

          {
            north: coordinate.call('northBoundCoord'),
            south: coordinate.call('southBoundCoord'),
            east: coordinate.call('eastBoundCoord'),
            west: coordinate.call('westBoundCoord')
          }
        end

        # "south west north east", space separated
        def spatial_display(node)
          bounding_box(node).values_at(:south, :west, :north, :east).join(' ')
        end

        # "west south" when the box collapses to a single point, otherwise
        # "west south east north"
        def spatial_index(node)
          box = bounding_box(node)
          single_point = box[:west] == box[:east] && box[:south] == box[:north]
          corners = single_point ? %i[west south] : %i[west south east north]

          box.values_at(*corners).join(' ')
        end

        # North coordinate minus south coordinate, as a float
        def spatial_area(node)
          north, south = bounding_box(node).values_at(:north, :south)

          north.to_f - south.to_f
        end

        def temporal_coverage(node)
          range = date_range(node)
          SolrFormat.temporal_display_str(range)
        end

        # Duration between start and end; a blank end date counts as "now".
        # Returns nil when there is no start date.
        def temporal_duration(node)
          range = date_range(node)
          finish = range[:end].to_s.empty? ? Time.now : Time.parse(range[:end])
          return if range[:start].to_s.empty?

          SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
        end

        def temporal_index_string(node)
          SolrFormat.temporal_index_str(date_range(node))
        end

        def facet_spatial_scope(node)
          SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(node))
        end

        def facet_temporal_duration(node)
          SolrFormat.get_temporal_duration_facet(temporal_duration(node))
        end
      end
    end
  end
end
|
@@ -1,97 +0,0 @@
|
|
1
|
-
require_relative 'selectors'
|
2
|
-
require 'nokogiri'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Helpers
    # Translates ISO nokogiri documents into solr nokogiri documents using a hash driver object.
    # This class should be constructed passing the selector file hash as a parameter (see selectors.rb).
    # After creating an instance we call translate with a nokogiri iso document as a parameter.
    class IsoToSolr
      def initialize(selector)
        @fields = SELECTORS[selector]
        @multiple_whitespace = /\s{2,}/ # save the regex so it is not recompiled every time format_field() is called
      end

      # Returns an array with the elements that matched the xpath. Only the
      # first match is kept when multivalue is exactly false and no reduce
      # callable is given. Evaluation is best effort: any error while querying
      # the document yields an empty array.
      def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
        fields = []
        begin
          iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc)).each do |f|
            fields.push(f)
            break if multivalue == false && reduce.nil?
          end
        rescue StandardError
          fields = []
        end
        fields
      end

      # The selector's :default_values, or [''] when it declares none
      def get_default_values(selector)
        selector.key?(:default_values) ? selector[:default_values] : ['']
      end

      # The text of a node-like object; anything without #text is returned as is
      def format_text(field)
        field.respond_to?(:text) ? field.text : field
      end

      # Applies the selector's :format callable (falling back to the plain
      # text when it is absent or raises), then drops invalid bytes, trims
      # the value and collapses runs of whitespace.
      #
      # The value is never modified in place: default values come straight
      # from the shared selector hash and may be frozen string literals.
      def format_field(selector, field)
        formatted = begin
          selector.key?(:format) ? selector[:format].call(field) : format_text(field)
        rescue StandardError
          format_text(field)
        end
        formatted = strip_invalid_utf8_bytes(formatted)
        # the bang-method checks are kept on purpose so that exactly the same
        # objects as before are treated as strings
        formatted = formatted.strip if formatted.respond_to?(:strip!)
        formatted = formatted.gsub(@multiple_whitespace, ' ') if formatted.respond_to?(:gsub!)
        formatted
      end

      # Formats every field, optionally reduces the list to a single value and
      # removes duplicates when the selector asks for unique values.
      def format_fields(selector, fields, reduce = nil)
        formatted = fields.map { |f| format_field(selector, f) }.flatten
        formatted = [reduce.call(formatted)] unless reduce.nil?
        selector[:unique] ? formatted.uniq : formatted
      end

      # Evaluates the selector's xpaths in order and returns the formatted
      # values of the first one that yields non-blank text; falls back to the
      # selector's default values when none does.
      def create_solr_fields(iso_xml_doc, selector)
        selector[:xpaths].each do |xpath|
          fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])

          # stop evaluating xpaths once we find data in one of them
          has_text = fields.any? { |f| !strip_invalid_utf8_bytes(f.text).strip.empty? }
          return format_fields(selector, fields, selector[:reduce]) if has_text
        end
        format_fields(selector, get_default_values(selector))
      end

      # Builds the solr <doc> for the given ISO document and returns it as a
      # nokogiri document.
      def translate(iso_xml_doc)
        solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
          xml.doc_ do
            build_fields(xml, iso_xml_doc)
          end
        end
        solr_xml_doc.doc
      end

      # Emits one <field name="..."> element per non-blank value of every
      # configured field; array values contribute one element per member.
      def build_fields(xml, iso_xml_doc)
        @fields.each do |field_name, selector|
          create_solr_fields(iso_xml_doc, selector).each do |value|
            if value.is_a? Array
              value.each do |v|
                xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
              end
            else
              xml.field_({ name: field_name }, value) unless value.nil? || value.eql?('')
            end
          end
        end
      end

      # Returns text without bytes that are invalid in its encoding and
      # without U+00BF characters. Objects that are not string-like are
      # returned untouched. The argument itself is left unmodified.
      def strip_invalid_utf8_bytes(text)
        if text.respond_to?(:encode) && !text.valid_encoding?
          text = text.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
        end

        text = text.delete("\u00BF") if text.respond_to?(:delete!)

        text
      end
    end
  end
end
|
@@ -1,197 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
|
3
|
-
require_relative 'iso_namespaces'
|
4
|
-
require_relative 'solr_format'
|
5
|
-
|
6
|
-
module SearchSolrTools
  module Helpers
    # Builds the strings (and a few numbers) that SOLR indexes from ISO xml nodes
    # rubocop:disable ClassLength
    class IsoToSolrFormat
      # Callables handed to the selector definitions as :format / :reduce hooks
      KEYWORDS = proc { |keywords| build_keyword_list keywords }

      SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
      SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
      SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
      MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }

      FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
      FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
      FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
      FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }

      TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
      TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
      TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
      TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }

      DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
      ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
      EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }

      class << self
        # "south west north east", space separated
        def spatial_display_str(box_node)
          bounding_box(box_node).values_at(:south, :west, :north, :east).join(' ')
        end

        # "west south" when the box collapses to a single point, otherwise
        # "west south east north"
        def spatial_index_str(box_node)
          box = bounding_box(box_node)
          single_point = box[:west] == box[:east] && box[:south] == box[:north]
          corners = single_point ? %i[west south] : %i[west south east north]

          box.values_at(*corners).join(' ')
        end

        # North bound minus south bound, as a float
        def spatial_area_str(box_node)
          north, south = bounding_box(box_node).values_at(:north, :south)
          north.to_f - south.to_f
        end

        # Largest of the given values once each is converted with to_f
        def get_max_spatial_area(values)
          values.map { |value| value.to_f }.max
        end

        # nil for an invalid box, otherwise 'Global' or 'Non Global'
        def get_spatial_facet(box_node)
          box = bounding_box(box_node)
          return nil if BoundingBoxUtil.box_invalid?(box)

          BoundingBoxUtil.box_global?(box) ? 'Global' : 'Non Global'
        end

        def get_spatial_scope_facet(box_node)
          SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(box_node))
        end

        def temporal_display_str(temporal_node, formatted = false)
          range = date_range(temporal_node, formatted)
          SolrFormat.temporal_display_str(range)
        end

        # Duration between start and end; a blank end date counts as "now".
        # Returns nil when there is no start date.
        def get_temporal_duration(temporal_node)
          range = date_range(temporal_node)
          finish = range[:end].to_s.empty? ? Time.now : Time.parse(range[:end])
          return if range[:start].to_s.empty?

          SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
        end

        def get_temporal_duration_facet(temporal_node)
          SolrFormat.get_temporal_duration_facet(get_temporal_duration(temporal_node))
        end

        def temporal_index_str(temporal_node)
          SolrFormat.temporal_index_str(date_range(temporal_node))
        end

        # "<organisation name> | <organisation short name>"
        def sponsored_program_facet(node)
          names = %w[.//gmd:organisationName .//gmd:organisationShortName].map do |path|
            node.xpath(path, IsoNamespaces.namespaces(node)).text.strip
          end

          names.join(' | ')
        end

        # "<category> > <topic> > <term>"
        def build_keyword_list(keywords)
          parts = %w[.//CategoryKeyword .//TopicKeyword .//TermKeyword].map do |path|
            keywords.xpath(path).text
          end

          parts.join(' > ')
        end

        # Start and end dates found below the node. A value SolrFormat does
        # not recognise as a date becomes ''; with formatted set, each value
        # is additionally passed through SolrFormat.date_str.
        def date_range(temporal_node, formatted = false)
          pick = lambda do |paths|
            found = get_first_matching_child(temporal_node, paths)
            found = '' unless SolrFormat.date?(found)
            formatted ? SolrFormat.date_str(found) : found
          end

          {
            start: pick.call(['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']),
            end: pick.call(['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date'])
          }
        end

        # Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
        def dataset_url(url_node)
          candidate = url_node.text.strip
          return '' unless candidate =~ %r{http[s]?://}

          url_node.text.strip
        end

        def ices_dataset_url(auth_id)
          prefix = 'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid='
          prefix + auth_id
        end

        # Text of the first node matching any of the paths, or '' without a match
        def get_first_matching_child(node, paths)
          union = paths.join(' | ')
          hit = node.at_xpath(union, IsoNamespaces.namespaces(node))
          return '' if hit.nil?

          hit.text
        end

        # The four bounds of the node, each produced by get_bound
        def bounding_box(box_node)
          {
            west: get_bound(box_node, :west),
            south: get_bound(box_node, :south),
            east: get_bound(box_node, :east),
            north: get_bound(box_node, :north)
          }
        end

        # 'Latitude' for north/south, 'Longitude' for east/west, nil otherwise
        def axis_label(direction)
          case direction
          when :north, :south then 'Latitude'
          when :east, :west then 'Longitude'
          end
        end

        # Largest absolute value allowed on the given axis
        def coordinate_boundary(lat_lon)
          limits = { 'Latitude' => 90, 'Longitude' => 180 }
          limits[lat_lon]
        end

        # Whitespace-separated tokens of the first coordinate element found
        # for the direction, trying each supported element naming in turn
        def node_values(box_node, direction, lat_lon)
          lower = direction.to_s.downcase
          upper = direction.to_s.capitalize
          candidates = [
            "./gmd:#{lower}Bounding#{lat_lon}/gco:Decimal",
            "./gmd:#{lower}Bound#{lat_lon}/gco:Decimal",
            "./#{upper}BoundingCoordinate",
            "./dif:#{upper}ernmost_#{lat_lon}"
          ]

          get_first_matching_child(box_node, candidates).split(' ')
        end

        # The bound for the direction as a float string. '' when no value is
        # present or it exceeds the axis boundary; the value is negated when
        # the last token is 'West' or 'South'.
        def get_bound(box_node, direction)
          axis = axis_label(direction)
          tokens = node_values(box_node, direction, axis)

          magnitude = tokens.first.to_f
          exceeds_limit = coordinate_boundary(axis) < magnitude.abs
          return '' if tokens.empty? || exceeds_limit

          signed = %w(West South).include?(tokens.last) ? -magnitude : magnitude
          signed.to_s
        end
      end
    end
  end
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
|
3
|
-
require_relative 'iso_namespaces'
|
4
|
-
require_relative 'solr_format'
|
5
|
-
require_relative 'iso_to_solr_format'
|
6
|
-
|
7
|
-
module SearchSolrTools
  module Helpers
    # Formatting helpers for NCDC Paleo records, whose bounding box is given as
    # ows corner elements and whose time range is free text of the form
    # "START YEAR: ... * END YEAR: ...".
    class NcdcPaleoFormat < IsoToSolrFormat
      # Reads the box from the ows:UpperCorner ("east north") and
      # ows:LowerCorner ("west south") children of the node.
      def self.bounding_box(node)
        east, north = node.xpath('./ows:UpperCorner').text.split
        west, south = node.xpath('./ows:LowerCorner').text.split
        { north: north, south: south, east: east, west: west }
      end

      # Returns { start:, end: } as DateTime values, or nil when the node text
      # carries no recognisable "START YEAR" range.
      def self.date_range(node, _formatted = false)
        text = node.text
        return unless text.include?('START YEAR')

        if text.include?('AD')
          format_ad_time(text)
        elsif text.include?('yr BP')
          format_cal_yr_bp_time(text)
        end
      end

      # Parses "START YEAR: <y> AD * END YEAR: <y> AD".
      # Returns nil when the text does not follow that layout (previously this
      # raised NoMethodError on the missing match).
      def self.format_ad_time(node_text)
        match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
        return if match.nil?

        {
          start: DateTime.strptime(match[:start].strip, '%Y'),
          end: DateTime.strptime(match[:end].strip, '%Y')
        }
      end

      # Parses "START YEAR: <n> ... yr BP * END YEAR: <n> ... yr BP".
      # Returns nil when the text does not follow that layout.
      #
      # "Before Present" counts years back from 1950, so n yr BP is the
      # calendar year 1950 - n (the old code computed -n - 1950, which placed
      # every date 3900 years too early).
      def self.format_cal_yr_bp_time(node_text)
        zero_year = 1950
        match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
        return if match.nil?

        {
          start: DateTime.strptime((zero_year - match[:start].strip.to_i).to_s, '%Y'),
          end: DateTime.strptime((zero_year - match[:end].strip.to_i).to_s, '%Y')
        }
      end

      # Index string for the node's range; nil when there is no range
      def self.temporal_index_str(node)
        range = date_range(node)
        SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
      end

      # Absolute number of whole days between start and end; nil when there is
      # no range
      def self.get_temporal_duration(node)
        range = date_range(node)
        return if range.nil?

        (range[:start] - range[:end]).to_i.abs
      end

      # The author text, nil when it contains a ';', and the empty default
      # value passed through unchanged.
      def self.author(node)
        return node if node == ''
        return if node.text.include? ';'

        node.text
      end
    end
  end
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
module SearchSolrTools
  module Helpers
    # Turns a hash of params into a URL query string
    class QueryBuilder
      # Returns '?' followed by the "key=value" pairs joined with '&'.
      # Keys and values are inserted as they are, without any escaping.
      def self.build(params)
        pairs = params.each_with_object([]) do |(key, value), collected|
          collected << "#{key}=#{value}"
        end

        '?' + pairs.join('&')
      end
    end
  end
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
require_relative 'iso_namespaces'
|
2
|
-
require_relative 'iso_to_solr_format'
|
3
|
-
require_relative 'solr_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Helpers
    # Temporal formatting for R2R records, whose start and end instants are
    # gml:TimeInstant elements identified by gml:id "start" / "end".
    class R2RFormat < IsoToSolrFormat
      # Same hooks as the parent class, bound to R2RFormat so that the
      # date_range defined here is the one that gets used
      TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
      TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
      TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
      FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }

      class << self
        # Text of the start and end time positions found below the node
        def date_range(temporal_node, _formatted = false)
          start_path = './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/'\
                       'gml:TimeInstant[@gml:id="start"]/gml:timePosition'
          end_path = start_path.gsub('start', 'end')

          start_text = temporal_node.xpath(start_path).text
          end_text = temporal_node.xpath(end_path).text

          { start: start_text, end: end_text }
        end
      end
    end
  end
end
|