search_solr_tools 6.1.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative 'oai'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # Harvester for the RDA feed. It subclasses Oai (defined in another file)
    # and only supplies the RDA-specific pieces: data center name, translator,
    # endpoint URL, record extraction and request parameters.
    class Rda < Oai
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] forwarded unchanged to the superclass
      def initialize(env = 'development', die_on_failure = false)
        super(env, die_on_failure)
        rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]
        @data_centers = rda_names[:long_name]
        @translator = Helpers::IsoToSolr.new(:rda)
      end

      # @return the :rda_url setting of the configured environment
      def metadata_url
        SolrEnvironments[@environment][:rda_url]
      end

      # Fetches the ListRecords response and returns its oai:record nodes.
      #
      # The resumption token is blanked first: RDA's feed does not provide a
      # resumption token and delivers every record in one response, and an
      # empty token is what stops the harvest loop.
      def results
        @resumption_token = ''
        get_results(request_string, '//oai:ListRecords', '')
          .xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
      end

      private

      # Query parameters for the request (DIF-formatted ListRecords).
      def request_params
        { verb: 'ListRecords', metadataPrefix: 'dif' }
      end
    end
  end
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Harvesters
    # Pages through the TDAR catalog, translates every entry with the :tdar
    # selector set and inserts the resulting documents into Solr.
    class Tdar < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, insertion errors are re-raised
      def initialize(env = 'development', die_on_failure = false)
        super(env, die_on_failure)
        @page_size = 100
        @translator = Helpers::IsoToSolr.new(:tdar)
      end

      # Announces the harvest, then hands the harvest method and the Solr
      # query matching TDAR documents to the superclass implementation.
      def harvest_and_delete
        puts "Running harvest of TDAR catalog from #{tdar_url}"
        tdar_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]
        super(method(:harvest_tdar_into_solr), "data_centers:\"#{tdar_name}\"")
      end

      # Requests pages of @page_size entries until a page comes back empty or
      # the number of harvested entries reaches the total the feed reports.
      def harvest_tdar_into_solr
        offset = 0
        harvested = 0
        expected = total_results

        entries = get_results_from_tdar(offset)
        while entries && entries.length > 0
          begin
            insert_solr_docs(get_docs_with_translated_entries_from_tdar(entries))
          rescue => e
            puts "ERROR: #{e}\n\n"
            raise e if @die_on_failure
          end

          # Once everything expected has been seen, stop: asking for another
          # page would result in an error.
          harvested += entries.length
          break if harvested >= expected

          offset += @page_size
          entries = get_results_from_tdar(offset)
        end
      end

      # @return the :tdar_url setting of the configured environment
      def tdar_url
        SolrEnvironments[@environment][:tdar_url]
      end

      # Fetches one page of atom:entry nodes beginning at start_record.
      def get_results_from_tdar(start_record)
        request = build_request(@page_size, start_record)
        get_results(request, './/atom:entry', 'application/xml')
      end

      # Translates each entry and wraps the translated root in a Solr add doc.
      def get_docs_with_translated_entries_from_tdar(entries)
        entries.map do |entry|
          translated_root = @translator.translate(entry).root
          create_new_solr_add_doc_with_child(translated_root)
        end
      end

      # Builds the search URL: fixed dataset and bounding-box filters
      # (latitude 45..90, longitude -180..180) plus the paging parameters.
      def build_request(max_records = '25', start_record = '0')
        query_parts = [
          '_tDAR.searchType=ACADIS_RSS',
          'resourceTypes=DATASET',
          'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180',
          'groups[0].latitudeLongitudeBoxes[0].minimumLatitude=45',
          'groups[0].latitudeLongitudeBoxes[0].minimumLongitude=-180',
          'groups[0].latitudeLongitudeBoxes[0].maximumLatitude=90',
          'geoMode=ENVELOPE',
          "recordsPerPage=#{max_records}",
          "startRecord=#{start_record}"
        ]

        tdar_url + "?#{query_parts.join('&')}"
      end

      # Reads opensearch:totalResults from a zero-record request.
      def total_results
        get_results(build_request(0, 0), './/opensearch:totalResults').text.to_i
      end
    end
  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Harvesters
    # Harvests data from USGS and inserts it into Solr after it has been
    # translated with the :usgs selector set.
    class Usgs < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, insertion errors are re-raised
      def initialize(env = 'development', die_on_failure = false)
        super(env, die_on_failure)
        @page_size = 100
        @translator = Helpers::IsoToSolr.new(:usgs)
      end

      # Announces the harvest, then hands the harvest method and the Solr
      # query matching USGS documents to the superclass implementation.
      def harvest_and_delete
        puts "Running harvest of USGS catalog from #{usgs_url}"
        usgs_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]
        super(method(:harvest_usgs_into_solr), "data_centers:\"#{usgs_name}\"")
      end

      # Main entry point: fetches translated entries from USGS page by page
      # (start positions are 1-based) and adds them to Solr until a page
      # comes back empty.
      def harvest_usgs_into_solr
        start_index = 1
        while (entries = get_results_from_usgs(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs(get_docs_with_translated_entries_from_usgs(entries))
          rescue => e
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # @return the :usgs_url setting of the configured environment
      def usgs_url
        SolrEnvironments[@environment][:usgs_url]
      end

      # Fetches one page of gmd:MD_Metadata nodes beginning at start_index.
      def get_results_from_usgs(start_index)
        get_results(build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata', '')
      end

      # Translates each entry and wraps the translated root in a Solr add doc.
      def get_docs_with_translated_entries_from_usgs(entries)
        entries.map do |entry|
          create_new_solr_add_doc_with_child(@translator.translate(entry).root)
        end
      end

      # Builds the CSW GetRecords URL for one page of results, constrained to
      # the bounding box produced by #bbox_constraint.
      def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(usgs_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition,
                                                     'TypeNames' => '',
                                                     'constraint' => bbox_constraint,
                                                     'outputSchema' => 'http://www.isotc211.org/2005/gmd')
      end

      # Returns the percent-encoded OGC filter restricting results to
      # longitude -180..180 and latitude 45..90.
      #
      # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
      # call it delegated to, so the encoded output is unchanged.
      def bbox_constraint
        bbox = {
          west: '-180',
          south: '45',
          east: '180',
          north: '90'
        }

        filter = '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
                 'xmlns:gml="http://www.opengis.net/gml" ' \
                 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
                 '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
                 "<gml:lowerCorner>#{bbox[:west]} #{bbox[:south]}</gml:lowerCorner>" \
                 "<gml:upperCorner>#{bbox[:east]} #{bbox[:north]}</gml:upperCorner>" \
                 '</gml:Envelope></ogc:BBOX></Filter>'

        URI::DEFAULT_PARSER.escape(filter)
      end
    end
  end
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'search_solr_tools/helpers/query_builder'
|
2
|
-
|
3
|
-
module SearchSolrTools
  module Helpers
    # Assembles the URL used to query a CSW endpoint: a set of default
    # GetRecords parameters, overridable per call, appended to a base URL.
    class CswIsoQueryBuilder
      DEFAULT_PARAMS = {
        service: 'CSW',
        version: '2.0.2',
        request: 'GetRecords',
        'TypeNames' => 'gmd:MD_Metadata',
        'ElementSetName' => 'full',
        'resultType' => 'results',
        'outputFormat' => 'application/xml',
        'maxRecords' => '25',
        'startPosition' => '1'
      }

      class << self
        # @param url [String] endpoint the query string is attached to
        # @param query_params [Hash] overrides/additions for DEFAULT_PARAMS
        # @return [String] url followed by the string QueryBuilder.build produces
        def get_query_string(url, query_params = {})
          merged = self.query_params(query_params)
          query = QueryBuilder.build(merged)
          query.prepend(url)
        end

        # @param query_params [Hash] overrides/additions for DEFAULT_PARAMS
        # @return [Hash] new hash; DEFAULT_PARAMS itself is left untouched
        def query_params(query_params = {})
          DEFAULT_PARAMS.dup.update(query_params)
        end
      end
    end
  end
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require_relative 'iso_namespaces'
|
2
|
-
require_relative 'iso_to_solr_format'
|
3
|
-
require_relative 'solr_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Helpers
    # Formatters for DataONE result nodes, which carry their metadata in
    # <date name="..."> and <float name="..."> elements.
    class DataOneFormat < IsoToSolrFormat
      class << self
        # @return [Hash] :start and :end, each the SolrFormat.date_str of the
        #   stripped beginDate / endDate element text
        def date_range(node)
          read_date = lambda do |name|
            SolrFormat.date_str(node.xpath(".//date[@name=\"#{name}\"]").text.strip)
          end

          { start: read_date.call('beginDate'), end: read_date.call('endDate') }
        end

        # @return [Hash] :north, :south, :east, :west as stripped strings taken
        #   from the matching "<direction>BoundCoord" float elements
        def bounding_box(node)
          %i[north south east west].each_with_object({}) do |direction, box|
            box[direction] = node.xpath(".//float[@name=\"#{direction}BoundCoord\"]").text.strip
          end
        end

        # @return [String] "south west north east", space separated
        def spatial_display(node)
          bounding_box(node).values_at(:south, :west, :north, :east).join(' ')
        end

        # @return [String] "west south" when the box collapses to a single
        #   point, otherwise "west south east north"
        def spatial_index(node)
          box = bounding_box(node)
          single_point = box[:west] == box[:east] && box[:south] == box[:north]
          corners = single_point ? %i[west south] : %i[west south east north]

          box.values_at(*corners).join(' ')
        end

        # @return [Float] north minus south (only the latitude span is used)
        def spatial_area(node)
          box = bounding_box(node)
          northern = box[:north].to_f
          southern = box[:south].to_f

          northern - southern
        end

        # @return the SolrFormat display string for the node's date range
        def temporal_coverage(node)
          SolrFormat.temporal_display_str(date_range(node))
        end

        # Duration between the start and end dates; an empty end date counts
        # as "now". Returns nil when there is no start date.
        def temporal_duration(node)
          range = date_range(node)
          finish = range[:end].to_s.empty? ? Time.now : Time.parse(range[:end])
          return if range[:start].to_s.empty?

          SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
        end

        # @return the SolrFormat index string for the node's date range
        def temporal_index_string(node)
          SolrFormat.temporal_index_str(date_range(node))
        end

        # @return the spatial scope facet SolrFormat derives from the bounding box
        def facet_spatial_scope(node)
          SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(node))
        end

        # @return the temporal duration facet SolrFormat derives from the duration
        def facet_temporal_duration(node)
          SolrFormat.get_temporal_duration_facet(temporal_duration(node))
        end
      end
    end
  end
end
|
@@ -1,97 +0,0 @@
|
|
1
|
-
require_relative 'selectors'
|
2
|
-
require 'nokogiri'
|
3
|
-
|
4
|
-
module SearchSolrTools
  module Helpers
    # Translates ISO nokogiri documents into solr nokogiri documents using a
    # hash driver object. Construct it with the key of a selector hash in
    # SELECTORS (see selectors.rb), then call #translate with a nokogiri ISO
    # document.
    class IsoToSolr
      # @param selector [Symbol] key into SELECTORS choosing the field definitions
      def initialize(selector)
        @fields = SELECTORS[selector]
        # Kept on the instance so the regex is built once, not on every
        # format_field call.
        @multiple_whitespace = /\s{2,}/
      end

      # Evaluates one xpath against the document.
      #
      # @return [Array] every matching element, or only the first one when
      #   multivalue is exactly false and no reduce callable is given.
      #   Any error raised during evaluation yields an empty array
      #   (deliberate best-effort: a bad xpath must not abort the translation).
      def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
        matches = iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc))
        first_only = multivalue == false && reduce.nil?
        first_only ? matches.take(1) : matches.to_a
      rescue StandardError
        []
      end

      # @return the selector's :default_values, or [''] when the key is absent
      def get_default_values(selector)
        selector.fetch(:default_values, [''])
      end

      # @return the field's text when it responds to #text, else the field itself
      def format_text(field)
        field.respond_to?(:text) ? field.text : field
      end

      # Formats a single field: applies the selector's :format callable when
      # present (falling back to the plain text if that callable raises),
      # removes invalid bytes, trims, and collapses whitespace runs.
      #
      # The input is never modified in place, so frozen strings and shared
      # values such as a selector's default values are safe to pass in.
      def format_field(selector, field)
        formatted =
          begin
            selector.key?(:format) ? selector[:format].call(field) : format_text(field)
          rescue StandardError
            format_text(field)
          end

        formatted = strip_invalid_utf8_bytes(formatted)
        # The bang variants are probed so that only String-like values are
        # touched; the non-mutating counterparts do the actual work.
        formatted = formatted.strip if formatted.respond_to?(:strip!)
        formatted = formatted.gsub(@multiple_whitespace, ' ') if formatted.respond_to?(:gsub!)
        formatted
      end

      # Formats every field, flattens the result, optionally folds it into a
      # single value with reduce, and de-duplicates when selector[:unique].
      def format_fields(selector, fields, reduce = nil)
        formatted = fields.map { |f| format_field(selector, f) }.flatten
        formatted = [reduce.call(formatted)] unless reduce.nil?
        selector[:unique] ? formatted.uniq : formatted
      end

      # Tries the selector's xpaths in order and formats the matches of the
      # first one that yields any non-blank text; falls back to the
      # selector's default values when none does.
      def create_solr_fields(iso_xml_doc, selector)
        selector[:xpaths].each do |xpath|
          fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])
          next unless fields.any? { |f| !strip_invalid_utf8_bytes(f.text).strip.empty? }

          # stop evaluating xpaths once we find data in one of them
          return format_fields(selector, fields, selector[:reduce])
        end

        format_fields(selector, get_default_values(selector))
      end

      # Builds a <doc> document containing one <field name="..."> element per
      # non-empty value produced for the configured fields.
      #
      # @return the Nokogiri document held by the builder
      def translate(iso_xml_doc)
        solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
          xml.doc_ do
            build_fields(xml, iso_xml_doc)
          end
        end
        solr_xml_doc.doc
      end

      # Emits the field elements for every configured field; nil and empty
      # string values are skipped, array values are emitted element by element.
      def build_fields(xml, iso_xml_doc)
        @fields.each do |field_name, selector|
          create_solr_fields(iso_xml_doc, selector).each do |value|
            (value.is_a?(Array) ? value : [value]).each do |v|
              xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
            end
          end
        end
      end

      # Returns text without bytes that are invalid in its encoding and
      # without U+00BF characters. Values that are not String-like are
      # returned unchanged. The argument itself is never mutated.
      def strip_invalid_utf8_bytes(text)
        cleaned = text
        if cleaned.respond_to?(:encode) && !cleaned.valid_encoding?
          cleaned = cleaned.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
        end

        # delete! is probed (not delete) because Array and Hash respond to
        # delete with different semantics.
        cleaned = cleaned.delete("\u00BF") if cleaned.respond_to?(:delete!)

        cleaned
      end
    end
  end
end
|
@@ -1,197 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
|
3
|
-
require_relative 'iso_namespaces'
|
4
|
-
require_relative 'solr_format'
|
5
|
-
|
6
|
-
module SearchSolrTools
|
7
|
-
module Helpers
|
8
|
-
# Methods for generating formatted strings from ISO xml nodes that can be indexed by SOLR
|
9
|
-
# rubocop:disable ClassLength
|
10
|
-
class IsoToSolrFormat
|
11
|
-
KEYWORDS = proc { |keywords| build_keyword_list keywords }
|
12
|
-
|
13
|
-
SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
|
14
|
-
SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
|
15
|
-
SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
|
16
|
-
MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }
|
17
|
-
|
18
|
-
FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
|
19
|
-
FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
|
20
|
-
FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
|
21
|
-
FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }
|
22
|
-
|
23
|
-
TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
|
24
|
-
TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
|
25
|
-
TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
|
26
|
-
TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }
|
27
|
-
|
28
|
-
DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
|
29
|
-
ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
|
30
|
-
EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }
|
31
|
-
|
32
|
-
def self.spatial_display_str(box_node)
|
33
|
-
box = bounding_box(box_node)
|
34
|
-
"#{box[:south]} #{box[:west]} #{box[:north]} #{box[:east]}"
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.spatial_index_str(box_node)
|
38
|
-
box = bounding_box(box_node)
|
39
|
-
if box[:west] == box[:east] && box[:south] == box[:north]
|
40
|
-
[box[:west], box[:south]]
|
41
|
-
else
|
42
|
-
[box[:west], box[:south], box[:east], box[:north]]
|
43
|
-
end.join(' ')
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.spatial_area_str(box_node)
|
47
|
-
box = bounding_box(box_node)
|
48
|
-
area = box[:north].to_f - box[:south].to_f
|
49
|
-
area
|
50
|
-
end
|
51
|
-
|
52
|
-
def self.get_max_spatial_area(values)
|
53
|
-
values.map(&:to_f).max
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.get_spatial_facet(box_node)
|
57
|
-
box = bounding_box(box_node)
|
58
|
-
|
59
|
-
if BoundingBoxUtil.box_invalid?(box)
|
60
|
-
facet = nil
|
61
|
-
elsif BoundingBoxUtil.box_global?(box)
|
62
|
-
facet = 'Global'
|
63
|
-
else
|
64
|
-
facet = 'Non Global'
|
65
|
-
end
|
66
|
-
facet
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.get_spatial_scope_facet(box_node)
|
70
|
-
box = bounding_box(box_node)
|
71
|
-
SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.temporal_display_str(temporal_node, formatted = false)
|
75
|
-
SolrFormat.temporal_display_str(date_range(temporal_node, formatted))
|
76
|
-
end
|
77
|
-
|
78
|
-
def self.get_temporal_duration(temporal_node)
|
79
|
-
dr = date_range(temporal_node)
|
80
|
-
end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
|
81
|
-
SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
|
82
|
-
end
|
83
|
-
|
84
|
-
def self.get_temporal_duration_facet(temporal_node)
|
85
|
-
duration = get_temporal_duration(temporal_node)
|
86
|
-
SolrFormat.get_temporal_duration_facet(duration)
|
87
|
-
end
|
88
|
-
|
89
|
-
def self.temporal_index_str(temporal_node)
|
90
|
-
dr = date_range(temporal_node)
|
91
|
-
SolrFormat.temporal_index_str(dr)
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.sponsored_program_facet(node)
|
95
|
-
long_name = node.xpath('.//gmd:organisationName', IsoNamespaces.namespaces(node)).text.strip
|
96
|
-
short_name = node.xpath('.//gmd:organisationShortName', IsoNamespaces.namespaces(node)).text.strip
|
97
|
-
|
98
|
-
[long_name, short_name].join(' | ')
|
99
|
-
end
|
100
|
-
|
101
|
-
def self.build_keyword_list(keywords)
|
102
|
-
category = keywords.xpath('.//CategoryKeyword').text
|
103
|
-
topic = keywords.xpath('.//TopicKeyword').text
|
104
|
-
term = keywords.xpath('.//TermKeyword').text
|
105
|
-
category << ' > ' << topic << ' > ' << term
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.date_range(temporal_node, formatted = false)
|
109
|
-
start_date = get_first_matching_child(
|
110
|
-
temporal_node,
|
111
|
-
['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']
|
112
|
-
)
|
113
|
-
start_date = '' unless SolrFormat.date?(start_date)
|
114
|
-
start_date = SolrFormat.date_str(start_date) if formatted
|
115
|
-
|
116
|
-
end_date = get_first_matching_child(
|
117
|
-
temporal_node,
|
118
|
-
['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date']
|
119
|
-
)
|
120
|
-
end_date = '' unless SolrFormat.date?(end_date)
|
121
|
-
end_date = SolrFormat.date_str(end_date) if formatted
|
122
|
-
|
123
|
-
{
|
124
|
-
start: start_date,
|
125
|
-
end: end_date
|
126
|
-
}
|
127
|
-
end
|
128
|
-
|
129
|
-
# Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
|
130
|
-
def self.dataset_url(url_node)
|
131
|
-
url_node.text.strip =~ %r{http[s]?://} ? url_node.text.strip : ''
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.ices_dataset_url(auth_id)
|
135
|
-
'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid=' + auth_id
|
136
|
-
end
|
137
|
-
|
138
|
-
def self.get_first_matching_child(node, paths)
|
139
|
-
matching_nodes = node.at_xpath(paths.join(' | '), IsoNamespaces.namespaces(node))
|
140
|
-
matching_nodes.nil? ? '' : matching_nodes.text
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.bounding_box(box_node)
|
144
|
-
{
|
145
|
-
west: get_bound(box_node, :west),
|
146
|
-
south: get_bound(box_node, :south),
|
147
|
-
east: get_bound(box_node, :east),
|
148
|
-
north: get_bound(box_node, :north)
|
149
|
-
}
|
150
|
-
end
|
151
|
-
|
152
|
-
def self.axis_label(direction)
|
153
|
-
{
|
154
|
-
north: 'Latitude',
|
155
|
-
south: 'Latitude',
|
156
|
-
east: 'Longitude',
|
157
|
-
west: 'Longitude'
|
158
|
-
}[direction]
|
159
|
-
end
|
160
|
-
|
161
|
-
def self.coordinate_boundary(lat_lon)
|
162
|
-
{
|
163
|
-
'Latitude' => 90,
|
164
|
-
'Longitude' => 180
|
165
|
-
}[lat_lon]
|
166
|
-
end
|
167
|
-
|
168
|
-
def self.node_values(box_node, direction, lat_lon)
|
169
|
-
get_first_matching_child(
|
170
|
-
box_node,
|
171
|
-
[
|
172
|
-
"./gmd:#{direction.to_s.downcase}Bounding#{lat_lon}/gco:Decimal",
|
173
|
-
"./gmd:#{direction.to_s.downcase}Bound#{lat_lon}/gco:Decimal",
|
174
|
-
"./#{direction.to_s.capitalize}BoundingCoordinate",
|
175
|
-
"./dif:#{direction.to_s.capitalize}ernmost_#{lat_lon}"
|
176
|
-
]
|
177
|
-
).split(' ')
|
178
|
-
end
|
179
|
-
|
180
|
-
def self.get_bound(box_node, direction)
|
181
|
-
lat_lon = axis_label(direction)
|
182
|
-
|
183
|
-
vals = node_values(box_node, direction, lat_lon)
|
184
|
-
val = vals.first
|
185
|
-
|
186
|
-
boundary = coordinate_boundary(lat_lon)
|
187
|
-
out_of_bounds = boundary < val.to_f.abs
|
188
|
-
|
189
|
-
return '' if vals.empty? || out_of_bounds
|
190
|
-
|
191
|
-
val = -val.to_f if %w(West South).include?(vals.last)
|
192
|
-
|
193
|
-
val.to_f.to_s
|
194
|
-
end
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
|
3
|
-
require_relative 'iso_namespaces'
|
4
|
-
require_relative 'solr_format'
|
5
|
-
require_relative 'iso_to_solr_format'
|
6
|
-
|
7
|
-
module SearchSolrTools
  module Helpers
    # NCDC Paleo overrides of the generic formatters: the bounding box comes
    # from ows corner elements and the date range is parsed out of free text
    # containing "START YEAR: ... * END YEAR: ...".
    class NcdcPaleoFormat < IsoToSolrFormat
      class << self
        # Each corner's text is split on whitespace; the first token is used
        # as the longitude (east/west) and the second as the latitude.
        #
        # @return [Hash] :north, :south, :east, :west (nil where a token is missing)
        def bounding_box(node)
          upper = node.xpath('./ows:UpperCorner').text.split
          lower = node.xpath('./ows:LowerCorner').text.split

          { north: upper[1], south: lower[1], east: upper[0], west: lower[0] }
        end

        # @return [Hash, nil] :start/:end DateTimes, or nil when the text has
        #   no 'START YEAR' or uses neither the 'AD' nor the 'yr BP' notation
        def date_range(node, _formatted = false)
          return unless node.text.include?('START YEAR')
          return format_ad_time(node.text) if node.text.include?('AD')

          format_cal_yr_bp_time(node.text) if node.text.include?('yr BP')
        end

        # Parses "START YEAR: <y> AD * END YEAR: <y> AD" into DateTimes.
        def format_ad_time(node_text)
          match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)

          %i[start end].each_with_object({}) do |edge, range|
            range[edge] = DateTime.strptime(match[edge].strip, '%Y')
          end
        end

        # Parses "START YEAR: <n> ... yr BP * END YEAR: <n> ... yr BP".
        # NOTE(review): the year is computed as -(n) - 1950, not 1950 - n;
        # confirm this is the intended conversion from years BP.
        def format_cal_yr_bp_time(node_text)
          zero_year = 1950
          match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)

          %i[start end].each_with_object({}) do |edge, range|
            year = -(match[edge].strip.to_i) - zero_year
            range[edge] = DateTime.strptime(year.to_s, '%Y')
          end
        end

        # @return the SolrFormat index string for the range, or nil without a range
        def temporal_index_str(node)
          range = date_range(node)
          return if range.nil?

          SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s)
        end

        # @return [Integer, nil] absolute start/end difference truncated to an
        #   integer, or nil without a range
        def get_temporal_duration(node)
          range = date_range(node)
          return if range.to_s.empty?

          difference = range[:start] - range[:end]
          difference.to_i.abs
        end

        # @return '' unchanged, nil when the text holds a ';', else the text
        def author(node)
          return node if node == ''

          text = node.text
          text unless text.include?(';')
        end
      end
    end
  end
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
module SearchSolrTools
  module Helpers
    # Turns a hash of params into a URL query string.
    class QueryBuilder
      class << self
        # Keys and values are interpolated as-is (no escaping is applied).
        #
        # @param params [Hash] name => value pairs
        # @return [String] '?' followed by the "name=value" pairs joined with '&'
        def build(params)
          pairs = params.each_with_object([]) do |(name, value), list|
            list << "#{name}=#{value}"
          end

          '?' + pairs.join('&')
        end
      end
    end
  end
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
require_relative 'iso_namespaces'
|
2
|
-
require_relative 'iso_to_solr_format'
|
3
|
-
require_relative 'solr_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Helpers
    # R2R variant of the ISO formatters: the temporal procs are rebound to
    # this class and the date range is read from gml:TimeInstant elements
    # with the ids "start" and "end".
    class R2RFormat < IsoToSolrFormat
      TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
      TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
      TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
      FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }

      class << self
        # @param temporal_node node searched for the two time positions
        # @param _formatted ignored; accepted for signature compatibility
        # @return [Hash] :start and :end as the raw timePosition texts
        def date_range(temporal_node, _formatted = false)
          position_text = lambda do |instant_id|
            path = './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/' \
                   "gml:TimeInstant[@gml:id=\"#{instant_id}\"]/gml:timePosition"
            temporal_node.xpath(path).text
          end

          { start: position_text.call('start'), end: position_text.call('end') }
        end
      end
    end
  end
end
|