search_solr_tools 6.1.0 → 6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,107 +0,0 @@
|
|
1
|
-
require_relative '../helpers/solr_format'
|
2
|
-
require_relative '../helpers/iso_to_solr_format'
|
3
|
-
require_relative '../helpers/usgs_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
|
6
|
-
module Selectors
|
7
|
-
# Selector table for USGS ISO records. Each key is a field name in the solr
# schema; its value (the "selector") describes how that field is obtained from
# the ISO document:
#
#   xpaths:         (required) XPath expressions used to select nodes
#   multivalue:     (required) whether the solr field holds several values
#   default_values: (optional) values applied when none of the xpaths resolves
#   format:         (optional) formatter applied to the selected value(s)
#   reduce:         (optional) used here only on single-valued fields; presumably
#                   collapses several matches into one value - confirm against
#                   the code that consumes this table
#
# The table is frozen so that it cannot be altered by accident at runtime.
USGS = {
  authoritative_id: {
    xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
    multivalue: false
  },
  title: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
    multivalue: false
  },
  summary: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
    multivalue: false
  },
  data_centers: {
    xpaths: [''],
    default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]],
    multivalue: false
  },
  authors: {
    xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
    multivalue: true
  },
  keywords: {
    xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
    multivalue: true
  },
  last_revision_date: {
    xpaths: ['.//gmd:dateStamp/gco:DateTime'],
    # Formats the date into ISO8601 as in
    # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
    # NOTE: DateTime.now is evaluated once, when this constant is defined, so
    # the default is the load time of this file rather than the harvest time.
    default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
    multivalue: false,
    format: Helpers::SolrFormat::DATE
  },
  dataset_url: {
    xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
    multivalue: false
  },
  spatial_coverages: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
    multivalue: true,
    format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
  },
  spatial: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
    multivalue: true,
    format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
  },
  spatial_area: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
    multivalue: false,
    reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
    format: Helpers::IsoToSolrFormat::SPATIAL_AREA
  },
  temporal: {
    xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
    multivalue: true,
    format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING
  },
  temporal_coverages: {
    xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
    multivalue: true,
    format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING
  },
  temporal_duration: {
    xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
    multivalue: false,
    reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
    format: Helpers::UsgsFormat::TEMPORAL_DURATION
  },
  sensors: {
    xpaths: [''],
    multivalue: true
  },
  source: {
    xpaths: [''],
    default_values: ['ADE'],
    multivalue: false
  },
  facet_data_center: {
    xpaths: [''],
    default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:short_name]}"],
    multivalue: false
  },
  facet_spatial_scope: {
    xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
    multivalue: true,
    format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
  },
  facet_temporal_duration: {
    xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
    default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
    format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
    multivalue: true
  }
}.freeze
|
106
|
-
end
|
107
|
-
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'rest-client'
|
3
|
-
require 'rgeo/geo_json'
|
4
|
-
require 'rgeo/wkrep/wkt_parser'
|
5
|
-
|
6
|
-
require 'search_solr_tools'
|
7
|
-
require_relative '../helpers/solr_format'
|
8
|
-
require_relative '../helpers/translate_temporal_coverage'
|
9
|
-
require_relative '../helpers/translate_spatial_coverage'
|
10
|
-
|
11
|
-
module SearchSolrTools
|
12
|
-
module Translators
|
13
|
-
# Translates Bcodmo json to solr json format
class BcodmoJsonToSolr
  # Builds the solr document hash for one BCO-DMO dataset.
  #
  # json_doc    - Hash describing the dataset ('dataset_name', 'dataset_nid',
  #               'dataset_version', 'dataset_description', ... are read).
  # json_record - Hash describing the record ('id', 'startDate', 'endDate').
  # geometry    - GeoJSON feature Hash, passed to #translate_geometry.
  #
  # When json_doc has a 'people' key its value is fetched with an HTTP GET and
  # parsed as JSON to obtain the authors.
  #
  # Returns a Hash keyed by solr field name.
  #
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  def translate(json_doc, json_record, geometry)
    originators = json_doc.key?('people') ? JSON.parse(RestClient.get(json_doc['people'])) : []
    spatial_values = translate_geometry(geometry)
    temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages(
      [{ 'start' => json_record['startDate'].to_s, 'end' => json_record['endDate'].to_s }]
    )
    data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO]
    version_date = json_doc['dataset_deployment_version_date']
    {
      'title' => json_doc['dataset_name'],
      'authoritative_id' => json_record['id'] + json_doc['dataset_nid'],
      'dataset_version' => translate_dataset_version(json_doc['dataset_version']),
      'data_centers' => data_center[:long_name],
      'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
      # Fall back to the brief description when the full one is missing/blank.
      'summary' => json_doc['dataset_description'].to_s.empty? ? json_doc['dataset_brief_description'] : json_doc['dataset_description'],
      'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
      'temporal_duration' => temporal_coverage_values['temporal_duration'],
      'temporal' => temporal_coverage_values['temporal'],
      'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
      'last_revision_date' => version_date.to_s.empty? ? nil : Time.parse(version_date).strftime('%Y-%m-%dT%H:%M:%SZ'),
      'dataset_url' => json_doc['dataset_url'],
      'source' => 'ADE',
      'facet_spatial_coverage' => spatial_values[:global_facet],
      'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
      'spatial_coverages' => spatial_values[:spatial_display],
      'spatial_area' => spatial_values[:spatial_area],
      'spatial' => spatial_values[:spatial_index],
      'data_access_urls' => json_doc.fetch('dataset_deployment_url', []),
      'authors' => parse_people(originators)
    }
  end
  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

  # Strips every non-digit character from the version.
  #
  # Returns the remaining digits as a String, or nil when there are none
  # (including when dataset_version is nil).
  def translate_dataset_version(dataset_version)
    version_translation = dataset_version.to_s.gsub(/\D/, '')
    version_translation.empty? ? nil : version_translation
  end

  # Extracts the 'person_name' of every entry.
  #
  # Returns an Array of names, or nil when people_json is empty.
  def parse_people(people_json)
    people_json.map { |entry| entry['person_name'] } unless people_json.empty?
  end

  # Converts a GeoJSON feature into the spatial values used in the solr doc.
  #
  # wkt_geom - GeoJSON feature Hash with a 'geometry' => { 'type' => ... }
  #            entry. It is not modified; retyped copies are decoded instead.
  #
  # Returns a Hash with :spatial_display, :spatial_index, :spatial_area,
  # :global_facet and :spatial_scope_facet.
  def translate_geometry(wkt_geom)
    # A 'LineString' is decoded as a 'MultiPoint' so it can be cast below.
    feature = wkt_geom['geometry']['type'] == 'LineString' ? retyped(wkt_geom, 'MultiPoint') : wkt_geom
    geometry = RGeo::GeoJSON.decode(feature).geometry
    geometry = RGeo::Feature.cast(geometry, RGeo::Feature::MultiPoint)

    # This feed sometimes returns MultiLineString but wrongly calls them 'LineString'
    # If the above fails, we assume this is why. If the feed gets fixed, this code
    # should still handle that.
    if geometry.nil? || geometry.num_geometries.zero?
      # Try to decode as an actual MultiLineString.
      line_strings = RGeo::GeoJSON.decode(retyped(wkt_geom, 'MultiLineString')).geometry

      # Convert to a MultiPoint, for passing into the helper functions below.
      factory = RGeo::Geos.factory
      points = line_strings.coordinates.flatten.each_slice(2).map { |x, y| factory.point(x, y) }
      geometry = factory.multi_point(points)
    end

    {
      spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
      spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
      spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
      global_facet: Helpers::TranslateSpatialCoverage.geojson_to_global_facet([geometry]),
      spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
    }
  end

  private

  # Returns a copy of the GeoJSON feature whose geometry type is replaced by
  # +type+, leaving the caller's hash untouched.
  def retyped(feature, type)
    feature.merge('geometry' => feature['geometry'].merge('type' => type))
  end
end
|
88
|
-
end
|
89
|
-
end
|
@@ -1,84 +0,0 @@
|
|
1
|
-
require 'search_solr_tools'
|
2
|
-
require_relative '../helpers/solr_format'
|
3
|
-
require_relative '../helpers/translate_temporal_coverage'
|
4
|
-
require_relative '../helpers/translate_spatial_coverage'
|
5
|
-
require_relative '../helpers/bounding_box_util'
|
6
|
-
|
7
|
-
module SearchSolrTools
|
8
|
-
module Translators
|
9
|
-
# Translates an EOL THREDDS dataset link set into a SOLR json ingest record
class EolToSolr
  # Builds the solr document hash for one EOL dataset.
  #
  # title_metadata   - XML document whose first <dataset> element supplies the
  #                    'name' and 'ID' attributes.
  # dataset_metadata - XML document holding the dataset's documentation,
  #                    keywords, contributors and coverages.
  #
  # Returns a Hash keyed by solr field name.
  #
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  def translate(title_metadata, dataset_metadata)
    temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages(get_time_coverages(dataset_metadata))
    rev_date = dataset_metadata.xpath('//xmlns:date[@type="metadataCreated"]').text
    geospatial_coverage = parse_geospatial_coverages(dataset_metadata)
    dataset_node = title_metadata.xpath('//xmlns:dataset').first
    data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:EOL]
    {
      'title' => dataset_node['name'],
      'authoritative_id' => dataset_node['ID'],
      'data_centers' => data_center[:long_name],
      'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
      'summary' => dataset_metadata.xpath('//xmlns:documentation[@type="summary"]').text,
      'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
      'temporal_duration' => temporal_coverage_values['temporal_duration'],
      'temporal' => temporal_coverage_values['temporal'],
      'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
      # Use the current time when the record carries no metadataCreated date.
      'last_revision_date' => Helpers::SolrFormat.date_str(rev_date.empty? ? DateTime.now : rev_date),
      'source' => 'ADE',
      'keywords' => dataset_metadata.xpath('//xmlns:keyword').map(&:text),
      'authors' => dataset_metadata.xpath('//xmlns:contributor[@role="author"]').map { |node| parse_eol_authors(node.text) }.join(', '),
      'dataset_url' => eol_dataset_url(dataset_metadata),
      'facet_spatial_coverage' => Helpers::BoundingBoxUtil.box_global?(geospatial_coverage),
      'facet_spatial_scope' => Helpers::SolrFormat.get_spatial_scope_facet_with_bounding_box(geospatial_coverage),
      'spatial_coverages' => %i(south west north east).map { |d| geospatial_coverage[d] }.join(' '),
      'spatial_area' => spatial_coverage_to_spatial_area(geospatial_coverage),
      'spatial' => %i(west south east north).map { |d| geospatial_coverage[d] }.join(' ')
    }
  end
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

  # Finds the first documentation link that points at the EOL codiac site.
  #
  # Returns the href String, or nil when no link matches. When the XPath
  # lookup raises a syntax error a warning is printed and nil is returned.
  def eol_dataset_url(node)
    node.xpath('//xmlns:documentation[@xlink:href]').each do |doc|
      href = doc['xlink:href']
      return href if href.match('http://data.eol.ucar.edu/codiac/dss/id=(\S*)')
    end
    # Without this, the value of #each (the whole node set) would be returned.
    nil
  rescue Nokogiri::XML::XPath::SyntaxError
    puts "Warning - no documentation URL found in the following node: #{node.to_html}"
    nil
  end

  # Removes a trailing obfuscated e-mail address ("..., user AT host dot tld")
  # from an author entry by cutting at the last comma.
  #
  # Returns the author unchanged when it carries no such address, or when
  # there is no comma to cut at.
  def parse_eol_authors(author)
    return author unless author.include?(' AT ') && author.include?(' dot ')

    last_comma = author.rindex(',')
    last_comma ? author[0...last_comma] : author
  end

  # Collects every <timeCoverage> element as a { 'start' =>, 'end' => } Hash.
  def get_time_coverages(doc)
    doc.xpath('//xmlns:timeCoverage').map do |node|
      { 'start' => node.xpath('./xmlns:start').text,
        'end' => node.xpath('./xmlns:end').text }
    end
  end

  # Latitude extent of the bounding box (north minus south).
  #
  # Returns nil when either bound is missing. Subtracting the raw values keeps
  # the result correct for boxes that span the equator or lie in the southern
  # hemisphere.
  def spatial_coverage_to_spatial_area(coverage)
    north, south = coverage.values_at(:north, :south)
    return if north.nil? || south.nil?

    north - south
  end

  # Reads the <geospatialCoverage> start/size pairs into a bounding box.
  #
  # Returns a Hash with :east, :west, :north and :south as Floats.
  # NOTE(review): every bound goes through String#to_f, so an absent element
  # presumably yields 0.0 rather than nil - confirm that is intended.
  def parse_geospatial_coverages(doc)
    node = doc.xpath('//xmlns:geospatialCoverage')
    south = node.xpath('./xmlns:northsouth/xmlns:start').text.to_f
    north = south + node.xpath('./xmlns:northsouth/xmlns:size').text.to_f
    west = node.xpath('./xmlns:eastwest/xmlns:start').text.to_f
    east = west + node.xpath('./xmlns:eastwest/xmlns:size').text.to_f
    # EOL uses out-of-range east-west values to represent bounding boxes
    # that cross the date line. For any box with a value out of range,
    # adjust the east/west value to lie within the -180 to 180 range.
    east -= 360 if east > 180
    west += 360 if west < -180

    { east: east, west: west, north: north, south: south }
  end
end
|
83
|
-
end
|
84
|
-
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'rest-client'
|
3
|
-
require 'rgeo/geo_json'
|
4
|
-
|
5
|
-
require 'search_solr_tools'
|
6
|
-
require_relative '../helpers/solr_format'
|
7
|
-
require_relative '../helpers/translate_temporal_coverage'
|
8
|
-
require_relative '../helpers/translate_spatial_coverage'
|
9
|
-
|
10
|
-
module SearchSolrTools
|
11
|
-
module Translators
|
12
|
-
# Translates GTN-P json to solr json format
class GtnpJsonToSolr
  # Builds the solr document hash for one GTN-P entry.
  #
  # json_doc    - Hash for the entry ('name', 'link', 'geo' or 'coordinates',
  #               and optionally 'citation' are read).
  # json_record - Hash for the parent record ('title' and 'abstract' are read).
  #
  # The title and the authoritative id are both "<record title> - <doc name>".
  #
  # Returns a Hash keyed by solr field name.
  #
  # rubocop:disable Metrics/AbcSize
  def translate(json_doc, json_record)
    # Coordinates live under 'geo' when present, otherwise at the top level.
    json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
    concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
    spatial_values = translate_geometry(json_geo)
    data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP]
    {
      'title' => concatenated_name,
      'authoritative_id' => concatenated_name,
      'data_centers' => data_center[:long_name],
      'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
      'summary' => json_record['abstract'].to_s,
      'dataset_url' => json_doc['link'],
      'source' => 'ADE',
      'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
      'spatial_coverages' => spatial_values[:spatial_display],
      'spatial_area' => spatial_values[:spatial_area],
      'spatial' => spatial_values[:spatial_index],
      'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
      'facet_temporal_duration' => Helpers::SolrFormat::NOT_SPECIFIED,
      'authors' => parse_people(json_doc)
    }
  end
  # rubocop:enable Metrics/AbcSize

  # Lists the citation contacts as "<givenName> <familyName>" strings.
  #
  # Returns an empty Array when json_doc has no 'citation' or the citation has
  # no 'contacts'.
  def parse_people(json_doc)
    return [] unless json_doc.key?('citation') && json_doc['citation'].key?('contacts')

    json_doc['citation']['contacts'].map do |person|
      "#{person['givenName']} #{person['familyName']}"
    end
  end

  # Converts a longitude/latitude pair into the spatial values of the solr doc.
  #
  # json_geom - Hash with 'longitude' and 'latitude' entries.
  #
  # Returns a Hash with :spatial_display, :spatial_index, :spatial_area and
  # :spatial_scope_facet.
  def translate_geometry(json_geom)
    # The values are interpolated into GeoJSON text and parsed back from it.
    geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
    geometry = RGeo::GeoJSON.decode(geo_string, json_parser: :json)
    {
      spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
      spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
      spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
      spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
    }
  end
end
|
58
|
-
end
|
59
|
-
end
|