search_solr_tools 6.1.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,107 +0,0 @@
|
|
1
|
-
require_relative '../helpers/solr_format'
|
2
|
-
require_relative '../helpers/iso_to_solr_format'
|
3
|
-
require_relative '../helpers/usgs_format'
|
4
|
-
|
5
|
-
module SearchSolrTools
  module Selectors
    # Selector table for USGS ISO documents.
    #
    # Each key names a field in the solr schema. Its value (a "selector") is in
    # charge of selecting the nodes from the ISO document, applying the default
    # values if none of the xpaths resolved to a value, and formatting the field.
    #
    #   xpaths         - required; XPath expressions evaluated against the ISO document
    #   multivalue     - required; boolean flag (presumably whether the solr field
    #                    holds several values -- confirm against the consumer)
    #   default_values - optional; values applied when no xpath resolves to a value
    #   format         - optional; formatter applied to the selected nodes
    #   reduce         - optional; NOTE(review): looks like a callable collapsing
    #                    several matches into one value -- confirm against the consumer
    #
    # The table is frozen so the shared constant cannot be modified in place.
    USGS = {
      authoritative_id: {
        xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:DateTime'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this constant is
        # defined, so the default is the load time rather than the harvest time.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::UsgsFormat::TEMPORAL_DURATION
      },
      sensors: {
        xpaths: [''],
        multivalue: true
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'rest-client'
|
3
|
-
require 'rgeo/geo_json'
|
4
|
-
require 'rgeo/wkrep/wkt_parser'
|
5
|
-
|
6
|
-
require 'search_solr_tools'
|
7
|
-
require_relative '../helpers/solr_format'
|
8
|
-
require_relative '../helpers/translate_temporal_coverage'
|
9
|
-
require_relative '../helpers/translate_spatial_coverage'
|
10
|
-
|
11
|
-
module SearchSolrTools
  module Translators
    # Translates Bcodmo json to solr json format
    class BcodmoJsonToSolr
      # rubocop:disable MethodLength
      # rubocop:disable AbcSize

      # Builds the solr document hash for one BCO-DMO dataset.
      #
      # json_doc    - Hash of dataset metadata ('dataset_name', 'dataset_nid',
      #               'dataset_version', 'people', ... are read).
      # json_record - Hash; 'id', 'startDate' and 'endDate' are read.
      # geometry    - GeoJSON feature Hash, handed to #translate_geometry.
      #
      # When json_doc has a 'people' key, its value is fetched with
      # RestClient.get and parsed as JSON to obtain the author entries.
      #
      # Returns a Hash keyed by solr field name.
      def translate(json_doc, json_record, geometry)
        originators = json_doc.key?('people') ? JSON.parse(RestClient.get(json_doc['people'])) : []
        spatial_values = translate_geometry(geometry)
        temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages(
          [{ 'start' => json_record['startDate'].to_s, 'end' => json_record['endDate'].to_s }]
        )
        data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO]
        {
          'title' => json_doc['dataset_name'],
          'authoritative_id' => json_record['id'] + json_doc['dataset_nid'],
          'dataset_version' => translate_dataset_version(json_doc['dataset_version']),
          'data_centers' => data_center[:long_name],
          'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
          # Fall back to the brief description when the full one is missing or empty.
          'summary' => json_doc['dataset_description'].to_s.empty? ? json_doc['dataset_brief_description'] : json_doc['dataset_description'],
          'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
          'temporal_duration' => temporal_coverage_values['temporal_duration'],
          'temporal' => temporal_coverage_values['temporal'],
          'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
          # NOTE(review): Time.parse comes from the stdlib 'time' extension, which
          # this file does not require itself -- presumably loaded by a dependency.
          # NOTE(review): the literal 'Z' is appended without converting to UTC;
          # confirm the feed's dates carry no offset.
          'last_revision_date' => json_doc['dataset_deployment_version_date'].to_s.empty? ? nil : Time.parse(json_doc['dataset_deployment_version_date']).strftime('%Y-%m-%dT%H:%M:%SZ'),
          'dataset_url' => json_doc['dataset_url'],
          'source' => 'ADE',
          'facet_spatial_coverage' => spatial_values[:global_facet],
          'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
          'spatial_coverages' => spatial_values[:spatial_display],
          'spatial_area' => spatial_values[:spatial_area],
          'spatial' => spatial_values[:spatial_index],
          'data_access_urls' => json_doc.fetch('dataset_deployment_url', []),
          'authors' => parse_people(originators)
        }
      end
      # rubocop:enable AbcSize
      # rubocop:enable MethodLength

      # Strips every non-digit character from the version.
      #
      # Returns the remaining digits as a String, or nil when none remain.
      def translate_dataset_version(dataset_version)
        version_translation = dataset_version.to_s.gsub(/\D/, '')
        version_translation.empty? ? nil : version_translation
      end

      # Extracts 'person_name' from each entry.
      #
      # Returns an Array of names, or nil when people_json is empty.
      def parse_people(people_json)
        people_json.map { |entry| entry['person_name'] } unless people_json.empty?
      end

      # Decodes a GeoJSON feature into a MultiPoint geometry and derives the
      # spatial solr values from it.
      #
      # wkt_geom - GeoJSON feature Hash with a 'geometry' => { 'type' => ... }
      #            entry. The Hash is not modified; retyped copies are decoded.
      #
      # Returns a Hash with :spatial_display, :spatial_index, :spatial_area,
      # :global_facet and :spatial_scope_facet.
      def translate_geometry(wkt_geom)
        # A LineString is decoded as a MultiPoint (same coordinate layout).
        feature = wkt_geom['geometry']['type'] == 'LineString' ? retype_feature(wkt_geom, 'MultiPoint') : wkt_geom
        geometry = RGeo::GeoJSON.decode(feature).geometry
        geometry = RGeo::Feature.cast(geometry, RGeo::Feature::MultiPoint)

        # This feed sometimes returns MultiLineString but wrongly calls them 'LineString'
        # If the above fails, we assume this is why. If the feed gets fixed, this code
        # should still handle that.
        if geometry.nil? || geometry.num_geometries.zero?
          # Try to decode as an actual MultiLineString.
          geometry = RGeo::GeoJSON.decode(retype_feature(wkt_geom, 'MultiLineString')).geometry

          # Convert to a MultiPoint, for passing into the helper functions below.
          factory = RGeo::Geos.factory
          points = geometry.coordinates.flatten.each_slice(2).map { |x, y| factory.point(x, y) }
          geometry = factory.multi_point(points)
        end

        {
          spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
          spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
          spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
          global_facet: Helpers::TranslateSpatialCoverage.geojson_to_global_facet([geometry]),
          spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
        }
      end

      private

      # Returns a copy of the feature whose geometry type is replaced by
      # +type+, leaving the caller's Hash untouched.
      def retype_feature(feature, type)
        feature.merge('geometry' => feature['geometry'].merge('type' => type))
      end
    end
  end
end
|
@@ -1,84 +0,0 @@
|
|
1
|
-
require 'search_solr_tools'
|
2
|
-
require_relative '../helpers/solr_format'
|
3
|
-
require_relative '../helpers/translate_temporal_coverage'
|
4
|
-
require_relative '../helpers/translate_spatial_coverage'
|
5
|
-
require_relative '../helpers/bounding_box_util'
|
6
|
-
|
7
|
-
module SearchSolrTools
  module Translators
    # Translates an EOL THREDDS dataset link set into a SOLR json ingest record
    class EolToSolr
      # rubocop:disable Metrics/AbcSize
      # rubocop:disable Metrics/MethodLength

      # Builds the solr document hash for one EOL dataset.
      #
      # title_metadata   - XML document responding to #xpath; the first
      #                    xmlns:dataset element supplies 'name' and 'ID'.
      # dataset_metadata - XML document responding to #xpath; supplies summary,
      #                    keywords, authors, dates and coverages.
      #
      # Returns a Hash keyed by solr field name.
      def translate(title_metadata, dataset_metadata)
        temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages(get_time_coverages(dataset_metadata))
        rev_date = dataset_metadata.xpath('//xmlns:date[@type="metadataCreated"]').text
        geospatial_coverage = parse_geospatial_coverages(dataset_metadata)
        dataset = title_metadata.xpath('//xmlns:dataset').first
        data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:EOL]
        {
          'title' => dataset['name'],
          'authoritative_id' => dataset['ID'],
          'data_centers' => data_center[:long_name],
          'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
          'summary' => dataset_metadata.xpath('//xmlns:documentation[@type="summary"]').text,
          'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
          'temporal_duration' => temporal_coverage_values['temporal_duration'],
          'temporal' => temporal_coverage_values['temporal'],
          'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
          # Use the current time when the document has no metadataCreated date.
          'last_revision_date' => rev_date.empty? ? Helpers::SolrFormat.date_str(DateTime.now) : Helpers::SolrFormat.date_str(rev_date),
          'source' => 'ADE',
          'keywords' => dataset_metadata.xpath('//xmlns:keyword').map(&:text),
          'authors' => dataset_metadata.xpath('//xmlns:contributor[@role="author"]').map { |node| parse_eol_authors(node.text) }.join(', '),
          'dataset_url' => eol_dataset_url(dataset_metadata),
          'facet_spatial_coverage' => Helpers::BoundingBoxUtil.box_global?(geospatial_coverage),
          'facet_spatial_scope' => Helpers::SolrFormat.get_spatial_scope_facet_with_bounding_box(geospatial_coverage),
          'spatial_coverages' => %i[south west north east].map { |d| geospatial_coverage[d] }.join(' '),
          'spatial_area' => spatial_coverage_to_spatial_area(geospatial_coverage),
          'spatial' => %i[west south east north].map { |d| geospatial_coverage[d] }.join(' ')
        }
      end
      # rubocop:enable Metrics/MethodLength
      # rubocop:enable Metrics/AbcSize

      # Finds the first documentation link pointing at the EOL codiac site.
      #
      # Returns the href String, or nil when no link matches or the XPath
      # query raises a syntax error (a warning is printed in that case).
      def eol_dataset_url(node)
        node.xpath('//xmlns:documentation[@xlink:href]').each do |doc|
          href = doc['xlink:href']
          return href if href.match('http://data.eol.ucar.edu/codiac/dss/id=(\S*)')
        end
        # Without this the method would return the node set that #each yields.
        nil
      rescue Nokogiri::XML::XPath::SyntaxError
        puts "Warning - no documentation URL found in the following node: #{node.to_html}"
      end

      # Drops the trailing obfuscated e-mail address ("... AT ... dot ...")
      # from an author string by cutting at the last comma.
      #
      # Returns the author unchanged when it has no such address or no comma.
      def parse_eol_authors(author)
        return author unless author.include?(' AT ') && author.include?(' dot ')

        last_comma = author.rindex(',')
        last_comma ? author[0...last_comma] : author
      end

      # Collects every xmlns:timeCoverage element as a
      # { 'start' => ..., 'end' => ... } Hash of text values.
      def get_time_coverages(doc)
        doc.xpath('//xmlns:timeCoverage').map do |node|
          { 'start' => node.xpath('./xmlns:start').text,
            'end' => node.xpath('./xmlns:end').text }
        end
      end

      # Latitudinal extent of the bounding box (north minus south).
      #
      # Returns nil when either edge is missing.
      def spatial_coverage_to_spatial_area(coverage)
        return if %i[north south].any? { |edge| coverage[edge].nil? }

        # Plain difference: subtracting absolute values collapses boxes that
        # straddle the equator and goes negative in the southern hemisphere.
        coverage[:north] - coverage[:south]
      end

      # Reads the xmlns:geospatialCoverage start/size pairs into a bounding box.
      #
      # Returns a Hash with Float :east, :west, :north and :south.
      def parse_geospatial_coverages(doc)
        node = doc.xpath('//xmlns:geospatialCoverage')
        south = node.xpath('./xmlns:northsouth/xmlns:start').text.to_f
        north = south + node.xpath('./xmlns:northsouth/xmlns:size').text.to_f
        west = node.xpath('./xmlns:eastwest/xmlns:start').text.to_f
        east = west + node.xpath('./xmlns:eastwest/xmlns:size').text.to_f
        # EOL uses out-of-range east-west values to represent bounding boxes
        # that cross the date line. For any box with a value out of range,
        # adjust the east/west value to lie within the -180 to 180 range.
        east -= 360 if east > 180
        west += 360 if west < -180

        { east: east, west: west, north: north, south: south }
      end
    end
  end
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'rest-client'
|
3
|
-
require 'rgeo/geo_json'
|
4
|
-
|
5
|
-
require 'search_solr_tools'
|
6
|
-
require_relative '../helpers/solr_format'
|
7
|
-
require_relative '../helpers/translate_temporal_coverage'
|
8
|
-
require_relative '../helpers/translate_spatial_coverage'
|
9
|
-
|
10
|
-
module SearchSolrTools
  module Translators
    # Translates GTN-P json to solr json format
    class GtnpJsonToSolr
      # rubocop:disable AbcSize

      # Builds the solr document hash for one GTN-P record.
      #
      # json_doc    - Hash; 'geo' (or top-level 'coordinates'), 'name', 'link'
      #               and 'citation' are read.
      # json_record - Hash; 'title' and 'abstract' are read.
      #
      # Returns a Hash keyed by solr field name. The title and the
      # authoritative id are both "<record title> - <doc name>".
      def translate(json_doc, json_record)
        json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
        concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
        spatial_values = translate_geometry(json_geo)
        data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP]
        {
          'title' => concatenated_name,
          'authoritative_id' => concatenated_name,
          'data_centers' => data_center[:long_name],
          'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
          'summary' => json_record['abstract'].to_s,
          'dataset_url' => json_doc['link'],
          'source' => 'ADE',
          'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
          'spatial_coverages' => spatial_values[:spatial_display],
          'spatial_area' => spatial_values[:spatial_area],
          'spatial' => spatial_values[:spatial_index],
          'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
          'facet_temporal_duration' => Helpers::SolrFormat::NOT_SPECIFIED,
          'authors' => parse_people(json_doc)
        }
      end
      # rubocop:enable AbcSize

      # Lists the citation contacts as "<givenName> <familyName>" strings.
      #
      # Returns an empty Array when json_doc has no usable 'citation' /
      # 'contacts' entry, including when either value is null.
      def parse_people(json_doc)
        citation = json_doc['citation']
        return [] unless citation.respond_to?(:key?) && citation.key?('contacts')

        # Array() turns a null contacts value into an empty list.
        Array(citation['contacts']).map do |person|
          "#{person['givenName']} #{person['familyName']}"
        end
      end

      # Builds a GeoJSON Point from json_geom['longitude'] / ['latitude'],
      # decodes it and derives the spatial solr values from the geometry.
      #
      # Returns a Hash with :spatial_display, :spatial_index, :spatial_area
      # and :spatial_scope_facet.
      def translate_geometry(json_geom)
        geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
        geometry = RGeo::GeoJSON.decode(geo_string, json_parser: :json)
        {
          spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
          spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
          spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
          spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
        }
      end
    end
  end
end
|