search_solr_tools 3.3.3 → 3.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -6
- data/bin/search_solr_tools +1 -0
- data/lib/search_solr_tools/config/environments.yaml +3 -0
- data/lib/search_solr_tools/harvesters/gtnp.rb +66 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
- data/lib/search_solr_tools/translators/gtnp_json.rb +55 -0
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b659f88de85b827d7a4f56883f1c0b781cc4fea8
|
4
|
+
data.tar.gz: 68c5c487467f2d2cffd12dfc780ce35248cc456f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4192221ae83802c9411d6af1aaa80e61bdeb8d67a96cf2bf8746c70199359e768e9b198be1d6ea09b0e098cb8116739fe2219d90a9e80a489b8ec2b20e8bea33
|
7
|
+
data.tar.gz: 515caf93f828b00e9c843dbb3447ad39b8b9bc7f73c0e33823d2c6e9be6a53c1aa63d23979962c2b9f3f15f76fcbab1ca3c15790d9d2ee7807c0a43cd4e3f9be
|
data/CHANGELOG.md
CHANGED
data/bin/search_solr_tools
CHANGED
@@ -71,6 +71,7 @@ class SolrHarvestCLI < Thor
|
|
71
71
|
'data_one' => SearchSolrTools::Harvesters::DataOne,
|
72
72
|
'echo' => SearchSolrTools::Harvesters::Echo,
|
73
73
|
'eol' => SearchSolrTools::Harvesters::Eol,
|
74
|
+
'gtnp' => SearchSolrTools::Harvesters::GtnP,
|
74
75
|
'ices' => SearchSolrTools::Harvesters::Ices,
|
75
76
|
'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
|
76
77
|
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
data/lib/search_solr_tools/config/environments.yaml
CHANGED
@@ -7,6 +7,9 @@
|
|
7
7
|
:cisl_url: https://www.aoncadis.org/oai/repository
|
8
8
|
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
9
9
|
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
|
10
|
+
:gtnp:
|
11
|
+
- http://www.gtnpdatabase.org/rest/boreholes/json
|
12
|
+
- http://www.gtnpdatabase.org/rest/activelayers/json
|
10
13
|
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
11
14
|
:ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
|
12
15
|
:nmi_url: http://access.met.no/metamod/oai
|
data/lib/search_solr_tools/harvesters/gtnp.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module SearchSolrTools
|
5
|
+
module Harvesters
|
6
|
+
# Harvests data from GTN-P endpoints, translates and adds it to solr
|
7
|
+
class GtnP < Base
|
8
|
+
def initialize(env = 'development', die_on_failure = false)
|
9
|
+
super env, die_on_failure
|
10
|
+
@translator = Translators::GtnpJsonToSolr.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def gtnp_service_urls
|
14
|
+
json_records = []
|
15
|
+
SearchSolrTools::SolrEnvironments[:common][:gtnp].flat_map do |endpoint|
|
16
|
+
record = request_json(endpoint)
|
17
|
+
json_records << record
|
18
|
+
end
|
19
|
+
json_records
|
20
|
+
end
|
21
|
+
|
22
|
+
def harvest_and_delete
|
23
|
+
puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
|
24
|
+
SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
|
25
|
+
super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
|
26
|
+
end
|
27
|
+
|
28
|
+
def harvest_gtnp_into_solr
|
29
|
+
result = translate_gtnp
|
30
|
+
insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
31
|
+
fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
|
32
|
+
end
|
33
|
+
|
34
|
+
def translate_gtnp
|
35
|
+
documents = []
|
36
|
+
failure_ids = []
|
37
|
+
gtnp_records = gtnp_service_urls
|
38
|
+
gtnp_records.each do |record|
|
39
|
+
results = parse_record(record)
|
40
|
+
results[:documents].each { |d| documents << d }
|
41
|
+
results[:failure_ids].each { |id| failure_ids << id }
|
42
|
+
end
|
43
|
+
{ add_docs: documents, failure_ids: failure_ids }
|
44
|
+
end
|
45
|
+
|
46
|
+
def request_json(url)
|
47
|
+
JSON.parse(RestClient.get(url))
|
48
|
+
end
|
49
|
+
|
50
|
+
def parse_record(record)
|
51
|
+
documents = []
|
52
|
+
failure_ids = []
|
53
|
+
begin
|
54
|
+
record.drop(1).each do |dataset|
|
55
|
+
trans_doc = @translator.translate(dataset, record[0])
|
56
|
+
documents << { 'add' => { 'doc' => trans_doc } }
|
57
|
+
end
|
58
|
+
rescue => e
|
59
|
+
puts "Failed to add record #{record[0][:title]} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
|
60
|
+
failure_ids << record[0][:title]
|
61
|
+
end
|
62
|
+
{ documents: documents, failure_ids: failure_ids }
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/search_solr_tools/helpers/solr_format.rb
CHANGED
@@ -14,6 +14,7 @@ module SearchSolrTools
|
|
14
14
|
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
15
15
|
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
16
16
|
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
17
|
+
GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost (GTN-P)' },
|
17
18
|
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
18
19
|
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
19
20
|
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
data/lib/search_solr_tools/translators/gtnp_json.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'rgeo/geo_json'
|
4
|
+
|
5
|
+
require 'search_solr_tools'
|
6
|
+
|
7
|
+
module SearchSolrTools
|
8
|
+
module Translators
|
9
|
+
# Translates GTN-P json to solr json format
|
10
|
+
class GtnpJsonToSolr
|
11
|
+
# rubocop:disable AbcSize
|
12
|
+
def translate(json_doc, json_record)
|
13
|
+
json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
|
14
|
+
concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
|
15
|
+
spatial_values = translate_geometry json_geo
|
16
|
+
{
|
17
|
+
'title' => concatenated_name,
|
18
|
+
'authoritative_id' => concatenated_name,
|
19
|
+
'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name],
|
20
|
+
'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:short_name]}",
|
21
|
+
'summary' => json_record['abstract'].to_s,
|
22
|
+
'dataset_url' => json_doc['link'],
|
23
|
+
'source' => 'ADE',
|
24
|
+
'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
|
25
|
+
'spatial_coverages' => spatial_values[:spatial_display],
|
26
|
+
'spatial_area' => spatial_values[:spatial_area],
|
27
|
+
'spatial' => spatial_values[:spatial_index],
|
28
|
+
'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
|
29
|
+
'authors' => parse_people(json_doc)
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
def parse_people(json_doc)
|
34
|
+
people_found = []
|
35
|
+
return people_found unless json_doc.key?('citation') && json_doc['citation'].key?('contacts')
|
36
|
+
citation = json_doc['citation']
|
37
|
+
citation['contacts'].each do |person|
|
38
|
+
people_found << "#{person['givenName']} #{person['familyName']}"
|
39
|
+
end
|
40
|
+
people_found
|
41
|
+
end
|
42
|
+
|
43
|
+
def translate_geometry(json_geom)
|
44
|
+
geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
|
45
|
+
geometry = RGeo::GeoJSON.decode(geo_string, json_parser: :json)
|
46
|
+
{
|
47
|
+
spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
|
48
|
+
spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
|
49
|
+
spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
|
50
|
+
spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
|
51
|
+
}
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.3
|
4
|
+
version: 3.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2016-
|
15
|
+
date: 2016-02-08 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: iso8601
|
@@ -282,6 +282,7 @@ files:
|
|
282
282
|
- lib/search_solr_tools/harvesters/data_one.rb
|
283
283
|
- lib/search_solr_tools/harvesters/echo.rb
|
284
284
|
- lib/search_solr_tools/harvesters/eol.rb
|
285
|
+
- lib/search_solr_tools/harvesters/gtnp.rb
|
285
286
|
- lib/search_solr_tools/harvesters/ices.rb
|
286
287
|
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
287
288
|
- lib/search_solr_tools/harvesters/nmi.rb
|
@@ -324,6 +325,7 @@ files:
|
|
324
325
|
- lib/search_solr_tools/selectors/usgs_iso.rb
|
325
326
|
- lib/search_solr_tools/translators/bcodmo_json.rb
|
326
327
|
- lib/search_solr_tools/translators/eol_to_solr.rb
|
328
|
+
- lib/search_solr_tools/translators/gtnp_json.rb
|
327
329
|
- lib/search_solr_tools/translators/nsidc_json.rb
|
328
330
|
- lib/search_solr_tools/version.rb
|
329
331
|
- search_solr_tools.gemspec
|
@@ -347,7 +349,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
347
349
|
version: '0'
|
348
350
|
requirements: []
|
349
351
|
rubyforge_project:
|
350
|
-
rubygems_version: 2.4.5
|
352
|
+
rubygems_version: 2.4.5
|
351
353
|
signing_key:
|
352
354
|
specification_version: 4
|
353
355
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|