search_solr_tools 3.3.3 → 3.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -6
- data/bin/search_solr_tools +1 -0
- data/lib/search_solr_tools/config/environments.yaml +3 -0
- data/lib/search_solr_tools/harvesters/gtnp.rb +66 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
- data/lib/search_solr_tools/translators/gtnp_json.rb +55 -0
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b659f88de85b827d7a4f56883f1c0b781cc4fea8
|
4
|
+
data.tar.gz: 68c5c487467f2d2cffd12dfc780ce35248cc456f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4192221ae83802c9411d6af1aaa80e61bdeb8d67a96cf2bf8746c70199359e768e9b198be1d6ea09b0e098cb8116739fe2219d90a9e80a489b8ec2b20e8bea33
|
7
|
+
data.tar.gz: 515caf93f828b00e9c843dbb3447ad39b8b9bc7f73c0e33823d2c6e9be6a53c1aa63d23979962c2b9f3f15f76fcbab1ca3c15790d9d2ee7807c0a43cd4e3f9be
|
data/CHANGELOG.md
CHANGED
data/bin/search_solr_tools
CHANGED
@@ -71,6 +71,7 @@ class SolrHarvestCLI < Thor
|
|
71
71
|
'data_one' => SearchSolrTools::Harvesters::DataOne,
|
72
72
|
'echo' => SearchSolrTools::Harvesters::Echo,
|
73
73
|
'eol' => SearchSolrTools::Harvesters::Eol,
|
74
|
+
'gtnp' => SearchSolrTools::Harvesters::GtnP,
|
74
75
|
'ices' => SearchSolrTools::Harvesters::Ices,
|
75
76
|
'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
|
76
77
|
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
@@ -7,6 +7,9 @@
|
|
7
7
|
:cisl_url: https://www.aoncadis.org/oai/repository
|
8
8
|
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
9
9
|
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
|
10
|
+
:gtnp:
|
11
|
+
- http://www.gtnpdatabase.org/rest/boreholes/json
|
12
|
+
- http://www.gtnpdatabase.org/rest/activelayers/json
|
10
13
|
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
11
14
|
:ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
|
12
15
|
:nmi_url: http://access.met.no/metamod/oai
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module SearchSolrTools
  module Harvesters
    # Harvests data from GTN-P endpoints, translates and adds it to solr.
    #
    # As consumed by this class, every endpoint payload is a JSON array whose
    # first element holds the record-level metadata (e.g. 'title') and whose
    # remaining elements are the individual datasets.
    class GtnP < Base
      # @param env [String] environment name, forwarded to Base
      # @param die_on_failure [Boolean] forwarded to Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @translator = Translators::GtnpJsonToSolr.new
      end

      # Requests every GTN-P endpoint configured under [:common][:gtnp].
      #
      # @return [Array] one parsed JSON payload per configured endpoint, in
      #   configuration order
      def gtnp_service_urls
        SearchSolrTools::SolrEnvironments[:common][:gtnp].map { |endpoint| request_json(endpoint) }
      end

      # Prints the configured endpoints, then hands the harvest method and the
      # GTN-P data center query to Base#harvest_and_delete.
      def harvest_and_delete
        puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
        SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
        super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
      end

      # Translates all GTN-P records and inserts the resulting documents.
      #
      # @raise [RuntimeError] when at least one record failed to translate;
      #   raised only after the successfully translated documents were inserted
      def harvest_gtnp_into_solr
        result = translate_gtnp
        insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
        # empty? rather than any?: a failure id may be nil when a record has no title
        fail 'Failed to harvest some records from the provider' unless result[:failure_ids].empty?
      end

      # Fetches every endpoint and merges the per-record translation results.
      #
      # @return [Hash] { add_docs: [solr add documents], failure_ids: [titles of failed records] }
      def translate_gtnp
        parsed = gtnp_service_urls.map { |record| parse_record(record) }
        {
          add_docs: parsed.flat_map { |result| result[:documents] },
          failure_ids: parsed.flat_map { |result| result[:failure_ids] }
        }
      end

      # @param url [String] endpoint to GET
      # @return [Object] the parsed JSON body (Hash keys are Strings)
      def request_json(url)
        JSON.parse(RestClient.get(url))
      end

      # Translates every dataset of one endpoint payload into a solr add document.
      #
      # A translation error stops processing of the remaining datasets of this
      # record; documents translated before the error are still returned.
      #
      # @param record [Array] parsed endpoint payload: metadata first, datasets after
      # @return [Hash] { documents: [...], failure_ids: [...] }
      def parse_record(record)
        documents = []
        failure_ids = []
        begin
          record.drop(1).each do |dataset|
            trans_doc = @translator.translate(dataset, record[0])
            documents << { 'add' => { 'doc' => trans_doc } }
          end
        rescue => e
          # JSON.parse produces String keys, so the title must be read with 'title'
          title = record[0]['title']
          puts "Failed to add record #{title} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
          failure_ids << title
        end
        { documents: documents, failure_ids: failure_ids }
      end
    end
  end
end
|
@@ -14,6 +14,7 @@ module SearchSolrTools
|
|
14
14
|
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
15
15
|
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
16
16
|
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
17
|
+
GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost (GTN-P)' },
|
17
18
|
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
18
19
|
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
19
20
|
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'rgeo/geo_json'
|
4
|
+
|
5
|
+
require 'search_solr_tools'
|
6
|
+
|
7
|
+
module SearchSolrTools
  module Translators
    # Translates GTN-P json to solr json format
    class GtnpJsonToSolr
      # Builds the solr document for one GTN-P dataset.
      #
      # @param json_doc [Hash] one dataset entry; its coordinates are read from
      #   json_doc['geo']['coordinates'] when 'geo' is present, otherwise from
      #   json_doc['coordinates']
      # @param json_record [Hash] record-level metadata ('title', 'abstract')
      # @return [Hash] solr field name => value
      # rubocop:disable AbcSize
      def translate(json_doc, json_record)
        json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
        concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
        spatial_values = translate_geometry json_geo
        data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP]
        {
          'title' => concatenated_name,
          'authoritative_id' => concatenated_name,
          'data_centers' => data_center[:long_name],
          'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
          'summary' => json_record['abstract'].to_s,
          'dataset_url' => json_doc['link'],
          'source' => 'ADE',
          'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
          'spatial_coverages' => spatial_values[:spatial_display],
          'spatial_area' => spatial_values[:spatial_area],
          'spatial' => spatial_values[:spatial_index],
          'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
          'authors' => parse_people(json_doc)
        }
      end

      # Collects the contact names listed under json_doc['citation']['contacts'].
      #
      # A missing or null 'citation' / 'contacts' yields an empty list, and a
      # contact without any usable name part is skipped.
      #
      # @param json_doc [Hash] one dataset entry
      # @return [Array<String>] "given family" names, in source order
      def parse_people(json_doc)
        citation = json_doc['citation']
        contacts = citation && citation['contacts']
        return [] unless contacts

        contacts.map { |person| contact_name(person) }.reject(&:empty?)
      end

      # Converts a GTN-P coordinate pair into the spatial solr values.
      #
      # @param json_geom [Hash] must provide 'longitude' and 'latitude'
      # @return [Hash] :spatial_display, :spatial_index, :spatial_area, :spatial_scope_facet
      def translate_geometry(json_geom)
        geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
        geometries = [RGeo::GeoJSON.decode(geo_string, json_parser: :json)]
        {
          spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(geometries),
          spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(geometries),
          spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(geometries),
          spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(geometries)
        }
      end

      private

      # Joins the non-blank given and family name of one contact with a single space.
      def contact_name(person)
        [person['givenName'], person['familyName']].map { |part| part.to_s.strip }.reject(&:empty?).join(' ')
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.3
|
4
|
+
version: 3.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2016-
|
15
|
+
date: 2016-02-08 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: iso8601
|
@@ -282,6 +282,7 @@ files:
|
|
282
282
|
- lib/search_solr_tools/harvesters/data_one.rb
|
283
283
|
- lib/search_solr_tools/harvesters/echo.rb
|
284
284
|
- lib/search_solr_tools/harvesters/eol.rb
|
285
|
+
- lib/search_solr_tools/harvesters/gtnp.rb
|
285
286
|
- lib/search_solr_tools/harvesters/ices.rb
|
286
287
|
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
287
288
|
- lib/search_solr_tools/harvesters/nmi.rb
|
@@ -324,6 +325,7 @@ files:
|
|
324
325
|
- lib/search_solr_tools/selectors/usgs_iso.rb
|
325
326
|
- lib/search_solr_tools/translators/bcodmo_json.rb
|
326
327
|
- lib/search_solr_tools/translators/eol_to_solr.rb
|
328
|
+
- lib/search_solr_tools/translators/gtnp_json.rb
|
327
329
|
- lib/search_solr_tools/translators/nsidc_json.rb
|
328
330
|
- lib/search_solr_tools/version.rb
|
329
331
|
- search_solr_tools.gemspec
|
@@ -347,7 +349,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
347
349
|
version: '0'
|
348
350
|
requirements: []
|
349
351
|
rubyforge_project:
|
350
|
-
rubygems_version: 2.4.5
|
352
|
+
rubygems_version: 2.4.5
|
351
353
|
signing_key:
|
352
354
|
specification_version: 4
|
353
355
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|