search_solr_tools 3.3.3 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7ede056ad3f242ee3181914e874335216920d6fb
4
- data.tar.gz: 778b003b040565581be421b43cccfb49f4086952
3
+ metadata.gz: b659f88de85b827d7a4f56883f1c0b781cc4fea8
4
+ data.tar.gz: 68c5c487467f2d2cffd12dfc780ce35248cc456f
5
5
  SHA512:
6
- metadata.gz: 3e9665f5671a2b1f195ec51902d396dd302b262dd848776259db3ba176e6eea53f716f4d1557ca6e8cc1d3954dc5034ae7d66664faa25db112b0a78c12f9733f
7
- data.tar.gz: 3a632303193cec2b4784dce4cf0311ba69826b6d1b00c8cbe8da43748bce11434299cc378fdff507411b5003f943da37326b880d860118c51319d849c6f7a90a
6
+ metadata.gz: 4192221ae83802c9411d6af1aaa80e61bdeb8d67a96cf2bf8746c70199359e768e9b198be1d6ea09b0e098cb8116739fe2219d90a9e80a489b8ec2b20e8bea33
7
+ data.tar.gz: 515caf93f828b00e9c843dbb3447ad39b8b9bc7f73c0e33823d2c6e9be6a53c1aa63d23979962c2b9f3f15f76fcbab1ca3c15790d9d2ee7807c0a43cd4e3f9be
@@ -1,9 +1,3 @@
1
- ## v3.3.2
2
-
3
- Bugfix
4
-
5
- - Added quote checking for cisl offset parsing check
6
-
7
1
  ## v3.3.1
8
2
 
9
3
  Bugfix
@@ -71,6 +71,7 @@ class SolrHarvestCLI < Thor
71
71
  'data_one' => SearchSolrTools::Harvesters::DataOne,
72
72
  'echo' => SearchSolrTools::Harvesters::Echo,
73
73
  'eol' => SearchSolrTools::Harvesters::Eol,
74
+ 'gtnp' => SearchSolrTools::Harvesters::GtnP,
74
75
  'ices' => SearchSolrTools::Harvesters::Ices,
75
76
  'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
76
77
  'nmi' => SearchSolrTools::Harvesters::Nmi,
@@ -7,6 +7,9 @@
7
7
  :cisl_url: https://www.aoncadis.org/oai/repository
8
8
  :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
10
+ :gtnp:
11
+ - http://www.gtnpdatabase.org/rest/boreholes/json
12
+ - http://www.gtnpdatabase.org/rest/activelayers/json
10
13
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
11
14
  :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
12
15
  :nmi_url: http://access.met.no/metamod/oai
@@ -0,0 +1,66 @@
1
+ require 'json'
2
+ require 'rest-client'
3
+
4
+ module SearchSolrTools
5
+ module Harvesters
6
+ # Harvests data from GTN-P endpoints, translates and adds it to solr
7
+ class GtnP < Base
8
+ def initialize(env = 'development', die_on_failure = false)
9
+ super env, die_on_failure
10
+ @translator = Translators::GtnpJsonToSolr.new
11
+ end
12
+
13
+ def gtnp_service_urls
14
+ json_records = []
15
+ SearchSolrTools::SolrEnvironments[:common][:gtnp].flat_map do |endpoint|
16
+ record = request_json(endpoint)
17
+ json_records << record
18
+ end
19
+ json_records
20
+ end
21
+
22
+ def harvest_and_delete
23
+ puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
24
+ SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
25
+ super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
26
+ end
27
+
28
+ def harvest_gtnp_into_solr
29
+ result = translate_gtnp
30
+ insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
31
+ fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
32
+ end
33
+
34
+ def translate_gtnp
35
+ documents = []
36
+ failure_ids = []
37
+ gtnp_records = gtnp_service_urls
38
+ gtnp_records.each do |record|
39
+ results = parse_record(record)
40
+ results[:documents].each { |d| documents << d }
41
+ results[:failure_ids].each { |id| failure_ids << id }
42
+ end
43
+ { add_docs: documents, failure_ids: failure_ids }
44
+ end
45
+
46
+ def request_json(url)
47
+ JSON.parse(RestClient.get(url))
48
+ end
49
+
50
+ def parse_record(record)
51
+ documents = []
52
+ failure_ids = []
53
+ begin
54
+ record.drop(1).each do |dataset|
55
+ trans_doc = @translator.translate(dataset, record[0])
56
+ documents << { 'add' => { 'doc' => trans_doc } }
57
+ end
58
+ rescue => e
59
+ puts "Failed to add record #{record[0][:title]} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
60
+ failure_ids << record[0][:title]
61
+ end
62
+ { documents: documents, failure_ids: failure_ids }
63
+ end
64
+ end
65
+ end
66
+ end
@@ -14,6 +14,7 @@ module SearchSolrTools
14
14
  DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
15
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
16
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
17
+ GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost (GTN-P)' },
17
18
  ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
18
19
  NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
19
20
  NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
@@ -0,0 +1,55 @@
1
+ require 'json'
2
+ require 'rest-client'
3
+ require 'rgeo/geo_json'
4
+
5
+ require 'search_solr_tools'
6
+
7
+ module SearchSolrTools
8
+ module Translators
9
+ # Translates GTN-P json to solr json format
10
+ class GtnpJsonToSolr
11
+ # rubocop:disable AbcSize
12
+ def translate(json_doc, json_record)
13
+ json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
14
+ concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
15
+ spatial_values = translate_geometry json_geo
16
+ {
17
+ 'title' => concatenated_name,
18
+ 'authoritative_id' => concatenated_name,
19
+ 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name],
20
+ 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:short_name]}",
21
+ 'summary' => json_record['abstract'].to_s,
22
+ 'dataset_url' => json_doc['link'],
23
+ 'source' => 'ADE',
24
+ 'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
25
+ 'spatial_coverages' => spatial_values[:spatial_display],
26
+ 'spatial_area' => spatial_values[:spatial_area],
27
+ 'spatial' => spatial_values[:spatial_index],
28
+ 'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
29
+ 'authors' => parse_people(json_doc)
30
+ }
31
+ end
32
+
33
+ def parse_people(json_doc)
34
+ people_found = []
35
+ return people_found unless json_doc.key?('citation') && json_doc['citation'].key?('contacts')
36
+ citation = json_doc['citation']
37
+ citation['contacts'].each do |person|
38
+ people_found << "#{person['givenName']} #{person['familyName']}"
39
+ end
40
+ people_found
41
+ end
42
+
43
+ def translate_geometry(json_geom)
44
+ geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
45
+ geometry = RGeo::GeoJSON.decode(geo_string, json_parser: :json)
46
+ {
47
+ spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
48
+ spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
49
+ spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
50
+ spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
51
+ }
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.3.3'
2
+ VERSION = '3.3.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-01-14 00:00:00.000000000 Z
15
+ date: 2016-02-08 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -282,6 +282,7 @@ files:
282
282
  - lib/search_solr_tools/harvesters/data_one.rb
283
283
  - lib/search_solr_tools/harvesters/echo.rb
284
284
  - lib/search_solr_tools/harvesters/eol.rb
285
+ - lib/search_solr_tools/harvesters/gtnp.rb
285
286
  - lib/search_solr_tools/harvesters/ices.rb
286
287
  - lib/search_solr_tools/harvesters/ncdc_paleo.rb
287
288
  - lib/search_solr_tools/harvesters/nmi.rb
@@ -324,6 +325,7 @@ files:
324
325
  - lib/search_solr_tools/selectors/usgs_iso.rb
325
326
  - lib/search_solr_tools/translators/bcodmo_json.rb
326
327
  - lib/search_solr_tools/translators/eol_to_solr.rb
328
+ - lib/search_solr_tools/translators/gtnp_json.rb
327
329
  - lib/search_solr_tools/translators/nsidc_json.rb
328
330
  - lib/search_solr_tools/version.rb
329
331
  - search_solr_tools.gemspec
@@ -347,7 +349,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
347
349
  version: '0'
348
350
  requirements: []
349
351
  rubyforge_project:
350
- rubygems_version: 2.4.5.1
352
+ rubygems_version: 2.4.5
351
353
  signing_key:
352
354
  specification_version: 4
353
355
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.