search_solr_tools 3.3.3 → 3.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7ede056ad3f242ee3181914e874335216920d6fb
4
- data.tar.gz: 778b003b040565581be421b43cccfb49f4086952
3
+ metadata.gz: b659f88de85b827d7a4f56883f1c0b781cc4fea8
4
+ data.tar.gz: 68c5c487467f2d2cffd12dfc780ce35248cc456f
5
5
  SHA512:
6
- metadata.gz: 3e9665f5671a2b1f195ec51902d396dd302b262dd848776259db3ba176e6eea53f716f4d1557ca6e8cc1d3954dc5034ae7d66664faa25db112b0a78c12f9733f
7
- data.tar.gz: 3a632303193cec2b4784dce4cf0311ba69826b6d1b00c8cbe8da43748bce11434299cc378fdff507411b5003f943da37326b880d860118c51319d849c6f7a90a
6
+ metadata.gz: 4192221ae83802c9411d6af1aaa80e61bdeb8d67a96cf2bf8746c70199359e768e9b198be1d6ea09b0e098cb8116739fe2219d90a9e80a489b8ec2b20e8bea33
7
+ data.tar.gz: 515caf93f828b00e9c843dbb3447ad39b8b9bc7f73c0e33823d2c6e9be6a53c1aa63d23979962c2b9f3f15f76fcbab1ca3c15790d9d2ee7807c0a43cd4e3f9be
@@ -1,9 +1,3 @@
1
- ## v3.3.2
2
-
3
- Bugfix
4
-
5
- - Added quote checking for cisl offset parsing check
6
-
7
1
  ## v3.3.1
8
2
 
9
3
  Bugfix
@@ -71,6 +71,7 @@ class SolrHarvestCLI < Thor
71
71
  'data_one' => SearchSolrTools::Harvesters::DataOne,
72
72
  'echo' => SearchSolrTools::Harvesters::Echo,
73
73
  'eol' => SearchSolrTools::Harvesters::Eol,
74
+ 'gtnp' => SearchSolrTools::Harvesters::GtnP,
74
75
  'ices' => SearchSolrTools::Harvesters::Ices,
75
76
  'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
76
77
  'nmi' => SearchSolrTools::Harvesters::Nmi,
@@ -7,6 +7,9 @@
7
7
  :cisl_url: https://www.aoncadis.org/oai/repository
8
8
  :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
10
+ :gtnp:
11
+ - http://www.gtnpdatabase.org/rest/boreholes/json
12
+ - http://www.gtnpdatabase.org/rest/activelayers/json
10
13
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
11
14
  :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
12
15
  :nmi_url: http://access.met.no/metamod/oai
@@ -0,0 +1,66 @@
1
+ require 'json'
2
+ require 'rest-client'
3
+
4
+ module SearchSolrTools
5
+ module Harvesters
6
# Harvests GTN-P records from the configured JSON endpoints, translates
# them with Translators::GtnpJsonToSolr and inserts the result into Solr.
class GtnP < Base
  # @param env [String] environment name, forwarded unchanged to Base
  # @param die_on_failure [Boolean] forwarded unchanged to Base
  def initialize(env = 'development', die_on_failure = false)
    super env, die_on_failure
    @translator = Translators::GtnpJsonToSolr.new
  end

  # Downloads and parses the JSON payload of every configured GTN-P endpoint.
  # NOTE: despite the method name, the return value is the list of parsed
  # payloads, not the list of URLs.
  #
  # @return [Array] one parsed JSON payload per endpoint, in configuration order
  def gtnp_service_urls
    SearchSolrTools::SolrEnvironments[:common][:gtnp].map { |endpoint| request_json(endpoint) }
  end

  # Prints the configured endpoints, then delegates to Base#harvest_and_delete
  # with the GTN-P harvest method and a Solr query string that matches the
  # GTN-P data center long name.
  # NOTE(review): what Base does with the query string is not visible here;
  # presumably it selects the documents to delete - confirm in Base.
  def harvest_and_delete
    puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
    SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
    super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
  end

  # Translates all GTN-P payloads and inserts the resulting documents.
  # Documents that translated successfully are inserted before the failure
  # is reported.
  #
  # @raise [RuntimeError] if any payload could not be fully translated
  def harvest_gtnp_into_solr
    result = translate_gtnp
    insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
    # `empty?` rather than `any?`: a failure id may be nil when the payload
    # carried no title, and that must still count as a failure.
    raise 'Failed to harvest some records from the provider' unless result[:failure_ids].empty?
  end

  # Fetches every endpoint payload and merges the per-payload results.
  #
  # @return [Hash] `{ add_docs: [...], failure_ids: [...] }`
  def translate_gtnp
    documents = []
    failure_ids = []
    gtnp_service_urls.each do |record|
      parsed = parse_record(record)
      documents.concat(parsed[:documents])
      failure_ids.concat(parsed[:failure_ids])
    end
    { add_docs: documents, failure_ids: failure_ids }
  end

  # Performs an HTTP GET on +url+ and parses the response body as JSON.
  # Network and parse errors are not handled here and propagate to the caller.
  #
  # @param url [String] endpoint to request
  # @return [Object] the parsed JSON value
  def request_json(url)
    JSON.parse(RestClient.get(url))
  end

  # Translates one endpoint payload into Solr "add" commands. The first
  # element of the payload is passed to the translator as the shared record
  # header; every following element is translated as a dataset.
  #
  # The rescue wraps the whole loop, so the first dataset that fails stops
  # processing of the remaining datasets of this payload; documents
  # translated before the failure are still returned.
  #
  # @param record [Array] parsed payload: header followed by datasets
  # @return [Hash] `{ documents: [...], failure_ids: [...] }`
  def parse_record(record)
    documents = []
    failure_ids = []
    begin
      record.drop(1).each do |dataset|
        documents << { 'add' => { 'doc' => @translator.translate(dataset, record[0]) } }
      end
    rescue => e
      # JSON.parse produces string keys, so the title must be read with
      # 'title'; the former symbol lookup always yielded nil.
      title = record_title(record)
      puts "Failed to add record #{title} with error #{e} (#{e.message}) : #{Array(e.backtrace).join("\n")}"
      failure_ids << title
    end
    { documents: documents, failure_ids: failure_ids }
  end

  private

  # Reads the header title of a payload without raising, so that error
  # reporting cannot fail on a malformed payload and mask the original error.
  #
  # @return [Object, nil] the header's 'title' value, or nil if unavailable
  def record_title(record)
    header = record.is_a?(Array) ? record.first : nil
    header.is_a?(Hash) ? header['title'] : nil
  end
end
65
+ end
66
+ end
@@ -14,6 +14,7 @@ module SearchSolrTools
14
14
  DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
15
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
16
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
17
+ GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost (GTN-P)' },
17
18
  ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
18
19
  NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
19
20
  NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
@@ -0,0 +1,55 @@
1
+ require 'json'
2
+ require 'rest-client'
3
+ require 'rgeo/geo_json'
4
+
5
+ require 'search_solr_tools'
6
+
7
+ module SearchSolrTools
8
+ module Translators
9
# Translates GTN-P JSON into the Solr JSON document format
class GtnpJsonToSolr
  # Builds the Solr document hash for one GTN-P dataset.
  #
  # @param json_doc [Hash] the dataset entry; coordinates are read from
  #   json_doc['geo']['coordinates'] when 'geo' is present, otherwise from
  #   json_doc['coordinates']
  # @param json_record [Hash] the header entry of the payload, supplying
  #   'title' and 'abstract'
  # @return [Hash] Solr document fields keyed by field name
  # rubocop:disable AbcSize
  def translate(json_doc, json_record)
    json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
    # Title and authoritative id are deliberately the same concatenated string.
    concatenated_name = "#{json_record['title']} - #{json_doc['name']}"
    spatial_values = translate_geometry json_geo
    data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP]
    {
      'title' => concatenated_name,
      'authoritative_id' => concatenated_name,
      'data_centers' => data_center[:long_name],
      'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
      'summary' => json_record['abstract'].to_s,
      'dataset_url' => json_doc['link'],
      'source' => 'ADE',
      'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
      'spatial_coverages' => spatial_values[:spatial_display],
      'spatial_area' => spatial_values[:spatial_area],
      'spatial' => spatial_values[:spatial_index],
      'temporal_coverages' => Helpers::SolrFormat::NOT_SPECIFIED,
      'authors' => parse_people(json_doc)
    }
  end
  # rubocop:enable AbcSize

  # Collects "givenName familyName" strings from json_doc['citation']['contacts'].
  # Returns an empty list when the citation or its contacts are missing or
  # null, instead of raising on a nil value.
  #
  # @param json_doc [Hash] the dataset entry
  # @return [Array<String>] one name per contact, in source order
  def parse_people(json_doc)
    citation = json_doc['citation']
    contacts = citation.is_a?(Hash) ? citation['contacts'] : nil
    return [] unless contacts.respond_to?(:map)
    contacts.map { |person| "#{person['givenName']} #{person['familyName']}" }
  end

  # Converts a coordinate hash into the spatial values stored in Solr by
  # decoding a GeoJSON Point and passing it to the TranslateSpatialCoverage
  # helpers.
  #
  # @param json_geom [Hash] must provide 'longitude' and 'latitude'
  # @return [Hash] :spatial_display, :spatial_index, :spatial_area and
  #   :spatial_scope_facet
  def translate_geometry(json_geom)
    # The values are interpolated verbatim, so numeric strings from the
    # service end up as JSON numbers; GeoJSON order is longitude, latitude.
    geo_string = "{\"type\":\"Point\",\"coordinates\":[#{json_geom['longitude']},#{json_geom['latitude']}]}"
    geometries = [RGeo::GeoJSON.decode(geo_string, json_parser: :json)]
    {
      spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(geometries),
      spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(geometries),
      spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(geometries),
      spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(geometries)
    }
  end
end
54
+ end
55
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.3.3'
2
+ VERSION = '3.3.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-01-14 00:00:00.000000000 Z
15
+ date: 2016-02-08 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -282,6 +282,7 @@ files:
282
282
  - lib/search_solr_tools/harvesters/data_one.rb
283
283
  - lib/search_solr_tools/harvesters/echo.rb
284
284
  - lib/search_solr_tools/harvesters/eol.rb
285
+ - lib/search_solr_tools/harvesters/gtnp.rb
285
286
  - lib/search_solr_tools/harvesters/ices.rb
286
287
  - lib/search_solr_tools/harvesters/ncdc_paleo.rb
287
288
  - lib/search_solr_tools/harvesters/nmi.rb
@@ -324,6 +325,7 @@ files:
324
325
  - lib/search_solr_tools/selectors/usgs_iso.rb
325
326
  - lib/search_solr_tools/translators/bcodmo_json.rb
326
327
  - lib/search_solr_tools/translators/eol_to_solr.rb
328
+ - lib/search_solr_tools/translators/gtnp_json.rb
327
329
  - lib/search_solr_tools/translators/nsidc_json.rb
328
330
  - lib/search_solr_tools/version.rb
329
331
  - search_solr_tools.gemspec
@@ -347,7 +349,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
347
349
  version: '0'
348
350
  requirements: []
349
351
  rubyforge_project:
350
- rubygems_version: 2.4.5.1
352
+ rubygems_version: 2.4.5
351
353
  signing_key:
352
354
  specification_version: 4
353
355
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.