search_solr_tools 3.1.2 → 3.1.3.pre2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c7f4c152205a4ab6781f37a0c94994c8c38b49d
4
- data.tar.gz: a3d55a95e66ca26f077344798de686ed35c17182
3
+ metadata.gz: 74bf9594b189bbd05ae04fffaff8a431ee8ad73e
4
+ data.tar.gz: 6739088539526ce22e95c1e5e4de024f02509917
5
5
  SHA512:
6
- metadata.gz: ae1e563fc38f3ffed03eda53c1f7a2281757a0c1853ed474191b10323a0558b66c8db2cd9711ec2db327fc892a100860769a1b7d0b46a64f127a7914f5d7ee62
7
- data.tar.gz: b3762ea1d52947abebc6eaff5e5a75bb54509e0f7fbe304eb4a1c91ea3fd73df25c82c9d6112a21a85fcc0c34a81b04b7346865f4c103319a11b462e7d171ed7
6
+ metadata.gz: 01d720a93d61862c78b5494c71b3ed7f09c11ebe4304963b9a8d911643c5f78a8772123babb1ae5316e841be60819190c11c23296b5ecd473f7fb8e0839d6d03
7
+ data.tar.gz: d0d4909dd34e3cbe940da7aad635fcc6657fc16ddc15a8a64727b79bac15648542abe95d1900597bdfaf5776c7bcf3d57c693338710cb67d50b82ae88330eb40
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Build Status](https://travis-ci.org/nsidc/search-solr-tools.svg?branch=master)](https://travis-ci.org/nsidc/search-solr-tools)
1
+ [![Gem Version](https://badge.fury.io/rb/search_solr_tools.svg)](http://badge.fury.io/rb/search_solr_tools) [![Build Status](https://travis-ci.org/nsidc/search-solr-tools.svg?branch=master)](https://travis-ci.org/nsidc/search-solr-tools)
2
2
 
3
3
  # NSIDC Search Solr Tools
4
4
 
@@ -66,6 +66,7 @@ class SolrHarvestCLI < Thor
66
66
  'ices' => SearchSolrTools::Harvesters::Ices,
67
67
  'nmi' => SearchSolrTools::Harvesters::Nmi,
68
68
  'nodc' => SearchSolrTools::Harvesters::Nodc,
69
+ 'r2r' => SearchSolrTools::Harvesters::R2R,
69
70
  'rda' => SearchSolrTools::Harvesters::Rda,
70
71
  'usgs' => SearchSolrTools::Harvesters::Usgs,
71
72
  'tdar' => SearchSolrTools::Harvesters::Tdar,
@@ -29,6 +29,7 @@
29
29
  - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
30
30
  - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
31
31
  - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
32
+ :r2r_url: http://get.rvdata.us/services/cruise/
32
33
 
33
34
  :local:
34
35
  :host: localhost
@@ -0,0 +1,61 @@
1
+ require 'nokogiri'
2
+ require 'rest-client'
3
+
4
+ require_relative 'base'
5
+
6
+ module SearchSolrTools
7
+ module Harvesters
8
+ class R2R < Base
9
+ def initialize(env = 'development', die_on_failure = false)
10
+ super
11
+ @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]
12
+ @translator = Helpers::IsoToSolr.new :r2r
13
+ @metadata_url = SolrEnvironments[@environment][:r2r_url]
14
+ end
15
+
16
+ def harvest_and_delete
17
+ puts "Running #{self.class.name} at #{@metadata_url}"
18
+ super(method(:harvest), %(data_centers:"#{@data_centers}"))
19
+ end
20
+
21
+ # rubocop: disable MethodLength
22
+ # rubocop: disable AbcSize
23
+ def harvest
24
+ # first fetch list of available records at http://get.rvdata.us/services/cruise/
25
+ # then loop through each one of those, using the root <gmi:MI_Metadata> tag
26
+ puts "Getting list of records from #{@data_centers}"
27
+ RestClient.get(@metadata_url) do |resp, _req, _result, &_block|
28
+ unless resp.code == 200
29
+ puts "Got code #{resp.code} from #{@metadata_url}, skipping R2R harvest."
30
+ next
31
+ end
32
+
33
+ doc = Nokogiri::HTML(resp.body)
34
+
35
+ urls = doc.xpath('//a').map do |node|
36
+ "#{@metadata_url}#{node.attr('href')}"
37
+ end
38
+
39
+ urls.each_slice(50) do |url_subset|
40
+ # each result is a Nokogiri doc with root element
41
+ # <gmi:MI_Metadata>
42
+ results = url_subset.map do |url|
43
+ get_results(url, '//gmi:MI_Metadata').first
44
+ end
45
+
46
+ begin
47
+ translated = results.map do |e|
48
+ create_new_solr_add_doc_with_child(@translator.translate(e).root)
49
+ end
50
+
51
+ insert_solr_docs(translated)
52
+ rescue => e
53
+ puts "ERROR: #{e}"
54
+ raise e if @die_on_failure
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -7,23 +7,23 @@ module SearchSolrTools
7
7
  end
8
8
 
9
9
  ISO_NAMESPACES = {
10
- 'csw' => 'http://www.opengis.net/cat/csw/2.0.2',
11
- 'gmd' => 'http://www.isotc211.org/2005/gmd',
12
- 'gco' => 'http://www.isotc211.org/2005/gco',
13
- 'gml' => 'http://www.opengis.net/gml/3.2',
14
- 'gmi' => 'http://www.isotc211.org/2005/gmi',
15
- 'gmx' => 'http://www.isotc211.org/2005/gmx',
16
- 'gsr' => 'http://www.isotc211.org/2005/gsr',
17
- 'gss' => 'http://www.isotc211.org/2005/gss',
18
- 'gts' => 'http://www.isotc211.org/2005/gts',
19
- 'srv' => 'http://www.isotc211.org/2005/srv',
20
- 'xlink' => 'http://www.w3.org/1999/xlink',
21
- 'xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
22
- 'oai' => 'http://www.openarchives.org/OAI/2.0/',
23
- 'dif' => 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
24
- 'atom' => 'http://www.w3.org/2005/Atom',
25
- 'dc' => 'http://purl.org/dc/elements/1.1/',
26
- 'georss' => 'http://www.georss.org/georss'
10
+ 'atom' => 'http://www.w3.org/2005/Atom',
11
+ 'csw' => 'http://www.opengis.net/cat/csw/2.0.2',
12
+ 'dc' => 'http://purl.org/dc/elements/1.1/',
13
+ 'dif' => 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
14
+ 'gco' => 'http://www.isotc211.org/2005/gco',
15
+ 'georss' => 'http://www.georss.org/georss',
16
+ 'gmd' => 'http://www.isotc211.org/2005/gmd',
17
+ 'gmi' => 'http://www.isotc211.org/2005/gmi',
18
+ 'gml' => 'http://www.opengis.net/gml/3.2',
19
+ 'gmx' => 'http://www.isotc211.org/2005/gmx',
20
+ 'gsr' => 'http://www.isotc211.org/2005/gsr',
21
+ 'gss' => 'http://www.isotc211.org/2005/gss',
22
+ 'gts' => 'http://www.isotc211.org/2005/gts',
23
+ 'oai' => 'http://www.openarchives.org/OAI/2.0/',
24
+ 'srv' => 'http://www.isotc211.org/2005/srv',
25
+ 'xlink' => 'http://www.w3.org/1999/xlink',
26
+ 'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
27
27
  }
28
28
  end
29
29
  end
@@ -0,0 +1,25 @@
1
+ require_relative './iso_namespaces'
2
+ require_relative './iso_to_solr_format'
3
+ require_relative './solr_format'
4
+
5
+ module SearchSolrTools
6
+ module Helpers
7
+ class R2RFormat < IsoToSolrFormat
8
+ TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
9
+ TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
10
+ TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
11
+ FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }
12
+
13
+ def self.date_range(temporal_node, _formatted = false)
14
+ xpath_start = './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/'\
15
+ 'gml:TimeInstant[@gml:id="start"]/gml:timePosition'
16
+ xpath_end = xpath_start.gsub('start', 'end')
17
+
18
+ {
19
+ start: temporal_node.xpath(xpath_start).text,
20
+ end: temporal_node.xpath(xpath_end).text
21
+ }
22
+ end
23
+ end
24
+ end
25
+ end
@@ -12,6 +12,7 @@ module SearchSolrTools
12
12
  nmi: Selectors::NMI,
13
13
  nodc: Selectors::NODC,
14
14
  pdc: Selectors::PDC,
15
+ r2r: Selectors::R2R,
15
16
  rda: Selectors::RDA,
16
17
  tdar: Selectors::TDAR,
17
18
  usgs: Selectors::USGS
@@ -9,18 +9,19 @@ module SearchSolrTools
9
9
  # rubocop:disable Metrics/ModuleLength
10
10
  module SolrFormat
11
11
  DATA_CENTER_NAMES = {
12
- NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
12
+ BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
13
13
  CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
14
14
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
15
15
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
16
16
  ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
17
17
  NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
18
18
  NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
19
+ NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
20
+ PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
21
+ R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
19
22
  RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
20
- USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' },
21
- BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
22
23
  TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
23
- PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' }
24
+ USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
24
25
  }
25
26
 
26
27
  NOT_SPECIFIED = 'Not specified'
@@ -0,0 +1,113 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # The hash contains keys that should map to the fields in the solr schema,
6
+ # the keys are called selectors and are in charge of selecting the nodes
7
+ # from the ISO document, applying the default value if none of the xpaths
8
+ # resolved to a value and formatting the field. xpaths and multivalue are
9
+ # required, default_value, format, and reduce are optional.
10
+ #
11
+ # reduce takes the formatted result of multiple nodes and produces a single
12
+ # result. This is for fields that are not multivalued, but their value
13
+ # should consider information from all the nodes (for example, storing
14
+ # only the maximum duration from multiple temporal coverage fields, taking
15
+ # the sum of multiple spatial areas)
16
+ R2R = {
17
+ authoritative_id: {
18
+ xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
19
+ multivalue: false
20
+ },
21
+ title: {
22
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor'],
23
+ multivalue: false
24
+ },
25
+ summary: {
26
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
27
+ multivalue: false
28
+ },
29
+ data_centers: {
30
+ xpaths: [''],
31
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]],
32
+ multivalue: false
33
+ },
34
+ authors: {
35
+ xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='contributor']]/gmd:individualName/gmx:Anchor"],
36
+ multivalue: true
37
+ },
38
+ keywords: {
39
+ xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
40
+ './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
41
+ multivalue: true
42
+ },
43
+ last_revision_date: {
44
+ xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
45
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
46
+ multivalue: false,
47
+ format: Helpers::SolrFormat::DATE
48
+ },
49
+ dataset_url: {
50
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href'],
51
+ multivalue: false,
52
+ format: Helpers::IsoToSolrFormat::DATASET_URL
53
+ },
54
+ spatial_coverages: {
55
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
+ multivalue: true,
57
+ format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
58
+ },
59
+ spatial: {
60
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
+ multivalue: true,
62
+ format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
63
+ },
64
+ spatial_area: {
65
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
66
+ multivalue: false,
67
+ reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
68
+ format: Helpers::IsoToSolrFormat::SPATIAL_AREA
69
+ },
70
+ temporal_coverages: {
71
+ xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
72
+ multivalue: false,
73
+ format: Helpers::R2RFormat::TEMPORAL_DISPLAY_STRING
74
+ },
75
+ temporal_duration: {
76
+ xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
77
+ multivalue: false,
78
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
79
+ format: Helpers::R2RFormat::TEMPORAL_DURATION
80
+ },
81
+ temporal: {
82
+ xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
83
+ multivalue: false,
84
+ format: Helpers::R2RFormat::TEMPORAL_INDEX_STRING
85
+ },
86
+ sensors: {
87
+ xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:type/gmx:Anchor'],
88
+ multivalue: true
89
+ },
90
+ source: {
91
+ xpaths: [''],
92
+ default_values: ['ADE'],
93
+ multivalue: false
94
+ },
95
+ facet_data_center: {
96
+ xpaths: [''],
97
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:short_name]}"],
98
+ multivalue: false
99
+ },
100
+ facet_spatial_scope: {
101
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
102
+ multivalue: true,
103
+ format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
104
+ },
105
+ facet_temporal_duration: {
106
+ xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
107
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
108
+ format: Helpers::R2RFormat::FACET_TEMPORAL_DURATION,
109
+ multivalue: true
110
+ }
111
+ }
112
+ end
113
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.1.2'
2
+ VERSION = '3.1.3.pre2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
4
+ version: 3.1.3.pre2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2015-06-30 00:00:00.000000000 Z
15
+ date: 2015-07-01 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -288,6 +288,7 @@ files:
288
288
  - lib/search_solr_tools/harvesters/nsidc_json.rb
289
289
  - lib/search_solr_tools/harvesters/oai.rb
290
290
  - lib/search_solr_tools/harvesters/pdc.rb
291
+ - lib/search_solr_tools/harvesters/r2r.rb
291
292
  - lib/search_solr_tools/harvesters/rda.rb
292
293
  - lib/search_solr_tools/harvesters/tdar.rb
293
294
  - lib/search_solr_tools/harvesters/usgs.rb
@@ -298,6 +299,7 @@ files:
298
299
  - lib/search_solr_tools/helpers/iso_to_solr.rb
299
300
  - lib/search_solr_tools/helpers/iso_to_solr_format.rb
300
301
  - lib/search_solr_tools/helpers/query_builder.rb
302
+ - lib/search_solr_tools/helpers/r2r_format.rb
301
303
  - lib/search_solr_tools/helpers/selectors.rb
302
304
  - lib/search_solr_tools/helpers/solr_format.rb
303
305
  - lib/search_solr_tools/helpers/tdar_format.rb
@@ -310,6 +312,7 @@ files:
310
312
  - lib/search_solr_tools/selectors/nmi.rb
311
313
  - lib/search_solr_tools/selectors/nodc_iso.rb
312
314
  - lib/search_solr_tools/selectors/pdc_iso.rb
315
+ - lib/search_solr_tools/selectors/r2r.rb
313
316
  - lib/search_solr_tools/selectors/rda.rb
314
317
  - lib/search_solr_tools/selectors/tdar_opensearch.rb
315
318
  - lib/search_solr_tools/selectors/usgs_iso.rb
@@ -333,9 +336,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
333
336
  version: '2.0'
334
337
  required_rubygems_version: !ruby/object:Gem::Requirement
335
338
  requirements:
336
- - - ">="
339
+ - - ">"
337
340
  - !ruby/object:Gem::Version
338
- version: '0'
341
+ version: 1.3.1
339
342
  requirements: []
340
343
  rubyforge_project:
341
344
  rubygems_version: 2.4.8