search_solr_tools 3.2.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbf4232d65a295d48197fa78817b2b3df3e7a2ca
4
- data.tar.gz: e1b6ae300079b1bcec58ad3641eef3ed579db01d
3
+ metadata.gz: da4660a6d133afe9408e57daf6f0cce758f5fea9
4
+ data.tar.gz: e6a60e1711fa6e7c8ee115321f8270207d2faaeb
5
5
  SHA512:
6
- metadata.gz: c8e8d5366ad07f0f6a9bf86349217f6bb008ee1a968b869663e656cc68cae00d3ee63a3796eb1c183ceb345dd67ff7d7e28afa7e84e5099c9f81994a43c25495
7
- data.tar.gz: 5ecc17ebe35cd7b55fd97338b6f2a471193942ca2bfcb7a5f5008dbf6a45bbbbb1cd0eb5e58df03083e17e94d83744ff2e73bc9bb018cccfe60c1faa2d5fb2e3
6
+ metadata.gz: d9893af295f321132cb507d59c1d29d39981c2303dc7784fc62a20f05646d95b57536bb15d772810e3529631f71a56ffb8d5f190f5d40371405e7cc3b538ee18
7
+ data.tar.gz: 1cc1addbfd33a0aca1561e68007681ed240dfc28e783cff394cacaadc60b1046261c933dbea6faf9b304eb837c718b2dc809f87e199dfe0652812fd52aae73a9
data/CHANGELOG.md CHANGED
@@ -1,4 +1,15 @@
1
- ## v3.2.1 (2015-9-23)
1
+ ## v3.3.0 (2015-09-24)
2
+
3
+ New Features
4
+
5
+ - Add harvest support for
6
+ [NOAA Paleoclimatology Data Center (NOAA Paleo)](https://www.ncdc.noaa.gov/data-access/paleoclimatology-data/datasets).
7
+
8
+ - Add harvest support for
9
+ [Data Observation Network for Earth (DataONE)](https://www.dataone.org/).
10
+ [Pivotal 77763710](https://www.pivotaltracker.com/story/show/77763710)
11
+
12
+ ## v3.2.1 (2015-09-23)
2
13
 
3
14
  Bugfixes
4
15
 
@@ -63,13 +63,16 @@ class SolrHarvestCLI < Thor
63
63
  end
64
64
 
65
65
  no_tasks do
66
+ # rubocop: disable MethodLength
66
67
  def harvester_map
67
68
  {
68
69
  'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
69
70
  'cisl' => SearchSolrTools::Harvesters::Cisl,
71
+ 'data_one' => SearchSolrTools::Harvesters::DataOne,
70
72
  'echo' => SearchSolrTools::Harvesters::Echo,
71
73
  'eol' => SearchSolrTools::Harvesters::Eol,
72
74
  'ices' => SearchSolrTools::Harvesters::Ices,
75
+ 'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
73
76
  'nmi' => SearchSolrTools::Harvesters::Nmi,
74
77
  'nodc' => SearchSolrTools::Harvesters::Nodc,
75
78
  'r2r' => SearchSolrTools::Harvesters::R2R,
@@ -82,6 +85,7 @@ class SolrHarvestCLI < Thor
82
85
  'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
83
86
  }
84
87
  end
88
+ # rubocop: enable MethodLength
85
89
 
86
90
  def get_harvester_class(data_center_name)
87
91
  name = data_center_name.downcase.to_s
@@ -5,8 +5,10 @@
5
5
  :port: 8983
6
6
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
7
  :cisl_url: https://www.aoncadis.org/oai/repository
8
+ :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
8
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
9
10
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
11
+ :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
10
12
  :nmi_url: http://access.met.no/metamod/oai
11
13
  :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
12
14
  :pdc_url: http://www.polardata.ca/oai/provider
@@ -38,7 +40,7 @@
38
40
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
39
41
 
40
42
  :dev:
41
- host: dev.search-solr.apps.int.nsidc.org
43
+ :host: dev.search-solr.apps.int.nsidc.org
42
44
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
43
45
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
44
46
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
@@ -0,0 +1,47 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ class DataOne < Base
4
+ def initialize(env = 'development', die_on_failure = false)
5
+ super
6
+ @page_size = 250
7
+ @translator = Helpers::IsoToSolr.new :data_one
8
+ end
9
+
10
+ def harvest_and_delete
11
+ puts "Running harvest of dataONE catalog from #{metadata_url}"
12
+ super(method(:harvest_data_one_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]}\"")
13
+ end
14
+
15
+ def harvest_data_one_into_solr
16
+ start = 0
17
+ while (entries = get_results_from_data_one(start)) && (entries.length > 0)
18
+ begin
19
+ insert_solr_docs(get_docs_with_translated_entries_from_data_one(entries))
20
+ rescue => e
21
+ puts "ERROR: #{e}\n\n"
22
+ raise e if @die_on_failure
23
+ end
24
+ start += @page_size
25
+ end
26
+ end
27
+
28
+ def get_results_from_data_one(start)
29
+ get_results(build_request(start, @page_size), './response/result/doc')
30
+ end
31
+
32
+ def metadata_url
33
+ SolrEnvironments[@environment][:data_one_url]
34
+ end
35
+
36
+ def get_docs_with_translated_entries_from_data_one(entries)
37
+ entries.map do |e|
38
+ create_new_solr_add_doc_with_child(@translator.translate(e).root)
39
+ end
40
+ end
41
+
42
+ def build_request(start = 0, max_records = 100)
43
+ "#{metadata_url}&start=#{start}&rows=#{max_records}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -47,7 +47,8 @@ module SearchSolrTools
47
47
  'resultType' => resultType,
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
- 'constraintLanguage' => 'CQL_TEXT'
50
+ 'constraintLanguage' => 'CQL_TEXT',
51
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
51
52
  )
52
53
  end
53
54
  end
@@ -0,0 +1,60 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ # Harvests data from NCDC Paleo and inserts it into Solr after it has been translated
4
+ class NcdcPaleo < Base
5
+ def initialize(env = 'development', die_on_failure = false)
6
+ super env, die_on_failure
7
+ @page_size = 50
8
+ @translator = Helpers::IsoToSolr.new :ncdc_paleo
9
+ end
10
+
11
+ def harvest_and_delete
12
+ puts "Running harvest of NCDC Paleo catalog from #{ncdc_paleo_url}"
13
+ super(method(:harvest_ncdc_paleo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]}\"")
14
+ end
15
+
16
+ def harvest_ncdc_paleo_into_solr
17
+ start_index = 1
18
+ while (entries = get_results_from_ncdc_paleo_url(start_index)) && (entries.length > 0)
19
+ begin
20
+ insert_solr_docs get_docs_with_translated_entries_from_ncdc_paleo(entries)
21
+ rescue => e
22
+ puts "ERROR: #{e}"
23
+ raise e if @die_on_failure
24
+ end
25
+ start_index += @page_size
26
+ end
27
+ end
28
+
29
+ def ncdc_paleo_url
30
+ SolrEnvironments[@environment][:ncdc_paleo_url]
31
+ end
32
+
33
+ def get_results_from_ncdc_paleo_url(start_index)
34
+ get_results build_csw_request('results', @page_size, start_index), '//csw:Record'
35
+ end
36
+
37
+ def get_docs_with_translated_entries_from_ncdc_paleo(entries)
38
+ auth_ids = entries.map { |e| e.xpath("./dc:identifier[@scheme='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID']").text }
39
+
40
+ auth_ids.map do |record|
41
+ result_xml = get_results("http://gis.ncdc.noaa.gov/gptpaleo/csw?getxml=#{record}",
42
+ '/rdf:RDF/rdf:Description').first
43
+ solr_doc = create_new_solr_add_doc_with_child(@translator.translate(result_xml).root)
44
+ insert_node = solr_doc.at_xpath('//doc')
45
+ insert_node.add_child("<field name='authoritative_id'>#{record}</field>")
46
+ insert_node.add_child("<field name='dataset_url'>http://gis.ncdc.noaa.gov/gptpaleo/catalog/search/resource/details.page?uuid=#{record}")
47
+ solr_doc.root
48
+ end
49
+ end
50
+
51
+ def build_csw_request(resultType = 'results', maxRecords = '1000', startPosition = '1')
52
+ Helpers::CswIsoQueryBuilder.get_query_string(ncdc_paleo_url,
53
+ 'resultType' => resultType,
54
+ 'maxRecords' => maxRecords,
55
+ 'startPosition' => startPosition
56
+ )
57
+ end
58
+ end
59
+ end
60
+ end
@@ -47,7 +47,8 @@ module SearchSolrTools
47
47
  'resultType' => resultType,
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
- 'constraint' => bbox_constraint
50
+ 'constraint' => bbox_constraint,
51
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
51
52
  )
52
53
  end
53
54
 
@@ -48,8 +48,8 @@ module SearchSolrTools
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
50
  'TypeNames' => '',
51
- 'constraint' => bbox_constraint
52
-
51
+ 'constraint' => bbox_constraint,
52
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
53
53
  )
54
54
  end
55
55
 
@@ -13,8 +13,7 @@ module SearchSolrTools
13
13
  'resultType' => 'results',
14
14
  'outputFormat' => 'application/xml',
15
15
  'maxRecords' => '25',
16
- 'startPosition' => '1',
17
- 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
16
+ 'startPosition' => '1'
18
17
  }
19
18
 
20
19
  def self.get_query_string(url, query_params = {})
@@ -0,0 +1,74 @@
1
+ require_relative './iso_namespaces'
2
+ require_relative './iso_to_solr_format'
3
+ require_relative './solr_format'
4
+
5
+ module SearchSolrTools
6
+ module Helpers
7
+ class DataOneFormat < IsoToSolrFormat
8
+ class << self
9
+ def date_range(node)
10
+ {
11
+ start: SolrFormat.date_str(node.xpath('.//date[@name="beginDate"]').text.strip),
12
+ end: SolrFormat.date_str(node.xpath('.//date[@name="endDate"]').text.strip)
13
+ }
14
+ end
15
+
16
+ def bounding_box(node)
17
+ {
18
+ north: node.xpath('.//float[@name="northBoundCoord"]').text.strip,
19
+ south: node.xpath('.//float[@name="southBoundCoord"]').text.strip,
20
+ east: node.xpath('.//float[@name="eastBoundCoord"]').text.strip,
21
+ west: node.xpath('.//float[@name="westBoundCoord"]').text.strip
22
+ }
23
+ end
24
+
25
+ def spatial_display(node)
26
+ box = bounding_box(node)
27
+
28
+ [box[:south], box[:west], box[:north], box[:east]].join(' ')
29
+ end
30
+
31
+ def spatial_index(node)
32
+ box = bounding_box(node)
33
+
34
+ if box[:west] == box[:east] && box[:south] == box[:north]
35
+ [box[:west], box[:south]]
36
+ else
37
+ [box[:west], box[:south], box[:east], box[:north]]
38
+ end.join(' ')
39
+ end
40
+
41
+ def spatial_area(node)
42
+ box = bounding_box(node)
43
+
44
+ box[:north].to_f - box[:south].to_f
45
+ end
46
+
47
+ def temporal_coverage(node)
48
+ SolrFormat.temporal_display_str(date_range(node))
49
+ end
50
+
51
+ def temporal_duration(node)
52
+ dr = date_range(node)
53
+ end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
54
+ SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
55
+ end
56
+
57
+ def temporal_index_string(node)
58
+ dr = date_range(node)
59
+ SolrFormat.temporal_index_str(dr)
60
+ end
61
+
62
+ def facet_spatial_scope(node)
63
+ box = bounding_box(node)
64
+ SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
65
+ end
66
+
67
+ def facet_temporal_duration(node)
68
+ duration = temporal_duration(node)
69
+ SolrFormat.get_temporal_duration_facet(duration)
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -21,6 +21,7 @@ module SearchSolrTools
21
21
  'gss' => 'http://www.isotc211.org/2005/gss',
22
22
  'gts' => 'http://www.isotc211.org/2005/gts',
23
23
  'oai' => 'http://www.openarchives.org/OAI/2.0/',
24
+ 'rdf' => 'http://www.w3.org/TR/REC-rdf-syntax',
24
25
  'srv' => 'http://www.isotc211.org/2005/srv',
25
26
  'xlink' => 'http://www.w3.org/1999/xlink',
26
27
  'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
@@ -77,9 +77,8 @@ module SearchSolrTools
77
77
 
78
78
  def self.get_temporal_duration(temporal_node)
79
79
  dr = date_range(temporal_node)
80
- dr[:end].to_s.empty? ? end_time = Time.now : end_time = Time.parse(dr[:end])
81
- dr[:start].to_s.empty? ? duration = nil : duration = SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time)
82
- duration
80
+ end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
81
+ SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
83
82
  end
84
83
 
85
84
  def self.get_temporal_duration_facet(temporal_node)
@@ -0,0 +1,61 @@
1
+ require 'date'
2
+
3
+ require_relative './iso_namespaces'
4
+ require_relative './solr_format'
5
+ require_relative './iso_to_solr_format'
6
+
7
+ module SearchSolrTools
8
+ module Helpers
9
+ class NcdcPaleoFormat < IsoToSolrFormat
10
+ def self.bounding_box(node)
11
+ east, north = node.xpath('./ows:UpperCorner').text.split
12
+ west, south = node.xpath('./ows:LowerCorner').text.split
13
+ { north: north, south: south, east: east, west: west }
14
+ end
15
+
16
+ def self.date_range(node, _formatted = false)
17
+ if node.text.include?('START YEAR')
18
+ if node.text.include?('AD')
19
+ format_ad_time(node.text)
20
+ elsif node.text.include?('yr BP')
21
+ format_cal_yr_bp_time(node.text)
22
+ end
23
+ end
24
+ end
25
+
26
+ def self.format_ad_time(node_text)
27
+ match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
28
+ {
29
+ start: DateTime.strptime(match[:start].strip, '%Y'),
30
+ end: DateTime.strptime(match[:end].strip, '%Y')
31
+ }
32
+ end
33
+
34
+ def self.format_cal_yr_bp_time(node_text)
35
+ zero_year = 1950
36
+ match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
37
+ {
38
+ start: DateTime.strptime((-(match[:start].strip.to_i) - zero_year).to_s, '%Y'),
39
+ end: DateTime.strptime((-(match[:end].strip.to_i) - zero_year).to_s, '%Y')
40
+ }
41
+ end
42
+
43
+ def self.temporal_index_str(node)
44
+ range = date_range(node)
45
+ SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
46
+ end
47
+
48
+ def self.get_temporal_duration(node)
49
+ range = date_range(node)
50
+ return if range.empty?
51
+ (range[:start] - range[:end]).to_i.abs
52
+ end
53
+
54
+ def self.author(node)
55
+ return node if node == ''
56
+ return if node.text.include? ';'
57
+ node.text
58
+ end
59
+ end
60
+ end
61
+ end
@@ -6,16 +6,18 @@ module SearchSolrTools
6
6
  # This hash grabs all the selector files inside the selectors directory,
7
7
  # to add a new source we need to create a selector file and add it to this hash.
8
8
  SELECTORS = {
9
- cisl: Selectors::CISL,
10
- echo: Selectors::ECHO,
11
- ices: Selectors::ICES,
12
- nmi: Selectors::NMI,
13
- nodc: Selectors::NODC,
14
- pdc: Selectors::PDC,
15
- r2r: Selectors::R2R,
16
- rda: Selectors::RDA,
17
- tdar: Selectors::TDAR,
18
- usgs: Selectors::USGS
9
+ cisl: Selectors::CISL,
10
+ data_one: Selectors::DATA_ONE,
11
+ echo: Selectors::ECHO,
12
+ ices: Selectors::ICES,
13
+ nmi: Selectors::NMI,
14
+ ncdc_paleo: Selectors::NCDC_PALEO,
15
+ nodc: Selectors::NODC,
16
+ pdc: Selectors::PDC,
17
+ r2r: Selectors::R2R,
18
+ rda: Selectors::RDA,
19
+ tdar: Selectors::TDAR,
20
+ usgs: Selectors::USGS
19
21
  }
20
22
  end
21
23
  end
@@ -11,9 +11,11 @@ module SearchSolrTools
11
11
  DATA_CENTER_NAMES = {
12
12
  BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
13
13
  CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
14
+ DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
14
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
15
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
16
17
  ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
18
+ NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
17
19
  NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
18
20
  NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
19
21
  NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
@@ -0,0 +1,95 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ DATA_ONE = {
6
+ authoritative_id: {
7
+ xpaths: ['.//str[@name="id"]'],
8
+ multivalue: false
9
+ },
10
+ title: {
11
+ xpaths: ['.//str[@name="title"]'],
12
+ multivalue: false
13
+ },
14
+ summary: {
15
+ xpaths: ['.//str[@name="abstract"]'],
16
+ multivalue: false
17
+ },
18
+ data_centers: {
19
+ xpaths: [''],
20
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
21
+ multivalue: false
22
+ },
23
+ authors: {
24
+ xpaths: ['.//str[@name="author"]'],
25
+ multivalue: false
26
+ },
27
+ keywords: {
28
+ xpaths: ['.//arr[@name="keywords"]/str'],
29
+ multivalue: true
30
+ },
31
+ last_revision_date: {
32
+ xpaths: ['.//date[@name="updateDate"]'],
33
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
34
+ multivalue: false,
35
+ format: Helpers::SolrFormat::DATE
36
+ },
37
+ dataset_url: {
38
+ xpaths: ['.//str[@name="dataUrl"]'],
39
+ default_values: [''],
40
+ multivalue: false
41
+ },
42
+ spatial_coverages: {
43
+ xpaths: ['.'],
44
+ multivalue: false,
45
+ format: Helpers::DataOneFormat.method(:spatial_display)
46
+ },
47
+ spatial: {
48
+ xpaths: ['.'],
49
+ multivalue: false,
50
+ format: Helpers::DataOneFormat.method(:spatial_index)
51
+ },
52
+ spatial_area: {
53
+ xpaths: ['.'],
54
+ multivalue: false,
55
+ format: Helpers::DataOneFormat.method(:spatial_area)
56
+ },
57
+ temporal_coverages: {
58
+ xpaths: ['.'],
59
+ multivalue: false,
60
+ format: Helpers::DataOneFormat.method(:temporal_coverage)
61
+ },
62
+ temporal_duration: {
63
+ xpaths: ['.'],
64
+ multivalue: false,
65
+ format: Helpers::DataOneFormat.method(:temporal_duration)
66
+ },
67
+ temporal: {
68
+ xpaths: ['.'],
69
+ multivalue: false,
70
+ format: Helpers::DataOneFormat.method(:temporal_index_string)
71
+ },
72
+ source: {
73
+ xpaths: [''],
74
+ default_values: ['ADE'],
75
+ multivalue: false
76
+ },
77
+ facet_data_center: {
78
+ xpaths: [''],
79
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
80
+ multivalue: false
81
+ },
82
+ facet_spatial_scope: {
83
+ xpaths: ['.'],
84
+ multivalue: false,
85
+ format: Helpers::DataOneFormat.method(:facet_spatial_scope)
86
+ },
87
+ facet_temporal_duration: {
88
+ xpaths: ['.'],
89
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
90
+ format: Helpers::DataOneFormat.method(:facet_temporal_duration),
91
+ multivalue: false
92
+ }
93
+ }
94
+ end
95
+ end
@@ -43,7 +43,7 @@ module SearchSolrTools
43
43
  dataset_url: {
44
44
  xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
45
45
  multivalue: false,
46
- format: Helpers::IsoToSolrFormat:: ICES_DATASET_URL
46
+ format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
47
47
  },
48
48
  spatial_coverages: {
49
49
  xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
@@ -0,0 +1,89 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ NCDC_PALEO = {
6
+ title: {
7
+ xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
8
+ multivalue: false
9
+ },
10
+ summary: {
11
+ xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
12
+ multivalue: false
13
+ },
14
+ data_centers: {
15
+ xpaths: [''],
16
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
17
+ multivalue: false
18
+ },
19
+ authors: {
20
+ xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
21
+ multivalue: true,
22
+ format: Helpers::NcdcPaleoFormat.method(:author)
23
+ },
24
+ keywords: {
25
+ xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
26
+ multivalue: true
27
+ },
28
+ last_revision_date: {
29
+ xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
30
+ default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
31
+ multivalue: false,
32
+ format: Helpers::SolrFormat::DATE
33
+ },
34
+ spatial_coverages: {
35
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
36
+ multivalue: true,
37
+ format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
38
+ },
39
+ spatial: {
40
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
41
+ multivalue: true,
42
+ format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
43
+ },
44
+ spatial_area: {
45
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
46
+ multivalue: false,
47
+ reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
48
+ format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
49
+ },
50
+ temporal: {
51
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
52
+ multivalue: true,
53
+ format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
54
+ },
55
+ temporal_coverages: {
56
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
57
+ multivalue: true,
58
+ format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
59
+ },
60
+ temporal_duration: {
61
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
62
+ multivalue: false,
63
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
64
+ format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
65
+ },
66
+ source: {
67
+ xpaths: [''],
68
+ default_values: ['ADE'],
69
+ multivalue: false
70
+ },
71
+ facet_data_center: {
72
+ xpaths: [''],
73
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
74
+ multivalue: false
75
+ },
76
+ facet_spatial_scope: {
77
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
78
+ multivalue: true,
79
+ format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
80
+ },
81
+ facet_temporal_duration: {
82
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
83
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
84
+ format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
85
+ multivalue: true
86
+ }
87
+ }
88
+ end
89
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.2.1'
2
+ VERSION = '3.3.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.1
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2015-09-23 00:00:00.000000000 Z
15
+ date: 2015-09-24 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -279,9 +279,11 @@ files:
279
279
  - lib/search_solr_tools/harvesters/base.rb
280
280
  - lib/search_solr_tools/harvesters/bcodmo.rb
281
281
  - lib/search_solr_tools/harvesters/cisl.rb
282
+ - lib/search_solr_tools/harvesters/data_one.rb
282
283
  - lib/search_solr_tools/harvesters/echo.rb
283
284
  - lib/search_solr_tools/harvesters/eol.rb
284
285
  - lib/search_solr_tools/harvesters/ices.rb
286
+ - lib/search_solr_tools/harvesters/ncdc_paleo.rb
285
287
  - lib/search_solr_tools/harvesters/nmi.rb
286
288
  - lib/search_solr_tools/harvesters/nodc.rb
287
289
  - lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
@@ -294,10 +296,12 @@ files:
294
296
  - lib/search_solr_tools/harvesters/usgs.rb
295
297
  - lib/search_solr_tools/helpers/bounding_box_util.rb
296
298
  - lib/search_solr_tools/helpers/csw_iso_query_builder.rb
299
+ - lib/search_solr_tools/helpers/data_one_format.rb
297
300
  - lib/search_solr_tools/helpers/facet_configuration.rb
298
301
  - lib/search_solr_tools/helpers/iso_namespaces.rb
299
302
  - lib/search_solr_tools/helpers/iso_to_solr.rb
300
303
  - lib/search_solr_tools/helpers/iso_to_solr_format.rb
304
+ - lib/search_solr_tools/helpers/ncdc_paleo_format.rb
301
305
  - lib/search_solr_tools/helpers/query_builder.rb
302
306
  - lib/search_solr_tools/helpers/r2r_format.rb
303
307
  - lib/search_solr_tools/helpers/selectors.rb
@@ -307,8 +311,10 @@ files:
307
311
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
308
312
  - lib/search_solr_tools/helpers/usgs_format.rb
309
313
  - lib/search_solr_tools/selectors/cisl.rb
314
+ - lib/search_solr_tools/selectors/data_one.rb
310
315
  - lib/search_solr_tools/selectors/echo_iso.rb
311
316
  - lib/search_solr_tools/selectors/ices_iso.rb
317
+ - lib/search_solr_tools/selectors/ncdc_paleo.rb
312
318
  - lib/search_solr_tools/selectors/nmi.rb
313
319
  - lib/search_solr_tools/selectors/nodc_iso.rb
314
320
  - lib/search_solr_tools/selectors/pdc_iso.rb
@@ -341,7 +347,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
341
347
  version: '0'
342
348
  requirements: []
343
349
  rubyforge_project:
344
- rubygems_version: 2.4.6
350
+ rubygems_version: 2.4.8
345
351
  signing_key:
346
352
  specification_version: 4
347
353
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.