search_solr_tools 3.2.1 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbf4232d65a295d48197fa78817b2b3df3e7a2ca
4
- data.tar.gz: e1b6ae300079b1bcec58ad3641eef3ed579db01d
3
+ metadata.gz: da4660a6d133afe9408e57daf6f0cce758f5fea9
4
+ data.tar.gz: e6a60e1711fa6e7c8ee115321f8270207d2faaeb
5
5
  SHA512:
6
- metadata.gz: c8e8d5366ad07f0f6a9bf86349217f6bb008ee1a968b869663e656cc68cae00d3ee63a3796eb1c183ceb345dd67ff7d7e28afa7e84e5099c9f81994a43c25495
7
- data.tar.gz: 5ecc17ebe35cd7b55fd97338b6f2a471193942ca2bfcb7a5f5008dbf6a45bbbbb1cd0eb5e58df03083e17e94d83744ff2e73bc9bb018cccfe60c1faa2d5fb2e3
6
+ metadata.gz: d9893af295f321132cb507d59c1d29d39981c2303dc7784fc62a20f05646d95b57536bb15d772810e3529631f71a56ffb8d5f190f5d40371405e7cc3b538ee18
7
+ data.tar.gz: 1cc1addbfd33a0aca1561e68007681ed240dfc28e783cff394cacaadc60b1046261c933dbea6faf9b304eb837c718b2dc809f87e199dfe0652812fd52aae73a9
data/CHANGELOG.md CHANGED
@@ -1,4 +1,15 @@
1
- ## v3.2.1 (2015-9-23)
1
+ ## v3.3.0
2
+
3
+ New Features
4
+
5
+ - Add harvest support for
6
+ [NOAA Paleoclimatology Data Center (NOAA Paleo)](https://www.ncdc.noaa.gov/data-access/paleoclimatology-data/datasets).
7
+
8
+ - Add harvest support for
9
+ [Data Observation Network for Earth (Data ONE)](https://www.dataone.org/).
10
+ [Pivotal 77763710](https://www.pivotaltracker.com/story/show/77763710)
11
+
12
+ ## v3.2.1 (2015-09-23)
2
13
 
3
14
  Bugfixes
4
15
 
@@ -63,13 +63,16 @@ class SolrHarvestCLI < Thor
63
63
  end
64
64
 
65
65
  no_tasks do
66
+ # rubocop: disable MethodLength
66
67
  def harvester_map
67
68
  {
68
69
  'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
69
70
  'cisl' => SearchSolrTools::Harvesters::Cisl,
71
+ 'data_one' => SearchSolrTools::Harvesters::DataOne,
70
72
  'echo' => SearchSolrTools::Harvesters::Echo,
71
73
  'eol' => SearchSolrTools::Harvesters::Eol,
72
74
  'ices' => SearchSolrTools::Harvesters::Ices,
75
+ 'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
73
76
  'nmi' => SearchSolrTools::Harvesters::Nmi,
74
77
  'nodc' => SearchSolrTools::Harvesters::Nodc,
75
78
  'r2r' => SearchSolrTools::Harvesters::R2R,
@@ -82,6 +85,7 @@ class SolrHarvestCLI < Thor
82
85
  'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
83
86
  }
84
87
  end
88
+ # rubocop: enable MethodLength
85
89
 
86
90
  def get_harvester_class(data_center_name)
87
91
  name = data_center_name.downcase.to_s
@@ -5,8 +5,10 @@
5
5
  :port: 8983
6
6
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
7
  :cisl_url: https://www.aoncadis.org/oai/repository
8
+ :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
8
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
9
10
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
11
+ :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
10
12
  :nmi_url: http://access.met.no/metamod/oai
11
13
  :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
12
14
  :pdc_url: http://www.polardata.ca/oai/provider
@@ -38,7 +40,7 @@
38
40
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
39
41
 
40
42
  :dev:
41
- host: dev.search-solr.apps.int.nsidc.org
43
+ :host: dev.search-solr.apps.int.nsidc.org
42
44
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
43
45
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
44
46
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
@@ -0,0 +1,47 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ class DataOne < Base
4
+ def initialize(env = 'development', die_on_failure = false)
5
+ super
6
+ @page_size = 250
7
+ @translator = Helpers::IsoToSolr.new :data_one
8
+ end
9
+
10
+ def harvest_and_delete
11
+ puts "Running harvest of dataONE catalog from #{metadata_url}"
12
+ super(method(:harvest_data_one_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]}\"")
13
+ end
14
+
15
+ def harvest_data_one_into_solr
16
+ start = 0
17
+ while (entries = get_results_from_data_one(start)) && (entries.length > 0)
18
+ begin
19
+ insert_solr_docs(get_docs_with_translated_entries_from_data_one(entries))
20
+ rescue => e
21
+ puts "ERROR: #{e}\n\n"
22
+ raise e if @die_on_failure
23
+ end
24
+ start += @page_size
25
+ end
26
+ end
27
+
28
+ def get_results_from_data_one(start)
29
+ get_results(build_request(start, @page_size), './response/result/doc')
30
+ end
31
+
32
+ def metadata_url
33
+ SolrEnvironments[@environment][:data_one_url]
34
+ end
35
+
36
+ def get_docs_with_translated_entries_from_data_one(entries)
37
+ entries.map do |e|
38
+ create_new_solr_add_doc_with_child(@translator.translate(e).root)
39
+ end
40
+ end
41
+
42
+ def build_request(start = 0, max_records = 100)
43
+ "#{metadata_url}&start=#{start}&rows=#{max_records}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -47,7 +47,8 @@ module SearchSolrTools
47
47
  'resultType' => resultType,
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
- 'constraintLanguage' => 'CQL_TEXT'
50
+ 'constraintLanguage' => 'CQL_TEXT',
51
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
51
52
  )
52
53
  end
53
54
  end
@@ -0,0 +1,60 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ # Harvests data from NODC PALEO and inserts it into Solr after it has been translated
4
+ class NcdcPaleo < Base
5
+ def initialize(env = 'development', die_on_failure = false)
6
+ super env, die_on_failure
7
+ @page_size = 50
8
+ @translator = Helpers::IsoToSolr.new :ncdc_paleo
9
+ end
10
+
11
+ def harvest_and_delete
12
+ puts "Running harvest of NCDC Paleo catalog from #{ncdc_paleo_url}"
13
+ super(method(:harvest_ncdc_paleo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]}\"")
14
+ end
15
+
16
+ def harvest_ncdc_paleo_into_solr
17
+ start_index = 1
18
+ while (entries = get_results_from_ncdc_paleo_url(start_index)) && (entries.length > 0)
19
+ begin
20
+ insert_solr_docs get_docs_with_translated_entries_from_ncdc_paleo(entries)
21
+ rescue => e
22
+ puts "ERROR: #{e}"
23
+ raise e if @die_on_failure
24
+ end
25
+ start_index += @page_size
26
+ end
27
+ end
28
+
29
+ def ncdc_paleo_url
30
+ SolrEnvironments[@environment][:ncdc_paleo_url]
31
+ end
32
+
33
+ def get_results_from_ncdc_paleo_url(start_index)
34
+ get_results build_csw_request('results', @page_size, start_index), '//csw:Record'
35
+ end
36
+
37
+ def get_docs_with_translated_entries_from_ncdc_paleo(entries)
38
+ auth_ids = entries.map { |e| e.xpath("./dc:identifier[@scheme='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID']").text }
39
+
40
+ auth_ids.map do |record|
41
+ result_xml = get_results("http://gis.ncdc.noaa.gov/gptpaleo/csw?getxml=#{record}",
42
+ '/rdf:RDF/rdf:Description').first
43
+ solr_doc = create_new_solr_add_doc_with_child(@translator.translate(result_xml).root)
44
+ insert_node = solr_doc.at_xpath('//doc')
45
+ insert_node.add_child("<field name='authoritative_id'>#{record}</field>")
46
+ insert_node.add_child("<field name='dataset_url'>http://gis.ncdc.noaa.gov/gptpaleo/catalog/search/resource/details.page?uuid=#{record}")
47
+ solr_doc.root
48
+ end
49
+ end
50
+
51
+ def build_csw_request(resultType = 'results', maxRecords = '1000', startPosition = '1')
52
+ Helpers::CswIsoQueryBuilder.get_query_string(ncdc_paleo_url,
53
+ 'resultType' => resultType,
54
+ 'maxRecords' => maxRecords,
55
+ 'startPosition' => startPosition
56
+ )
57
+ end
58
+ end
59
+ end
60
+ end
@@ -47,7 +47,8 @@ module SearchSolrTools
47
47
  'resultType' => resultType,
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
- 'constraint' => bbox_constraint
50
+ 'constraint' => bbox_constraint,
51
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
51
52
  )
52
53
  end
53
54
 
@@ -48,8 +48,8 @@ module SearchSolrTools
48
48
  'maxRecords' => maxRecords,
49
49
  'startPosition' => startPosition,
50
50
  'TypeNames' => '',
51
- 'constraint' => bbox_constraint
52
-
51
+ 'constraint' => bbox_constraint,
52
+ 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
53
53
  )
54
54
  end
55
55
 
@@ -13,8 +13,7 @@ module SearchSolrTools
13
13
  'resultType' => 'results',
14
14
  'outputFormat' => 'application/xml',
15
15
  'maxRecords' => '25',
16
- 'startPosition' => '1',
17
- 'outputSchema' => 'http://www.isotc211.org/2005/gmd'
16
+ 'startPosition' => '1'
18
17
  }
19
18
 
20
19
  def self.get_query_string(url, query_params = {})
@@ -0,0 +1,74 @@
1
+ require_relative './iso_namespaces'
2
+ require_relative './iso_to_solr_format'
3
+ require_relative './solr_format'
4
+
5
+ module SearchSolrTools
6
+ module Helpers
7
+ class DataOneFormat < IsoToSolrFormat
8
+ class << self
9
+ def date_range(node)
10
+ {
11
+ start: SolrFormat.date_str(node.xpath('.//date[@name="beginDate"]').text.strip),
12
+ end: SolrFormat.date_str(node.xpath('.//date[@name="endDate"]').text.strip)
13
+ }
14
+ end
15
+
16
+ def bounding_box(node)
17
+ {
18
+ north: node.xpath('.//float[@name="northBoundCoord"]').text.strip,
19
+ south: node.xpath('.//float[@name="southBoundCoord"]').text.strip,
20
+ east: node.xpath('.//float[@name="eastBoundCoord"]').text.strip,
21
+ west: node.xpath('.//float[@name="westBoundCoord"]').text.strip
22
+ }
23
+ end
24
+
25
+ def spatial_display(node)
26
+ box = bounding_box(node)
27
+
28
+ [box[:south], box[:west], box[:north], box[:east]].join(' ')
29
+ end
30
+
31
+ def spatial_index(node)
32
+ box = bounding_box(node)
33
+
34
+ if box[:west] == box[:east] && box[:south] == box[:north]
35
+ [box[:west], box[:south]]
36
+ else
37
+ [box[:west], box[:south], box[:east], box[:north]]
38
+ end.join(' ')
39
+ end
40
+
41
+ def spatial_area(node)
42
+ box = bounding_box(node)
43
+
44
+ box[:north].to_f - box[:south].to_f
45
+ end
46
+
47
+ def temporal_coverage(node)
48
+ SolrFormat.temporal_display_str(date_range(node))
49
+ end
50
+
51
+ def temporal_duration(node)
52
+ dr = date_range(node)
53
+ end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
54
+ SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
55
+ end
56
+
57
+ def temporal_index_string(node)
58
+ dr = date_range(node)
59
+ SolrFormat.temporal_index_str(dr)
60
+ end
61
+
62
+ def facet_spatial_scope(node)
63
+ box = bounding_box(node)
64
+ SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
65
+ end
66
+
67
+ def facet_temporal_duration(node)
68
+ duration = temporal_duration(node)
69
+ SolrFormat.get_temporal_duration_facet(duration)
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -21,6 +21,7 @@ module SearchSolrTools
21
21
  'gss' => 'http://www.isotc211.org/2005/gss',
22
22
  'gts' => 'http://www.isotc211.org/2005/gts',
23
23
  'oai' => 'http://www.openarchives.org/OAI/2.0/',
24
+ 'rdf' => 'http://www.w3.org/TR/REC-rdf-syntax',
24
25
  'srv' => 'http://www.isotc211.org/2005/srv',
25
26
  'xlink' => 'http://www.w3.org/1999/xlink',
26
27
  'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
@@ -77,9 +77,8 @@ module SearchSolrTools
77
77
 
78
78
  def self.get_temporal_duration(temporal_node)
79
79
  dr = date_range(temporal_node)
80
- dr[:end].to_s.empty? ? end_time = Time.now : end_time = Time.parse(dr[:end])
81
- dr[:start].to_s.empty? ? duration = nil : duration = SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time)
82
- duration
80
+ end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
81
+ SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
83
82
  end
84
83
 
85
84
  def self.get_temporal_duration_facet(temporal_node)
@@ -0,0 +1,61 @@
1
+ require 'date'
2
+
3
+ require_relative './iso_namespaces'
4
+ require_relative './solr_format'
5
+ require_relative './iso_to_solr_format'
6
+
7
+ module SearchSolrTools
8
+ module Helpers
9
+ class NcdcPaleoFormat < IsoToSolrFormat
10
+ def self.bounding_box(node)
11
+ east, north = node.xpath('./ows:UpperCorner').text.split
12
+ west, south = node.xpath('./ows:LowerCorner').text.split
13
+ { north: north, south: south, east: east, west: west }
14
+ end
15
+
16
+ def self.date_range(node, _formatted = false)
17
+ if node.text.include?('START YEAR')
18
+ if node.text.include?('AD')
19
+ format_ad_time(node.text)
20
+ elsif node.text.include?('yr BP')
21
+ format_cal_yr_bp_time(node.text)
22
+ end
23
+ end
24
+ end
25
+
26
+ def self.format_ad_time(node_text)
27
+ match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
28
+ {
29
+ start: DateTime.strptime(match[:start].strip, '%Y'),
30
+ end: DateTime.strptime(match[:end].strip, '%Y')
31
+ }
32
+ end
33
+
34
+ def self.format_cal_yr_bp_time(node_text)
35
+ zero_year = 1950
36
+ match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
37
+ {
38
+ start: DateTime.strptime((-(match[:start].strip.to_i) - zero_year).to_s, '%Y'),
39
+ end: DateTime.strptime((-(match[:end].strip.to_i) - zero_year).to_s, '%Y')
40
+ }
41
+ end
42
+
43
+ def self.temporal_index_str(node)
44
+ range = date_range(node)
45
+ SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
46
+ end
47
+
48
+ def self.get_temporal_duration(node)
49
+ range = date_range(node)
50
+ return if range.empty?
51
+ (range[:start] - range[:end]).to_i.abs
52
+ end
53
+
54
+ def self.author(node)
55
+ return node if node == ''
56
+ return if node.text.include? ';'
57
+ node.text
58
+ end
59
+ end
60
+ end
61
+ end
@@ -6,16 +6,18 @@ module SearchSolrTools
6
6
  # This hash grabs all the selector files inside the selectors directory,
7
7
  # to add a new source we need to create a selector file and add it to this hash.
8
8
  SELECTORS = {
9
- cisl: Selectors::CISL,
10
- echo: Selectors::ECHO,
11
- ices: Selectors::ICES,
12
- nmi: Selectors::NMI,
13
- nodc: Selectors::NODC,
14
- pdc: Selectors::PDC,
15
- r2r: Selectors::R2R,
16
- rda: Selectors::RDA,
17
- tdar: Selectors::TDAR,
18
- usgs: Selectors::USGS
9
+ cisl: Selectors::CISL,
10
+ data_one: Selectors::DATA_ONE,
11
+ echo: Selectors::ECHO,
12
+ ices: Selectors::ICES,
13
+ nmi: Selectors::NMI,
14
+ ncdc_paleo: Selectors::NCDC_PALEO,
15
+ nodc: Selectors::NODC,
16
+ pdc: Selectors::PDC,
17
+ r2r: Selectors::R2R,
18
+ rda: Selectors::RDA,
19
+ tdar: Selectors::TDAR,
20
+ usgs: Selectors::USGS
19
21
  }
20
22
  end
21
23
  end
@@ -11,9 +11,11 @@ module SearchSolrTools
11
11
  DATA_CENTER_NAMES = {
12
12
  BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
13
13
  CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
14
+ DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
14
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
15
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
16
17
  ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
18
+ NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
17
19
  NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
18
20
  NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
19
21
  NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
@@ -0,0 +1,95 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ DATA_ONE = {
6
+ authoritative_id: {
7
+ xpaths: ['.//str[@name="id"]'],
8
+ multivalue: false
9
+ },
10
+ title: {
11
+ xpaths: ['.//str[@name="title"]'],
12
+ multivalue: false
13
+ },
14
+ summary: {
15
+ xpaths: ['.//str[@name="abstract"]'],
16
+ multivalue: false
17
+ },
18
+ data_centers: {
19
+ xpaths: [''],
20
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
21
+ multivalue: false
22
+ },
23
+ authors: {
24
+ xpaths: ['.//str[@name="author"]'],
25
+ multivalue: false
26
+ },
27
+ keywords: {
28
+ xpaths: ['.//arr[@name="keywords"]/str'],
29
+ multivalue: true
30
+ },
31
+ last_revision_date: {
32
+ xpaths: ['.//date[@name="updateDate"]'],
33
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
34
+ multivalue: false,
35
+ format: Helpers::SolrFormat::DATE
36
+ },
37
+ dataset_url: {
38
+ xpaths: ['.//str[@name="dataUrl"]'],
39
+ default_values: [''],
40
+ multivalue: false
41
+ },
42
+ spatial_coverages: {
43
+ xpaths: ['.'],
44
+ multivalue: false,
45
+ format: Helpers::DataOneFormat.method(:spatial_display)
46
+ },
47
+ spatial: {
48
+ xpaths: ['.'],
49
+ multivalue: false,
50
+ format: Helpers::DataOneFormat.method(:spatial_index)
51
+ },
52
+ spatial_area: {
53
+ xpaths: ['.'],
54
+ multivalue: false,
55
+ format: Helpers::DataOneFormat.method(:spatial_area)
56
+ },
57
+ temporal_coverages: {
58
+ xpaths: ['.'],
59
+ multivalue: false,
60
+ format: Helpers::DataOneFormat.method(:temporal_coverage)
61
+ },
62
+ temporal_duration: {
63
+ xpaths: ['.'],
64
+ multivalue: false,
65
+ format: Helpers::DataOneFormat.method(:temporal_duration)
66
+ },
67
+ temporal: {
68
+ xpaths: ['.'],
69
+ multivalue: false,
70
+ format: Helpers::DataOneFormat.method(:temporal_index_string)
71
+ },
72
+ source: {
73
+ xpaths: [''],
74
+ default_values: ['ADE'],
75
+ multivalue: false
76
+ },
77
+ facet_data_center: {
78
+ xpaths: [''],
79
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
80
+ multivalue: false
81
+ },
82
+ facet_spatial_scope: {
83
+ xpaths: ['.'],
84
+ multivalue: false,
85
+ format: Helpers::DataOneFormat.method(:facet_spatial_scope)
86
+ },
87
+ facet_temporal_duration: {
88
+ xpaths: ['.'],
89
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
90
+ format: Helpers::DataOneFormat.method(:facet_temporal_duration),
91
+ multivalue: false
92
+ }
93
+ }
94
+ end
95
+ end
@@ -43,7 +43,7 @@ module SearchSolrTools
43
43
  dataset_url: {
44
44
  xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
45
45
  multivalue: false,
46
- format: Helpers::IsoToSolrFormat:: ICES_DATASET_URL
46
+ format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
47
47
  },
48
48
  spatial_coverages: {
49
49
  xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
@@ -0,0 +1,89 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ NCDC_PALEO = {
6
+ title: {
7
+ xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
8
+ multivalue: false
9
+ },
10
+ summary: {
11
+ xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
12
+ multivalue: false
13
+ },
14
+ data_centers: {
15
+ xpaths: [''],
16
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
17
+ multivalue: false
18
+ },
19
+ authors: {
20
+ xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
21
+ multivalue: true,
22
+ format: Helpers::NcdcPaleoFormat.method(:author)
23
+ },
24
+ keywords: {
25
+ xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
26
+ multivalue: true
27
+ },
28
+ last_revision_date: {
29
+ xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
30
+ default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
31
+ multivalue: false,
32
+ format: Helpers::SolrFormat::DATE
33
+ },
34
+ spatial_coverages: {
35
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
36
+ multivalue: true,
37
+ format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
38
+ },
39
+ spatial: {
40
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
41
+ multivalue: true,
42
+ format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
43
+ },
44
+ spatial_area: {
45
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
46
+ multivalue: false,
47
+ reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
48
+ format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
49
+ },
50
+ temporal: {
51
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
52
+ multivalue: true,
53
+ format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
54
+ },
55
+ temporal_coverages: {
56
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
57
+ multivalue: true,
58
+ format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
59
+ },
60
+ temporal_duration: {
61
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
62
+ multivalue: false,
63
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
64
+ format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
65
+ },
66
+ source: {
67
+ xpaths: [''],
68
+ default_values: ['ADE'],
69
+ multivalue: false
70
+ },
71
+ facet_data_center: {
72
+ xpaths: [''],
73
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
74
+ multivalue: false
75
+ },
76
+ facet_spatial_scope: {
77
+ xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
78
+ multivalue: true,
79
+ format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
80
+ },
81
+ facet_temporal_duration: {
82
+ xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
83
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
84
+ format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
85
+ multivalue: true
86
+ }
87
+ }
88
+ end
89
+ end
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.2.1'
2
+ VERSION = '3.3.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.1
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2015-09-23 00:00:00.000000000 Z
15
+ date: 2015-09-24 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -279,9 +279,11 @@ files:
279
279
  - lib/search_solr_tools/harvesters/base.rb
280
280
  - lib/search_solr_tools/harvesters/bcodmo.rb
281
281
  - lib/search_solr_tools/harvesters/cisl.rb
282
+ - lib/search_solr_tools/harvesters/data_one.rb
282
283
  - lib/search_solr_tools/harvesters/echo.rb
283
284
  - lib/search_solr_tools/harvesters/eol.rb
284
285
  - lib/search_solr_tools/harvesters/ices.rb
286
+ - lib/search_solr_tools/harvesters/ncdc_paleo.rb
285
287
  - lib/search_solr_tools/harvesters/nmi.rb
286
288
  - lib/search_solr_tools/harvesters/nodc.rb
287
289
  - lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
@@ -294,10 +296,12 @@ files:
294
296
  - lib/search_solr_tools/harvesters/usgs.rb
295
297
  - lib/search_solr_tools/helpers/bounding_box_util.rb
296
298
  - lib/search_solr_tools/helpers/csw_iso_query_builder.rb
299
+ - lib/search_solr_tools/helpers/data_one_format.rb
297
300
  - lib/search_solr_tools/helpers/facet_configuration.rb
298
301
  - lib/search_solr_tools/helpers/iso_namespaces.rb
299
302
  - lib/search_solr_tools/helpers/iso_to_solr.rb
300
303
  - lib/search_solr_tools/helpers/iso_to_solr_format.rb
304
+ - lib/search_solr_tools/helpers/ncdc_paleo_format.rb
301
305
  - lib/search_solr_tools/helpers/query_builder.rb
302
306
  - lib/search_solr_tools/helpers/r2r_format.rb
303
307
  - lib/search_solr_tools/helpers/selectors.rb
@@ -307,8 +311,10 @@ files:
307
311
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
308
312
  - lib/search_solr_tools/helpers/usgs_format.rb
309
313
  - lib/search_solr_tools/selectors/cisl.rb
314
+ - lib/search_solr_tools/selectors/data_one.rb
310
315
  - lib/search_solr_tools/selectors/echo_iso.rb
311
316
  - lib/search_solr_tools/selectors/ices_iso.rb
317
+ - lib/search_solr_tools/selectors/ncdc_paleo.rb
312
318
  - lib/search_solr_tools/selectors/nmi.rb
313
319
  - lib/search_solr_tools/selectors/nodc_iso.rb
314
320
  - lib/search_solr_tools/selectors/pdc_iso.rb
@@ -341,7 +347,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
341
347
  version: '0'
342
348
  requirements: []
343
349
  rubyforge_project:
344
- rubygems_version: 2.4.6
350
+ rubygems_version: 2.4.8
345
351
  signing_key:
346
352
  specification_version: 4
347
353
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.