search_solr_tools 3.2.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/bin/search_solr_tools +4 -0
- data/lib/search_solr_tools/config/environments.yaml +3 -1
- data/lib/search_solr_tools/harvesters/data_one.rb +47 -0
- data/lib/search_solr_tools/harvesters/ices.rb +2 -1
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +60 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +2 -1
- data/lib/search_solr_tools/harvesters/usgs.rb +2 -2
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +1 -2
- data/lib/search_solr_tools/helpers/data_one_format.rb +74 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +1 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -3
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +61 -0
- data/lib/search_solr_tools/helpers/selectors.rb +12 -10
- data/lib/search_solr_tools/helpers/solr_format.rb +2 -0
- data/lib/search_solr_tools/selectors/data_one.rb +95 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +1 -1
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +89 -0
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da4660a6d133afe9408e57daf6f0cce758f5fea9
|
4
|
+
data.tar.gz: e6a60e1711fa6e7c8ee115321f8270207d2faaeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9893af295f321132cb507d59c1d29d39981c2303dc7784fc62a20f05646d95b57536bb15d772810e3529631f71a56ffb8d5f190f5d40371405e7cc3b538ee18
|
7
|
+
data.tar.gz: 1cc1addbfd33a0aca1561e68007681ed240dfc28e783cff394cacaadc60b1046261c933dbea6faf9b304eb837c718b2dc809f87e199dfe0652812fd52aae73a9
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
## v3.2.1
|
1
|
+
## v3.3.0
|
2
|
+
|
3
|
+
New Features
|
4
|
+
|
5
|
+
- Add harvest support for
|
6
|
+
[NOAA Paleoclimatology Data Center (NOAA Paleo)](https://www.ncdc.noaa.gov/data-access/paleoclimatology-data/datasets).
|
7
|
+
|
8
|
+
- Add harvest support for
|
9
|
+
[Data Observation Network for Earth (Data ONE)](https://www.dataone.org/).
|
10
|
+
[Pivotal 77763710](https://www.pivotaltracker.com/story/show/77763710)
|
11
|
+
|
12
|
+
## v3.2.1 (2015-09-23)
|
2
13
|
|
3
14
|
Bugfixes
|
4
15
|
|
data/bin/search_solr_tools
CHANGED
@@ -63,13 +63,16 @@ class SolrHarvestCLI < Thor
|
|
63
63
|
end
|
64
64
|
|
65
65
|
no_tasks do
|
66
|
+
# rubocop: disable MethodLength
|
66
67
|
def harvester_map
|
67
68
|
{
|
68
69
|
'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
|
69
70
|
'cisl' => SearchSolrTools::Harvesters::Cisl,
|
71
|
+
'data_one' => SearchSolrTools::Harvesters::DataOne,
|
70
72
|
'echo' => SearchSolrTools::Harvesters::Echo,
|
71
73
|
'eol' => SearchSolrTools::Harvesters::Eol,
|
72
74
|
'ices' => SearchSolrTools::Harvesters::Ices,
|
75
|
+
'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
|
73
76
|
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
74
77
|
'nodc' => SearchSolrTools::Harvesters::Nodc,
|
75
78
|
'r2r' => SearchSolrTools::Harvesters::R2R,
|
@@ -82,6 +85,7 @@ class SolrHarvestCLI < Thor
|
|
82
85
|
'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
|
83
86
|
}
|
84
87
|
end
|
88
|
+
# rubocop: enable MethodLength
|
85
89
|
|
86
90
|
def get_harvester_class(data_center_name)
|
87
91
|
name = data_center_name.downcase.to_s
|
@@ -5,8 +5,10 @@
|
|
5
5
|
:port: 8983
|
6
6
|
:bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
|
7
7
|
:cisl_url: https://www.aoncadis.org/oai/repository
|
8
|
+
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
8
9
|
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
|
9
10
|
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
11
|
+
:ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
|
10
12
|
:nmi_url: http://access.met.no/metamod/oai
|
11
13
|
:nodc_url: http://data.nodc.noaa.gov/geoportal/csw
|
12
14
|
:pdc_url: http://www.polardata.ca/oai/provider
|
@@ -38,7 +40,7 @@
|
|
38
40
|
:oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
|
39
41
|
|
40
42
|
:dev:
|
41
|
-
host: dev.search-solr.apps.int.nsidc.org
|
43
|
+
:host: dev.search-solr.apps.int.nsidc.org
|
42
44
|
:nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
|
43
45
|
:nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
|
44
46
|
:oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Harvesters
|
3
|
+
class DataOne < Base
|
4
|
+
def initialize(env = 'development', die_on_failure = false)
|
5
|
+
super
|
6
|
+
@page_size = 250
|
7
|
+
@translator = Helpers::IsoToSolr.new :data_one
|
8
|
+
end
|
9
|
+
|
10
|
+
def harvest_and_delete
|
11
|
+
puts "Running harvest of dataONE catalog from #{metadata_url}"
|
12
|
+
super(method(:harvest_data_one_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]}\"")
|
13
|
+
end
|
14
|
+
|
15
|
+
def harvest_data_one_into_solr
|
16
|
+
start = 0
|
17
|
+
while (entries = get_results_from_data_one(start)) && (entries.length > 0)
|
18
|
+
begin
|
19
|
+
insert_solr_docs(get_docs_with_translated_entries_from_data_one(entries))
|
20
|
+
rescue => e
|
21
|
+
puts "ERROR: #{e}\n\n"
|
22
|
+
raise e if @die_on_failure
|
23
|
+
end
|
24
|
+
start += @page_size
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def get_results_from_data_one(start)
|
29
|
+
get_results(build_request(start, @page_size), './response/result/doc')
|
30
|
+
end
|
31
|
+
|
32
|
+
def metadata_url
|
33
|
+
SolrEnvironments[@environment][:data_one_url]
|
34
|
+
end
|
35
|
+
|
36
|
+
def get_docs_with_translated_entries_from_data_one(entries)
|
37
|
+
entries.map do |e|
|
38
|
+
create_new_solr_add_doc_with_child(@translator.translate(e).root)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def build_request(start = 0, max_records = 100)
|
43
|
+
"#{metadata_url}&start=#{start}&rows=#{max_records}"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -47,7 +47,8 @@ module SearchSolrTools
|
|
47
47
|
'resultType' => resultType,
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
|
-
'constraintLanguage' => 'CQL_TEXT'
|
50
|
+
'constraintLanguage' => 'CQL_TEXT',
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
51
52
|
)
|
52
53
|
end
|
53
54
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Harvesters
|
3
|
+
# Harvests data from NODC PALEO and inserts it into Solr after it has been translated
|
4
|
+
class NcdcPaleo < Base
|
5
|
+
def initialize(env = 'development', die_on_failure = false)
|
6
|
+
super env, die_on_failure
|
7
|
+
@page_size = 50
|
8
|
+
@translator = Helpers::IsoToSolr.new :ncdc_paleo
|
9
|
+
end
|
10
|
+
|
11
|
+
def harvest_and_delete
|
12
|
+
puts "Running harvest of NCDC Paleo catalog from #{ncdc_paleo_url}"
|
13
|
+
super(method(:harvest_ncdc_paleo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]}\"")
|
14
|
+
end
|
15
|
+
|
16
|
+
def harvest_ncdc_paleo_into_solr
|
17
|
+
start_index = 1
|
18
|
+
while (entries = get_results_from_ncdc_paleo_url(start_index)) && (entries.length > 0)
|
19
|
+
begin
|
20
|
+
insert_solr_docs get_docs_with_translated_entries_from_ncdc_paleo(entries)
|
21
|
+
rescue => e
|
22
|
+
puts "ERROR: #{e}"
|
23
|
+
raise e if @die_on_failure
|
24
|
+
end
|
25
|
+
start_index += @page_size
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def ncdc_paleo_url
|
30
|
+
SolrEnvironments[@environment][:ncdc_paleo_url]
|
31
|
+
end
|
32
|
+
|
33
|
+
def get_results_from_ncdc_paleo_url(start_index)
|
34
|
+
get_results build_csw_request('results', @page_size, start_index), '//csw:Record'
|
35
|
+
end
|
36
|
+
|
37
|
+
def get_docs_with_translated_entries_from_ncdc_paleo(entries)
|
38
|
+
auth_ids = entries.map { |e| e.xpath("./dc:identifier[@scheme='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID']").text }
|
39
|
+
|
40
|
+
auth_ids.map do |record|
|
41
|
+
result_xml = get_results("http://gis.ncdc.noaa.gov/gptpaleo/csw?getxml=#{record}",
|
42
|
+
'/rdf:RDF/rdf:Description').first
|
43
|
+
solr_doc = create_new_solr_add_doc_with_child(@translator.translate(result_xml).root)
|
44
|
+
insert_node = solr_doc.at_xpath('//doc')
|
45
|
+
insert_node.add_child("<field name='authoritative_id'>#{record}</field>")
|
46
|
+
insert_node.add_child("<field name='dataset_url'>http://gis.ncdc.noaa.gov/gptpaleo/catalog/search/resource/details.page?uuid=#{record}")
|
47
|
+
solr_doc.root
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def build_csw_request(resultType = 'results', maxRecords = '1000', startPosition = '1')
|
52
|
+
Helpers::CswIsoQueryBuilder.get_query_string(ncdc_paleo_url,
|
53
|
+
'resultType' => resultType,
|
54
|
+
'maxRecords' => maxRecords,
|
55
|
+
'startPosition' => startPosition
|
56
|
+
)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -47,7 +47,8 @@ module SearchSolrTools
|
|
47
47
|
'resultType' => resultType,
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
|
-
'constraint' => bbox_constraint
|
50
|
+
'constraint' => bbox_constraint,
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
51
52
|
)
|
52
53
|
end
|
53
54
|
|
@@ -48,8 +48,8 @@ module SearchSolrTools
|
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
50
|
'TypeNames' => '',
|
51
|
-
'constraint' => bbox_constraint
|
52
|
-
|
51
|
+
'constraint' => bbox_constraint,
|
52
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
53
53
|
)
|
54
54
|
end
|
55
55
|
|
@@ -13,8 +13,7 @@ module SearchSolrTools
|
|
13
13
|
'resultType' => 'results',
|
14
14
|
'outputFormat' => 'application/xml',
|
15
15
|
'maxRecords' => '25',
|
16
|
-
'startPosition' => '1'
|
17
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
16
|
+
'startPosition' => '1'
|
18
17
|
}
|
19
18
|
|
20
19
|
def self.get_query_string(url, query_params = {})
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require_relative './iso_namespaces'
|
2
|
+
require_relative './iso_to_solr_format'
|
3
|
+
require_relative './solr_format'
|
4
|
+
|
5
|
+
module SearchSolrTools
|
6
|
+
module Helpers
|
7
|
+
class DataOneFormat < IsoToSolrFormat
|
8
|
+
class << self
|
9
|
+
def date_range(node)
|
10
|
+
{
|
11
|
+
start: SolrFormat.date_str(node.xpath('.//date[@name="beginDate"]').text.strip),
|
12
|
+
end: SolrFormat.date_str(node.xpath('.//date[@name="endDate"]').text.strip)
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
def bounding_box(node)
|
17
|
+
{
|
18
|
+
north: node.xpath('.//float[@name="northBoundCoord"]').text.strip,
|
19
|
+
south: node.xpath('.//float[@name="southBoundCoord"]').text.strip,
|
20
|
+
east: node.xpath('.//float[@name="eastBoundCoord"]').text.strip,
|
21
|
+
west: node.xpath('.//float[@name="westBoundCoord"]').text.strip
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
def spatial_display(node)
|
26
|
+
box = bounding_box(node)
|
27
|
+
|
28
|
+
[box[:south], box[:west], box[:north], box[:east]].join(' ')
|
29
|
+
end
|
30
|
+
|
31
|
+
def spatial_index(node)
|
32
|
+
box = bounding_box(node)
|
33
|
+
|
34
|
+
if box[:west] == box[:east] && box[:south] == box[:north]
|
35
|
+
[box[:west], box[:south]]
|
36
|
+
else
|
37
|
+
[box[:west], box[:south], box[:east], box[:north]]
|
38
|
+
end.join(' ')
|
39
|
+
end
|
40
|
+
|
41
|
+
def spatial_area(node)
|
42
|
+
box = bounding_box(node)
|
43
|
+
|
44
|
+
box[:north].to_f - box[:south].to_f
|
45
|
+
end
|
46
|
+
|
47
|
+
def temporal_coverage(node)
|
48
|
+
SolrFormat.temporal_display_str(date_range(node))
|
49
|
+
end
|
50
|
+
|
51
|
+
def temporal_duration(node)
|
52
|
+
dr = date_range(node)
|
53
|
+
end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
|
54
|
+
SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
|
55
|
+
end
|
56
|
+
|
57
|
+
def temporal_index_string(node)
|
58
|
+
dr = date_range(node)
|
59
|
+
SolrFormat.temporal_index_str(dr)
|
60
|
+
end
|
61
|
+
|
62
|
+
def facet_spatial_scope(node)
|
63
|
+
box = bounding_box(node)
|
64
|
+
SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
|
65
|
+
end
|
66
|
+
|
67
|
+
def facet_temporal_duration(node)
|
68
|
+
duration = temporal_duration(node)
|
69
|
+
SolrFormat.get_temporal_duration_facet(duration)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -21,6 +21,7 @@ module SearchSolrTools
|
|
21
21
|
'gss' => 'http://www.isotc211.org/2005/gss',
|
22
22
|
'gts' => 'http://www.isotc211.org/2005/gts',
|
23
23
|
'oai' => 'http://www.openarchives.org/OAI/2.0/',
|
24
|
+
'rdf' => 'http://www.w3.org/TR/REC-rdf-syntax',
|
24
25
|
'srv' => 'http://www.isotc211.org/2005/srv',
|
25
26
|
'xlink' => 'http://www.w3.org/1999/xlink',
|
26
27
|
'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
|
@@ -77,9 +77,8 @@ module SearchSolrTools
|
|
77
77
|
|
78
78
|
def self.get_temporal_duration(temporal_node)
|
79
79
|
dr = date_range(temporal_node)
|
80
|
-
dr[:end].to_s.empty? ?
|
81
|
-
|
82
|
-
duration
|
80
|
+
end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
|
81
|
+
SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
|
83
82
|
end
|
84
83
|
|
85
84
|
def self.get_temporal_duration_facet(temporal_node)
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
require_relative './iso_namespaces'
|
4
|
+
require_relative './solr_format'
|
5
|
+
require_relative './iso_to_solr_format'
|
6
|
+
|
7
|
+
module SearchSolrTools
|
8
|
+
module Helpers
|
9
|
+
class NcdcPaleoFormat < IsoToSolrFormat
|
10
|
+
def self.bounding_box(node)
|
11
|
+
east, north = node.xpath('./ows:UpperCorner').text.split
|
12
|
+
west, south = node.xpath('./ows:LowerCorner').text.split
|
13
|
+
{ north: north, south: south, east: east, west: west }
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.date_range(node, _formatted = false)
|
17
|
+
if node.text.include?('START YEAR')
|
18
|
+
if node.text.include?('AD')
|
19
|
+
format_ad_time(node.text)
|
20
|
+
elsif node.text.include?('yr BP')
|
21
|
+
format_cal_yr_bp_time(node.text)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.format_ad_time(node_text)
|
27
|
+
match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
|
28
|
+
{
|
29
|
+
start: DateTime.strptime(match[:start].strip, '%Y'),
|
30
|
+
end: DateTime.strptime(match[:end].strip, '%Y')
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.format_cal_yr_bp_time(node_text)
|
35
|
+
zero_year = 1950
|
36
|
+
match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
|
37
|
+
{
|
38
|
+
start: DateTime.strptime((-(match[:start].strip.to_i) - zero_year).to_s, '%Y'),
|
39
|
+
end: DateTime.strptime((-(match[:end].strip.to_i) - zero_year).to_s, '%Y')
|
40
|
+
}
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.temporal_index_str(node)
|
44
|
+
range = date_range(node)
|
45
|
+
SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.get_temporal_duration(node)
|
49
|
+
range = date_range(node)
|
50
|
+
return if range.empty?
|
51
|
+
(range[:start] - range[:end]).to_i.abs
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.author(node)
|
55
|
+
return node if node == ''
|
56
|
+
return if node.text.include? ';'
|
57
|
+
node.text
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -6,16 +6,18 @@ module SearchSolrTools
|
|
6
6
|
# This hash grabs all the selector files inside the selectors directory,
|
7
7
|
# to add a new source we need to create a selector file and add it to this hash.
|
8
8
|
SELECTORS = {
|
9
|
-
cisl:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
9
|
+
cisl: Selectors::CISL,
|
10
|
+
data_one: Selectors::DATA_ONE,
|
11
|
+
echo: Selectors::ECHO,
|
12
|
+
ices: Selectors::ICES,
|
13
|
+
nmi: Selectors::NMI,
|
14
|
+
ncdc_paleo: Selectors::NCDC_PALEO,
|
15
|
+
nodc: Selectors::NODC,
|
16
|
+
pdc: Selectors::PDC,
|
17
|
+
r2r: Selectors::R2R,
|
18
|
+
rda: Selectors::RDA,
|
19
|
+
tdar: Selectors::TDAR,
|
20
|
+
usgs: Selectors::USGS
|
19
21
|
}
|
20
22
|
end
|
21
23
|
end
|
@@ -11,9 +11,11 @@ module SearchSolrTools
|
|
11
11
|
DATA_CENTER_NAMES = {
|
12
12
|
BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
|
13
13
|
CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
|
14
|
+
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
14
15
|
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
15
16
|
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
16
17
|
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
18
|
+
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
17
19
|
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
18
20
|
NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
|
19
21
|
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
|
4
|
+
module Selectors
|
5
|
+
DATA_ONE = {
|
6
|
+
authoritative_id: {
|
7
|
+
xpaths: ['.//str[@name="id"]'],
|
8
|
+
multivalue: false
|
9
|
+
},
|
10
|
+
title: {
|
11
|
+
xpaths: ['.//str[@name="title"]'],
|
12
|
+
multivalue: false
|
13
|
+
},
|
14
|
+
summary: {
|
15
|
+
xpaths: ['.//str[@name="abstract"]'],
|
16
|
+
multivalue: false
|
17
|
+
},
|
18
|
+
data_centers: {
|
19
|
+
xpaths: [''],
|
20
|
+
default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
|
21
|
+
multivalue: false
|
22
|
+
},
|
23
|
+
authors: {
|
24
|
+
xpaths: ['.//str[@name="author"]'],
|
25
|
+
multivalue: false
|
26
|
+
},
|
27
|
+
keywords: {
|
28
|
+
xpaths: ['.//arr[@name="keywords"]/str'],
|
29
|
+
multivalue: true
|
30
|
+
},
|
31
|
+
last_revision_date: {
|
32
|
+
xpaths: ['.//date[@name="updateDate"]'],
|
33
|
+
default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
|
34
|
+
multivalue: false,
|
35
|
+
format: Helpers::SolrFormat::DATE
|
36
|
+
},
|
37
|
+
dataset_url: {
|
38
|
+
xpaths: ['.//str[@name="dataUrl"]'],
|
39
|
+
default_values: [''],
|
40
|
+
multivalue: false
|
41
|
+
},
|
42
|
+
spatial_coverages: {
|
43
|
+
xpaths: ['.'],
|
44
|
+
multivalue: false,
|
45
|
+
format: Helpers::DataOneFormat.method(:spatial_display)
|
46
|
+
},
|
47
|
+
spatial: {
|
48
|
+
xpaths: ['.'],
|
49
|
+
multivalue: false,
|
50
|
+
format: Helpers::DataOneFormat.method(:spatial_index)
|
51
|
+
},
|
52
|
+
spatial_area: {
|
53
|
+
xpaths: ['.'],
|
54
|
+
multivalue: false,
|
55
|
+
format: Helpers::DataOneFormat.method(:spatial_area)
|
56
|
+
},
|
57
|
+
temporal_coverages: {
|
58
|
+
xpaths: ['.'],
|
59
|
+
multivalue: false,
|
60
|
+
format: Helpers::DataOneFormat.method(:temporal_coverage)
|
61
|
+
},
|
62
|
+
temporal_duration: {
|
63
|
+
xpaths: ['.'],
|
64
|
+
multivalue: false,
|
65
|
+
format: Helpers::DataOneFormat.method(:temporal_duration)
|
66
|
+
},
|
67
|
+
temporal: {
|
68
|
+
xpaths: ['.'],
|
69
|
+
multivalue: false,
|
70
|
+
format: Helpers::DataOneFormat.method(:temporal_index_string)
|
71
|
+
},
|
72
|
+
source: {
|
73
|
+
xpaths: [''],
|
74
|
+
default_values: ['ADE'],
|
75
|
+
multivalue: false
|
76
|
+
},
|
77
|
+
facet_data_center: {
|
78
|
+
xpaths: [''],
|
79
|
+
default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
|
80
|
+
multivalue: false
|
81
|
+
},
|
82
|
+
facet_spatial_scope: {
|
83
|
+
xpaths: ['.'],
|
84
|
+
multivalue: false,
|
85
|
+
format: Helpers::DataOneFormat.method(:facet_spatial_scope)
|
86
|
+
},
|
87
|
+
facet_temporal_duration: {
|
88
|
+
xpaths: ['.'],
|
89
|
+
default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
|
90
|
+
format: Helpers::DataOneFormat.method(:facet_temporal_duration),
|
91
|
+
multivalue: false
|
92
|
+
}
|
93
|
+
}
|
94
|
+
end
|
95
|
+
end
|
@@ -43,7 +43,7 @@ module SearchSolrTools
|
|
43
43
|
dataset_url: {
|
44
44
|
xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
|
45
45
|
multivalue: false,
|
46
|
-
format: Helpers::IsoToSolrFormat::
|
46
|
+
format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
|
47
47
|
},
|
48
48
|
spatial_coverages: {
|
49
49
|
xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
|
4
|
+
module Selectors
|
5
|
+
NCDC_PALEO = {
|
6
|
+
title: {
|
7
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
|
8
|
+
multivalue: false
|
9
|
+
},
|
10
|
+
summary: {
|
11
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
|
12
|
+
multivalue: false
|
13
|
+
},
|
14
|
+
data_centers: {
|
15
|
+
xpaths: [''],
|
16
|
+
default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
|
17
|
+
multivalue: false
|
18
|
+
},
|
19
|
+
authors: {
|
20
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
|
21
|
+
multivalue: true,
|
22
|
+
format: Helpers::NcdcPaleoFormat.method(:author)
|
23
|
+
},
|
24
|
+
keywords: {
|
25
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
|
26
|
+
multivalue: true
|
27
|
+
},
|
28
|
+
last_revision_date: {
|
29
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
|
30
|
+
default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
|
31
|
+
multivalue: false,
|
32
|
+
format: Helpers::SolrFormat::DATE
|
33
|
+
},
|
34
|
+
spatial_coverages: {
|
35
|
+
xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
|
36
|
+
multivalue: true,
|
37
|
+
format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
|
38
|
+
},
|
39
|
+
spatial: {
|
40
|
+
xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
|
41
|
+
multivalue: true,
|
42
|
+
format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
|
43
|
+
},
|
44
|
+
spatial_area: {
|
45
|
+
xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
|
46
|
+
multivalue: false,
|
47
|
+
reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
|
48
|
+
format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
|
49
|
+
},
|
50
|
+
temporal: {
|
51
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
|
52
|
+
multivalue: true,
|
53
|
+
format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
|
54
|
+
},
|
55
|
+
temporal_coverages: {
|
56
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
|
57
|
+
multivalue: true,
|
58
|
+
format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
|
59
|
+
},
|
60
|
+
temporal_duration: {
|
61
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
|
62
|
+
multivalue: false,
|
63
|
+
reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
|
64
|
+
format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
|
65
|
+
},
|
66
|
+
source: {
|
67
|
+
xpaths: [''],
|
68
|
+
default_values: ['ADE'],
|
69
|
+
multivalue: false
|
70
|
+
},
|
71
|
+
facet_data_center: {
|
72
|
+
xpaths: [''],
|
73
|
+
default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
|
74
|
+
multivalue: false
|
75
|
+
},
|
76
|
+
facet_spatial_scope: {
|
77
|
+
xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
|
78
|
+
multivalue: true,
|
79
|
+
format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
|
80
|
+
},
|
81
|
+
facet_temporal_duration: {
|
82
|
+
xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
|
83
|
+
default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
|
84
|
+
format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
|
85
|
+
multivalue: true
|
86
|
+
}
|
87
|
+
}
|
88
|
+
end
|
89
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.1
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2015-09-
|
15
|
+
date: 2015-09-24 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: iso8601
|
@@ -279,9 +279,11 @@ files:
|
|
279
279
|
- lib/search_solr_tools/harvesters/base.rb
|
280
280
|
- lib/search_solr_tools/harvesters/bcodmo.rb
|
281
281
|
- lib/search_solr_tools/harvesters/cisl.rb
|
282
|
+
- lib/search_solr_tools/harvesters/data_one.rb
|
282
283
|
- lib/search_solr_tools/harvesters/echo.rb
|
283
284
|
- lib/search_solr_tools/harvesters/eol.rb
|
284
285
|
- lib/search_solr_tools/harvesters/ices.rb
|
286
|
+
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
285
287
|
- lib/search_solr_tools/harvesters/nmi.rb
|
286
288
|
- lib/search_solr_tools/harvesters/nodc.rb
|
287
289
|
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
@@ -294,10 +296,12 @@ files:
|
|
294
296
|
- lib/search_solr_tools/harvesters/usgs.rb
|
295
297
|
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
296
298
|
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
299
|
+
- lib/search_solr_tools/helpers/data_one_format.rb
|
297
300
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
298
301
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
299
302
|
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
300
303
|
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
304
|
+
- lib/search_solr_tools/helpers/ncdc_paleo_format.rb
|
301
305
|
- lib/search_solr_tools/helpers/query_builder.rb
|
302
306
|
- lib/search_solr_tools/helpers/r2r_format.rb
|
303
307
|
- lib/search_solr_tools/helpers/selectors.rb
|
@@ -307,8 +311,10 @@ files:
|
|
307
311
|
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
308
312
|
- lib/search_solr_tools/helpers/usgs_format.rb
|
309
313
|
- lib/search_solr_tools/selectors/cisl.rb
|
314
|
+
- lib/search_solr_tools/selectors/data_one.rb
|
310
315
|
- lib/search_solr_tools/selectors/echo_iso.rb
|
311
316
|
- lib/search_solr_tools/selectors/ices_iso.rb
|
317
|
+
- lib/search_solr_tools/selectors/ncdc_paleo.rb
|
312
318
|
- lib/search_solr_tools/selectors/nmi.rb
|
313
319
|
- lib/search_solr_tools/selectors/nodc_iso.rb
|
314
320
|
- lib/search_solr_tools/selectors/pdc_iso.rb
|
@@ -341,7 +347,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
341
347
|
version: '0'
|
342
348
|
requirements: []
|
343
349
|
rubyforge_project:
|
344
|
-
rubygems_version: 2.4.
|
350
|
+
rubygems_version: 2.4.8
|
345
351
|
signing_key:
|
346
352
|
specification_version: 4
|
347
353
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|