search_solr_tools 3.2.1 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/bin/search_solr_tools +4 -0
- data/lib/search_solr_tools/config/environments.yaml +3 -1
- data/lib/search_solr_tools/harvesters/data_one.rb +47 -0
- data/lib/search_solr_tools/harvesters/ices.rb +2 -1
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +60 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +2 -1
- data/lib/search_solr_tools/harvesters/usgs.rb +2 -2
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +1 -2
- data/lib/search_solr_tools/helpers/data_one_format.rb +74 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +1 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -3
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +61 -0
- data/lib/search_solr_tools/helpers/selectors.rb +12 -10
- data/lib/search_solr_tools/helpers/solr_format.rb +2 -0
- data/lib/search_solr_tools/selectors/data_one.rb +95 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +1 -1
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +89 -0
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da4660a6d133afe9408e57daf6f0cce758f5fea9
|
4
|
+
data.tar.gz: e6a60e1711fa6e7c8ee115321f8270207d2faaeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9893af295f321132cb507d59c1d29d39981c2303dc7784fc62a20f05646d95b57536bb15d772810e3529631f71a56ffb8d5f190f5d40371405e7cc3b538ee18
|
7
|
+
data.tar.gz: 1cc1addbfd33a0aca1561e68007681ed240dfc28e783cff394cacaadc60b1046261c933dbea6faf9b304eb837c718b2dc809f87e199dfe0652812fd52aae73a9
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
## v3.2.1 (2015-09-23)
|
1
|
+
## v3.3.0
|
2
|
+
|
3
|
+
New Features
|
4
|
+
|
5
|
+
- Add harvest support for
|
6
|
+
[NOAA Paleoclimatology Data Center (NOAA Paleo)](https://www.ncdc.noaa.gov/data-access/paleoclimatology-data/datasets).
|
7
|
+
|
8
|
+
- Add harvest support for
|
9
|
+
[Data Observation Network for Earth (Data ONE)](https://www.dataone.org/).
|
10
|
+
[Pivotal 77763710](https://www.pivotaltracker.com/story/show/77763710)
|
11
|
+
|
12
|
+
## v3.2.1 (2015-09-23)
|
2
13
|
|
3
14
|
Bugfixes
|
4
15
|
|
data/bin/search_solr_tools
CHANGED
@@ -63,13 +63,16 @@ class SolrHarvestCLI < Thor
|
|
63
63
|
end
|
64
64
|
|
65
65
|
no_tasks do
|
66
|
+
# rubocop: disable MethodLength
|
66
67
|
def harvester_map
|
67
68
|
{
|
68
69
|
'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
|
69
70
|
'cisl' => SearchSolrTools::Harvesters::Cisl,
|
71
|
+
'data_one' => SearchSolrTools::Harvesters::DataOne,
|
70
72
|
'echo' => SearchSolrTools::Harvesters::Echo,
|
71
73
|
'eol' => SearchSolrTools::Harvesters::Eol,
|
72
74
|
'ices' => SearchSolrTools::Harvesters::Ices,
|
75
|
+
'ncdc_paleo' => SearchSolrTools::Harvesters::NcdcPaleo,
|
73
76
|
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
74
77
|
'nodc' => SearchSolrTools::Harvesters::Nodc,
|
75
78
|
'r2r' => SearchSolrTools::Harvesters::R2R,
|
@@ -82,6 +85,7 @@ class SolrHarvestCLI < Thor
|
|
82
85
|
'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
|
83
86
|
}
|
84
87
|
end
|
88
|
+
# rubocop: enable MethodLength
|
85
89
|
|
86
90
|
def get_harvester_class(data_center_name)
|
87
91
|
name = data_center_name.downcase.to_s
|
@@ -5,8 +5,10 @@
|
|
5
5
|
:port: 8983
|
6
6
|
:bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
|
7
7
|
:cisl_url: https://www.aoncadis.org/oai/repository
|
8
|
+
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
8
9
|
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
|
9
10
|
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
11
|
+
:ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
|
10
12
|
:nmi_url: http://access.met.no/metamod/oai
|
11
13
|
:nodc_url: http://data.nodc.noaa.gov/geoportal/csw
|
12
14
|
:pdc_url: http://www.polardata.ca/oai/provider
|
@@ -38,7 +40,7 @@
|
|
38
40
|
:oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
|
39
41
|
|
40
42
|
:dev:
|
41
|
-
host: dev.search-solr.apps.int.nsidc.org
|
43
|
+
:host: dev.search-solr.apps.int.nsidc.org
|
42
44
|
:nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
|
43
45
|
:nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
|
44
46
|
:oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SearchSolrTools
  module Harvesters
    # Harvester for the DataONE catalog: requests pages of results from the
    # configured :data_one_url, translates each result, and inserts the
    # translated documents into Solr.
    class DataOne < Base
      def initialize(env = 'development', die_on_failure = false)
        super
        @page_size = 250
        @translator = Helpers::IsoToSolr.new(:data_one)
      end

      # Prints the source URL, then passes the harvest method and a
      # data_centers query for DataONE records to the superclass implementation.
      def harvest_and_delete
        puts "Running harvest of dataONE catalog from #{metadata_url}"
        long_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]
        super(method(:harvest_data_one_into_solr), "data_centers:\"#{long_name}\"")
      end

      # Requests consecutive pages of @page_size results until a page comes
      # back nil or empty. A failure on one page is printed and, unless
      # @die_on_failure is set, the harvest moves on to the next page.
      def harvest_data_one_into_solr
        offset = 0
        entries = get_results_from_data_one(offset)

        while entries && entries.length > 0
          begin
            docs = get_docs_with_translated_entries_from_data_one(entries)
            insert_solr_docs(docs)
          rescue => error
            puts "ERROR: #{error}\n\n"
            raise error if @die_on_failure
          end

          offset += @page_size
          entries = get_results_from_data_one(offset)
        end
      end

      # Fetches one page of results starting at the given offset.
      def get_results_from_data_one(start)
        request_url = build_request(start, @page_size)
        get_results(request_url, './response/result/doc')
      end

      # Query URL for the current environment.
      def metadata_url
        SolrEnvironments[@environment][:data_one_url]
      end

      # Translates each entry and wraps the translated root in a Solr add document.
      def get_docs_with_translated_entries_from_data_one(entries)
        entries.map do |entry|
          translated = @translator.translate(entry)
          create_new_solr_add_doc_with_child(translated.root)
        end
      end

      # Appends the paging parameters to the configured query URL.
      def build_request(start = 0, max_records = 100)
        [metadata_url, "start=#{start}", "rows=#{max_records}"].join('&')
      end
    end
  end
end
|
@@ -47,7 +47,8 @@ module SearchSolrTools
|
|
47
47
|
'resultType' => resultType,
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
|
-
'constraintLanguage' => 'CQL_TEXT'
|
50
|
+
'constraintLanguage' => 'CQL_TEXT',
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
51
52
|
)
|
52
53
|
end
|
53
54
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module SearchSolrTools
  module Harvesters
    # Harvests data from NCDC Paleo and inserts it into Solr after it has been translated
    class NcdcPaleo < Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 50
        @translator = Helpers::IsoToSolr.new :ncdc_paleo
      end

      # Prints the source URL, then passes the harvest method and a
      # data_centers query for NCDC Paleo records to the superclass implementation.
      def harvest_and_delete
        puts "Running harvest of NCDC Paleo catalog from #{ncdc_paleo_url}"
        super(method(:harvest_ncdc_paleo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]}\"")
      end

      # Requests pages of @page_size CSW records, starting at position 1, until
      # a page comes back nil or empty. A failure on one page is printed and,
      # unless @die_on_failure is set, the harvest continues with the next page.
      def harvest_ncdc_paleo_into_solr
        start_index = 1
        while (entries = get_results_from_ncdc_paleo_url(start_index)) && (entries.length > 0)
          begin
            insert_solr_docs get_docs_with_translated_entries_from_ncdc_paleo(entries)
          rescue => e
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # CSW endpoint for the current environment.
      def ncdc_paleo_url
        SolrEnvironments[@environment][:ncdc_paleo_url]
      end

      # Fetches one page of csw:Record nodes starting at start_index.
      def get_results_from_ncdc_paleo_url(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//csw:Record'
      end

      # For each CSW record, reads its DocID identifier, fetches the full RDF
      # description for that id, translates it, and appends the
      # authoritative_id and dataset_url fields to the resulting Solr doc.
      def get_docs_with_translated_entries_from_ncdc_paleo(entries)
        auth_ids = entries.map { |e| e.xpath("./dc:identifier[@scheme='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID']").text }

        auth_ids.map do |record|
          # Use the configured endpoint (as build_csw_request does) rather than
          # a second hard-coded copy of the CSW URL.
          result_xml = get_results("#{ncdc_paleo_url}?getxml=#{record}",
                                   '/rdf:RDF/rdf:Description').first
          solr_doc = create_new_solr_add_doc_with_child(@translator.translate(result_xml).root)
          insert_node = solr_doc.at_xpath('//doc')
          insert_node.add_child("<field name='authoritative_id'>#{record}</field>")
          # The closing </field> was missing; do not rely on parser recovery.
          insert_node.add_child("<field name='dataset_url'>http://gis.ncdc.noaa.gov/gptpaleo/catalog/search/resource/details.page?uuid=#{record}</field>")
          solr_doc.root
        end
      end

      # Builds the CSW GetRecords query string against the configured endpoint.
      def build_csw_request(resultType = 'results', maxRecords = '1000', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(ncdc_paleo_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition
                                                    )
      end
    end
  end
end
|
@@ -47,7 +47,8 @@ module SearchSolrTools
|
|
47
47
|
'resultType' => resultType,
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
|
-
'constraint' => bbox_constraint
|
50
|
+
'constraint' => bbox_constraint,
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
51
52
|
)
|
52
53
|
end
|
53
54
|
|
@@ -48,8 +48,8 @@ module SearchSolrTools
|
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
50
|
'TypeNames' => '',
|
51
|
-
'constraint' => bbox_constraint
|
52
|
-
|
51
|
+
'constraint' => bbox_constraint,
|
52
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
53
53
|
)
|
54
54
|
end
|
55
55
|
|
@@ -13,8 +13,7 @@ module SearchSolrTools
|
|
13
13
|
'resultType' => 'results',
|
14
14
|
'outputFormat' => 'application/xml',
|
15
15
|
'maxRecords' => '25',
|
16
|
-
'startPosition' => '1'
|
17
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
16
|
+
'startPosition' => '1'
|
18
17
|
}
|
19
18
|
|
20
19
|
def self.get_query_string(url, query_params = {})
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require_relative './iso_namespaces'
|
2
|
+
require_relative './iso_to_solr_format'
|
3
|
+
require_relative './solr_format'
|
4
|
+
|
5
|
+
module SearchSolrTools
  module Helpers
    # Class-level formatters that read spatial and temporal fields from a
    # DataONE result node and convert them with the SolrFormat helpers.
    class DataOneFormat < IsoToSolrFormat
      # Hash with :start and :end, taken from the beginDate and endDate
      # <date> fields and passed through SolrFormat.date_str.
      def self.date_range(node)
        range = {}
        range[:start] = SolrFormat.date_str(node.xpath('.//date[@name="beginDate"]').text.strip)
        range[:end] = SolrFormat.date_str(node.xpath('.//date[@name="endDate"]').text.strip)
        range
      end

      # Hash with :north, :south, :east and :west, each the stripped text of
      # the matching <float name="...BoundCoord"> field.
      def self.bounding_box(node)
        %i[north south east west].each_with_object({}) do |side, box|
          box[side] = node.xpath(%(.//float[@name="#{side}BoundCoord"])).text.strip
        end
      end

      # Space-separated "south west north east" string.
      def self.spatial_display(node)
        bounding_box(node).values_at(:south, :west, :north, :east).join(' ')
      end

      # Space-separated "west south" when the box collapses to a single
      # point, otherwise "west south east north".
      def self.spatial_index(node)
        box = bounding_box(node)
        single_point = box[:west] == box[:east] && box[:south] == box[:north]
        corners = single_point ? %i[west south] : %i[west south east north]
        box.values_at(*corners).join(' ')
      end

      # North minus south, as floats.
      def self.spatial_area(node)
        north, south = bounding_box(node).values_at(:north, :south)
        north.to_f - south.to_f
      end

      def self.temporal_coverage(node)
        SolrFormat.temporal_display_str(date_range(node))
      end

      # Duration between the start and end dates via
      # SolrFormat.get_temporal_duration; a missing end date is treated as
      # "now". Returns nil when there is no start date.
      def self.temporal_duration(node)
        range = date_range(node)
        finish = range[:end].to_s.empty? ? Time.now : Time.parse(range[:end])
        return if range[:start].to_s.empty?

        SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
      end

      def self.temporal_index_string(node)
        SolrFormat.temporal_index_str(date_range(node))
      end

      def self.facet_spatial_scope(node)
        SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(node))
      end

      def self.facet_temporal_duration(node)
        SolrFormat.get_temporal_duration_facet(temporal_duration(node))
      end
    end
  end
end
|
@@ -21,6 +21,7 @@ module SearchSolrTools
|
|
21
21
|
'gss' => 'http://www.isotc211.org/2005/gss',
|
22
22
|
'gts' => 'http://www.isotc211.org/2005/gts',
|
23
23
|
'oai' => 'http://www.openarchives.org/OAI/2.0/',
|
24
|
+
'rdf' => 'http://www.w3.org/TR/REC-rdf-syntax',
|
24
25
|
'srv' => 'http://www.isotc211.org/2005/srv',
|
25
26
|
'xlink' => 'http://www.w3.org/1999/xlink',
|
26
27
|
'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
|
@@ -77,9 +77,8 @@ module SearchSolrTools
|
|
77
77
|
|
78
78
|
def self.get_temporal_duration(temporal_node)
|
79
79
|
dr = date_range(temporal_node)
|
80
|
-
dr[:end].to_s.empty? ?
|
81
|
-
|
82
|
-
duration
|
80
|
+
end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
|
81
|
+
SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
|
83
82
|
end
|
84
83
|
|
85
84
|
def self.get_temporal_duration_facet(temporal_node)
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
require_relative './iso_namespaces'
|
4
|
+
require_relative './solr_format'
|
5
|
+
require_relative './iso_to_solr_format'
|
6
|
+
|
7
|
+
module SearchSolrTools
  module Helpers
    # Class-level formatters for NCDC Paleo records: bounding boxes from
    # ows corner elements, and date ranges parsed out of free-text coverage
    # strings of the form "START YEAR: ... * END YEAR: ...".
    class NcdcPaleoFormat < IsoToSolrFormat
      # Builds a north/south/east/west hash from the ows:UpperCorner
      # ("east north") and ows:LowerCorner ("west south") children of node.
      def self.bounding_box(node)
        east, north = node.xpath('./ows:UpperCorner').text.split
        west, south = node.xpath('./ows:LowerCorner').text.split
        { north: north, south: south, east: east, west: west }
      end

      # Returns { start:, end: } as DateTime values when the node text carries
      # a "START YEAR" range expressed in AD or "yr BP"; returns nil otherwise.
      def self.date_range(node, _formatted = false)
        text = node.text
        return unless text.include?('START YEAR')

        if text.include?('AD')
          format_ad_time(text)
        elsif text.include?('yr BP')
          format_cal_yr_bp_time(text)
        end
      end

      # Parses "START YEAR: <y> AD * END YEAR: <y> AD".
      # Returns nil when the text does not have that shape, instead of
      # failing on a nil match.
      def self.format_ad_time(node_text)
        match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
        return unless match

        {
          start: DateTime.strptime(match[:start].strip, '%Y'),
          end: DateTime.strptime(match[:end].strip, '%Y')
        }
      end

      # Parses "START YEAR: <n> ... yr BP * END YEAR: <n> ... yr BP" and
      # converts each age to a calendar year. BP ages count back from 1950,
      # so the calendar year is 1950 - age (previously computed as
      # -(age) - 1950, which shifted every date 3900 years too early).
      # Returns nil when the text does not have that shape.
      # NOTE(review): the "..." in the pattern matches any three characters,
      # presumably the "cal" prefix - confirm against real records.
      def self.format_cal_yr_bp_time(node_text)
        zero_year = 1950
        match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
        return unless match

        {
          start: DateTime.strptime((zero_year - match[:start].strip.to_i).to_s, '%Y'),
          end: DateTime.strptime((zero_year - match[:end].strip.to_i).to_s, '%Y')
        }
      end

      # Solr temporal index string for the parsed range; nil when no range
      # could be parsed.
      def self.temporal_index_str(node)
        range = date_range(node)
        SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
      end

      # Absolute length of the parsed range in whole days; nil when no range
      # could be parsed. date_range signals "no range" with nil, so the guard
      # must be a nil check (the former `range.empty?` raised NoMethodError).
      def self.get_temporal_duration(node)
        range = date_range(node)
        return if range.nil?

        (range[:start] - range[:end]).to_i.abs
      end

      # Returns the node text as the author, nil when the text contains a
      # ';', and the argument itself when it equals the empty string.
      def self.author(node)
        return node if node == ''
        return if node.text.include? ';'
        node.text
      end
    end
  end
end
|
@@ -6,16 +6,18 @@ module SearchSolrTools
|
|
6
6
|
# This hash grabs all the selector files inside the selectors directory,
|
7
7
|
# to add a new source we need to create a selector file and add it to this hash.
|
8
8
|
SELECTORS = {
|
9
|
-
cisl:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
9
|
+
cisl: Selectors::CISL,
|
10
|
+
data_one: Selectors::DATA_ONE,
|
11
|
+
echo: Selectors::ECHO,
|
12
|
+
ices: Selectors::ICES,
|
13
|
+
nmi: Selectors::NMI,
|
14
|
+
ncdc_paleo: Selectors::NCDC_PALEO,
|
15
|
+
nodc: Selectors::NODC,
|
16
|
+
pdc: Selectors::PDC,
|
17
|
+
r2r: Selectors::R2R,
|
18
|
+
rda: Selectors::RDA,
|
19
|
+
tdar: Selectors::TDAR,
|
20
|
+
usgs: Selectors::USGS
|
19
21
|
}
|
20
22
|
end
|
21
23
|
end
|
@@ -11,9 +11,11 @@ module SearchSolrTools
|
|
11
11
|
DATA_CENTER_NAMES = {
|
12
12
|
BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
|
13
13
|
CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
|
14
|
+
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
14
15
|
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
15
16
|
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
16
17
|
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
18
|
+
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
17
19
|
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
18
20
|
NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
|
19
21
|
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Long and short display names used for the DataONE data center fields.
    data_one_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE]

    # Selector for a single-valued field computed from the whole result node
    # by the named DataOneFormat class method.
    from_whole_doc = lambda do |formatter|
      { xpaths: ['.'], multivalue: false, format: Helpers::DataOneFormat.method(formatter) }
    end

    # Maps each Solr field to the xpath(s), default values and formatter used
    # to fill it from a DataONE result node.
    DATA_ONE = {
      authoritative_id: { xpaths: ['.//str[@name="id"]'], multivalue: false },
      title: { xpaths: ['.//str[@name="title"]'], multivalue: false },
      summary: { xpaths: ['.//str[@name="abstract"]'], multivalue: false },
      data_centers: {
        xpaths: [''],
        default_values: [data_one_names[:long_name]],
        multivalue: false
      },
      authors: { xpaths: ['.//str[@name="author"]'], multivalue: false },
      keywords: { xpaths: ['.//arr[@name="keywords"]/str'], multivalue: true },
      last_revision_date: {
        xpaths: ['.//date[@name="updateDate"]'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//str[@name="dataUrl"]'],
        default_values: [''],
        multivalue: false
      },
      spatial_coverages: from_whole_doc.call(:spatial_display),
      spatial: from_whole_doc.call(:spatial_index),
      spatial_area: from_whole_doc.call(:spatial_area),
      temporal_coverages: from_whole_doc.call(:temporal_coverage),
      temporal_duration: from_whole_doc.call(:temporal_duration),
      temporal: from_whole_doc.call(:temporal_index_string),
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{data_one_names[:long_name]} | #{data_one_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: from_whole_doc.call(:facet_spatial_scope),
      facet_temporal_duration: {
        xpaths: ['.'],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::DataOneFormat.method(:facet_temporal_duration),
        multivalue: false
      }
    }
  end
end
|
@@ -43,7 +43,7 @@ module SearchSolrTools
|
|
43
43
|
dataset_url: {
|
44
44
|
xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
|
45
45
|
multivalue: false,
|
46
|
-
format: Helpers::IsoToSolrFormat::
|
46
|
+
format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
|
47
47
|
},
|
48
48
|
spatial_coverages: {
|
49
49
|
xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Every xpath below is rooted at the RDF description element.
    description = '/rdf:RDF/rdf:Description'

    # Long and short display names used for the NCDC Paleo data center fields.
    paleo_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO]

    # Shorthand for looking up a NcdcPaleoFormat class method by name.
    paleo_format = ->(name) { Helpers::NcdcPaleoFormat.method(name) }

    # Maps each Solr field to the xpath(s), default values, reducer and
    # formatter used to fill it from an NCDC Paleo RDF record.
    NCDC_PALEO = {
      title: { xpaths: ["#{description}/dc:title"], multivalue: false },
      summary: { xpaths: ["#{description}/dc:description"], multivalue: false },
      data_centers: {
        xpaths: [''],
        default_values: [paleo_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ["#{description}/dc:creator"],
        multivalue: true,
        format: paleo_format.call(:author)
      },
      keywords: { xpaths: ["#{description}/dc:subject"], multivalue: true },
      last_revision_date: {
        xpaths: ["#{description}/dc:date"],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [''],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      spatial_coverages: {
        xpaths: ["#{description}/ows:WGS84BoundingBox"],
        multivalue: true,
        format: paleo_format.call(:spatial_display_str)
      },
      spatial: {
        xpaths: ["#{description}/ows:WGS84BoundingBox"],
        multivalue: true,
        format: paleo_format.call(:spatial_index_str)
      },
      spatial_area: {
        xpaths: ["#{description}/ows:WGS84BoundingBox"],
        multivalue: false,
        reduce: paleo_format.call(:get_max_spatial_area),
        format: paleo_format.call(:spatial_area_str)
      },
      temporal: {
        xpaths: ["#{description}/dc:coverage"],
        multivalue: true,
        format: paleo_format.call(:temporal_index_str)
      },
      temporal_coverages: {
        xpaths: ["#{description}/dc:coverage"],
        multivalue: true,
        format: paleo_format.call(:temporal_display_str)
      },
      temporal_duration: {
        xpaths: ["#{description}/dc:coverage"],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: paleo_format.call(:get_temporal_duration)
      },
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{paleo_names[:long_name]} | #{paleo_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: ["#{description}/ows:WGS84BoundingBox"],
        multivalue: true,
        format: paleo_format.call(:get_spatial_scope_facet)
      },
      facet_temporal_duration: {
        xpaths: ["#{description}/dc:coverage"],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: paleo_format.call(:get_temporal_duration_facet),
        multivalue: true
      }
    }
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.1
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2015-09-
|
15
|
+
date: 2015-09-24 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: iso8601
|
@@ -279,9 +279,11 @@ files:
|
|
279
279
|
- lib/search_solr_tools/harvesters/base.rb
|
280
280
|
- lib/search_solr_tools/harvesters/bcodmo.rb
|
281
281
|
- lib/search_solr_tools/harvesters/cisl.rb
|
282
|
+
- lib/search_solr_tools/harvesters/data_one.rb
|
282
283
|
- lib/search_solr_tools/harvesters/echo.rb
|
283
284
|
- lib/search_solr_tools/harvesters/eol.rb
|
284
285
|
- lib/search_solr_tools/harvesters/ices.rb
|
286
|
+
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
285
287
|
- lib/search_solr_tools/harvesters/nmi.rb
|
286
288
|
- lib/search_solr_tools/harvesters/nodc.rb
|
287
289
|
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
@@ -294,10 +296,12 @@ files:
|
|
294
296
|
- lib/search_solr_tools/harvesters/usgs.rb
|
295
297
|
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
296
298
|
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
299
|
+
- lib/search_solr_tools/helpers/data_one_format.rb
|
297
300
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
298
301
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
299
302
|
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
300
303
|
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
304
|
+
- lib/search_solr_tools/helpers/ncdc_paleo_format.rb
|
301
305
|
- lib/search_solr_tools/helpers/query_builder.rb
|
302
306
|
- lib/search_solr_tools/helpers/r2r_format.rb
|
303
307
|
- lib/search_solr_tools/helpers/selectors.rb
|
@@ -307,8 +311,10 @@ files:
|
|
307
311
|
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
308
312
|
- lib/search_solr_tools/helpers/usgs_format.rb
|
309
313
|
- lib/search_solr_tools/selectors/cisl.rb
|
314
|
+
- lib/search_solr_tools/selectors/data_one.rb
|
310
315
|
- lib/search_solr_tools/selectors/echo_iso.rb
|
311
316
|
- lib/search_solr_tools/selectors/ices_iso.rb
|
317
|
+
- lib/search_solr_tools/selectors/ncdc_paleo.rb
|
312
318
|
- lib/search_solr_tools/selectors/nmi.rb
|
313
319
|
- lib/search_solr_tools/selectors/nodc_iso.rb
|
314
320
|
- lib/search_solr_tools/selectors/pdc_iso.rb
|
@@ -341,7 +347,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
341
347
|
version: '0'
|
342
348
|
requirements: []
|
343
349
|
rubyforge_project:
|
344
|
-
rubygems_version: 2.4.
|
350
|
+
rubygems_version: 2.4.8
|
345
351
|
signing_key:
|
346
352
|
specification_version: 4
|
347
353
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|