search_solr_tools 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +88 -0
- data/COPYING +674 -0
- data/README.md +203 -0
- data/bin/search_solr_tools +87 -0
- data/lib/search_solr_tools.rb +8 -0
- data/lib/search_solr_tools/config/environments.rb +12 -0
- data/lib/search_solr_tools/config/environments.yaml +73 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
- data/lib/search_solr_tools/harvesters/base.rb +183 -0
- data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
- data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
- data/lib/search_solr_tools/harvesters/echo.rb +50 -0
- data/lib/search_solr_tools/harvesters/eol.rb +53 -0
- data/lib/search_solr_tools/harvesters/ices.rb +55 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
- data/lib/search_solr_tools/harvesters/oai.rb +59 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
- data/lib/search_solr_tools/harvesters/rda.rb +33 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
- data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
- data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
- data/lib/search_solr_tools/helpers/selectors.rb +20 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
- data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
- data/lib/search_solr_tools/selectors/cisl.rb +112 -0
- data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/nmi.rb +106 -0
- data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
- data/lib/search_solr_tools/selectors/rda.rb +106 -0
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
- data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
- data/lib/search_solr_tools/version.rb +3 -0
- data/search_solr_tools.gemspec +45 -0
- metadata +345 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Module-body locals: they exist only while this file is evaluated and
    # keep the repeated lookups / XPath expressions in a single place.
    nodc_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC]
    bounding_box = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    temporal_extent = './/gmd:EX_TemporalExtent'

    # Selector table for NODC ISO records. Every key is meant to match a field
    # in the solr schema; the value (a "selector") describes how the field is
    # populated from the ISO document:
    #
    #   xpaths         - required, the XPath expressions used to select nodes
    #   multivalue     - required, whether the field holds more than one value
    #   default_values - optional, applied when none of the xpaths resolved to
    #                    a value
    #   format         - optional, formatter applied to the selected nodes
    #   reduce         - optional, turns the formatted results of several
    #                    nodes into a single result for a non-multivalued field
    NODC = {
      authoritative_id: {
        xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [nodc_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        xpaths: [
          './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
          './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'
        ],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:Date'],
        # The fallback date is formatted into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this hash literal
        # is built at load time, not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        # Two expressions because the protocol text is matched case-sensitively.
        xpaths: [
          './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL',
          './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'
        ],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal_coverages: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
      },
      temporal_duration: {
        xpaths: [temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      temporal: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
      },
      sensors: {
        xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: true
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{nodc_names[:long_name]} | #{nodc_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Module-body locals: they exist only while this file is evaluated and
    # keep the repeated lookups / XPath expressions in a single place.
    pdc_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC]
    bounding_box = './/gmd:extent//gmd:EX_GeographicBoundingBox'
    temporal_extent = './/gmd:EX_TemporalExtent/gmd:extent'

    # Selector table for PDC records. Every key is meant to match a field in
    # the solr schema; the value (a "selector") describes how the field is
    # populated from the ISO document:
    #
    #   xpaths         - required, the XPath expressions used to select nodes
    #   multivalue     - required, whether the field holds more than one value
    #   default_values - optional, applied when none of the xpaths resolved to
    #                    a value
    #   format         - optional, formatter applied to the selected nodes
    #   reduce         - optional, takes the formatted result of multiple nodes
    #                    and produces a single result; meant for fields that
    #                    are not multivalued but whose value should consider
    #                    every node (for example, storing only the maximum
    #                    duration from multiple temporal coverage fields)
    PDC = {
      authoritative_id: {
        xpaths: ['.//oai:header/oai:identifier'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [pdc_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//oai:header/oai:datestamp'],
        # The fallback date is formatted into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this hash literal
        # is built at load time, not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
        multivalue: false,
        format: Helpers::SolrFormat::HTTP_URL_FORMAT
      },
      spatial_coverages: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{pdc_names[:long_name]} | #{pdc_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Module-body locals: they exist only while this file is evaluated and
    # keep the repeated lookups / XPath expressions in a single place.
    rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]
    spatial_coverage = './/dif:Spatial_Coverage'
    temporal_coverage = './/dif:Temporal_Coverage'

    # Selector table for RDA records. Every key is meant to match a field in
    # the solr schema; the value (a "selector") describes how the field is
    # populated from the harvested document:
    #
    #   xpaths         - required, the XPath expressions used to select nodes
    #   multivalue     - required, whether the field holds more than one value
    #   default_values - optional, applied when none of the xpaths resolved to
    #                    a value
    #   format         - optional, formatter applied to the selected nodes
    #   reduce         - optional, turns the formatted results of several
    #                    nodes into a single result for a non-multivalued field
    RDA = {
      authoritative_id: {
        xpaths: ['.//oai:header/oai:identifier'],
        multivalue: false
      },
      title: {
        xpaths: ['.//dif:Entry_Title'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//dif:Summary'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [rda_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [''],
        multivalue: true
      },
      keywords: {
        # Listed from the most specific DIF parameter element to the least.
        xpaths: [
          './/dif:Parameters/dif:Variable_Level_1',
          './/dif:Parameters/dif:Term',
          './/dif:Parameters/dif:Topic',
          './/dif:Parameters/dif:Category'
        ],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//dif:Last_DIF_Revision_Date'],
        # The fallback date is formatted into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this hash literal
        # is built at load time, not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//dif:Related_URL/dif:URL'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [spatial_coverage],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [temporal_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [temporal_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [temporal_coverage],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{rda_names[:long_name]} | #{rda_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_coverage],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Module-body locals: they exist only while this file is evaluated and
    # keep the repeated lookups / XPath expressions in a single place.
    tdar_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR]
    link_href = './/atom:link/@href'
    georss_box = './/georss:box'

    # Selector table for tDAR feed entries. Every key is meant to match a
    # field in the solr schema; the value (a "selector") describes how the
    # field is populated from the harvested document:
    #
    #   xpaths         - required, the XPath expressions used to select nodes
    #   multivalue     - required, whether the field holds more than one value
    #   default_values - optional, applied when none of the xpaths resolved to
    #                    a value
    #   format         - optional, formatter applied to the selected nodes
    #   reduce         - optional, turns the formatted results of several
    #                    nodes into a single result for a non-multivalued field
    TDAR = {
      authoritative_id: {
        xpaths: [link_href],
        multivalue: false,
        # Builds the id from the fifth '/'-separated segment of the link.
        # Interpolation is used on purpose: `'TDAR-' << segment || ''` binds
        # as `('TDAR-' << segment) || ''`, so the fallback never applied and a
        # link with fewer than five segments raised TypeError on nil. With
        # interpolation a missing segment simply yields 'TDAR-'.
        format: proc do |node|
          "TDAR-#{node.text.split('/')[4]}"
        end
      },
      title: {
        xpaths: ['.//atom:title'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//atom:summary'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [tdar_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ['.//atom:author/atom:name'],
        multivalue: true
      },
      keywords: {
        xpaths: [''],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::KEYWORDS
      },
      last_revision_date: {
        xpaths: ['.//atom:updated'],
        # The fallback date is formatted into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this hash literal
        # is built at load time, not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: [link_href],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [georss_box],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [georss_box],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [georss_box],
        multivalue: false,
        reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
        format: Helpers::TdarFormat::SPATIAL_AREA
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{tdar_names[:long_name]} | #{tdar_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [georss_box],
        multivalue: true,
        format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [''],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Module-body locals: they exist only while this file is evaluated and
    # keep the repeated lookups / XPath expressions in a single place.
    usgs_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS]
    bounding_box = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    time_period = ".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"

    # Selector table for USGS ISO records. Every key is meant to match a field
    # in the solr schema; the value (a "selector") describes how the field is
    # populated from the ISO document:
    #
    #   xpaths         - required, the XPath expressions used to select nodes
    #   multivalue     - required, whether the field holds more than one value
    #   default_values - optional, applied when none of the xpaths resolved to
    #                    a value
    #   format         - optional, formatter applied to the selected nodes
    #   reduce         - optional, turns the formatted results of several
    #                    nodes into a single result for a non-multivalued field
    USGS = {
      authoritative_id: {
        xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [usgs_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:DateTime'],
        # The fallback date is formatted into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE(review): DateTime.now is evaluated once, when this hash literal
        # is built at load time, not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [time_period],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [time_period],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [time_period],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::UsgsFormat::TEMPORAL_DURATION
      },
      sensors: {
        xpaths: [''],
        multivalue: true
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{usgs_names[:long_name]} | #{usgs_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [time_period],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|