search_solr_tools 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +88 -0
- data/COPYING +674 -0
- data/README.md +203 -0
- data/bin/search_solr_tools +87 -0
- data/lib/search_solr_tools.rb +8 -0
- data/lib/search_solr_tools/config/environments.rb +12 -0
- data/lib/search_solr_tools/config/environments.yaml +73 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
- data/lib/search_solr_tools/harvesters/base.rb +183 -0
- data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
- data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
- data/lib/search_solr_tools/harvesters/echo.rb +50 -0
- data/lib/search_solr_tools/harvesters/eol.rb +53 -0
- data/lib/search_solr_tools/harvesters/ices.rb +55 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
- data/lib/search_solr_tools/harvesters/oai.rb +59 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
- data/lib/search_solr_tools/harvesters/rda.rb +33 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
- data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
- data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
- data/lib/search_solr_tools/helpers/selectors.rb +20 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
- data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
- data/lib/search_solr_tools/selectors/cisl.rb +112 -0
- data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/nmi.rb +106 -0
- data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
- data/lib/search_solr_tools/selectors/rda.rb +106 -0
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
- data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
- data/lib/search_solr_tools/version.rb +3 -0
- data/search_solr_tools.gemspec +45 -0
- metadata +345 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Selector table for NODC ISO records. Every key is meant to match a field
    # in the solr schema; the value (a "selector") describes how that field is
    # obtained from the ISO document:
    #
    #   xpaths         - XPath expressions used to select nodes (required)
    #   multivalue     - whether the field holds several values (required)
    #   default_values - applied when none of the xpaths resolves to a value (optional)
    #   format         - formatter applied to the selected field (optional)
    #   reduce         - combines the results of several nodes into one value (optional)

    # Looked up once; reused for the data_centers and facet_data_center defaults.
    nodc_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC]

    # XPaths shared by several selectors below.
    bounding_box    = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    temporal_extent = './/gmd:EX_TemporalExtent'

    NODC = {
      authoritative_id: { xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'], multivalue: false },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: { xpaths: [''], default_values: [nodc_names[:long_name]], multivalue: false },
      authors: {
        xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        # Keywords may be plain character strings or gmx anchors.
        xpaths: %w[gco:CharacterString gmx:Anchor].map { |leaf| ".//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/#{leaf}" },
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:Date'],
        # The default is computed once, when this file is loaded. date_str formats the date into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        # Matches the protocol text in either lower or upper case.
        xpaths: [
          './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL',
          './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'
        ],
        multivalue: false
      },
      spatial_coverages: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY },
      spatial: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_INDEX },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal_coverages: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
      },
      temporal_duration: {
        xpaths: [temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      temporal: { xpaths: [temporal_extent], multivalue: true, format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING },
      sensors: {
        xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: true
      },
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{nodc_names[:long_name]} | #{nodc_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Selector table for PDC records. Every key is meant to match a field in
    # the solr schema; the value (a "selector") describes how that field is
    # obtained from the harvested document:
    #
    #   xpaths         - XPath expressions used to select nodes (required)
    #   multivalue     - whether the field holds several values (required)
    #   default_values - applied when none of the xpaths resolves to a value (optional)
    #   format         - formatter applied to the selected field (optional)
    #   reduce         - takes the formatted results of multiple nodes and
    #                    produces a single result (optional). It serves fields
    #                    that are not multivalued but whose value should
    #                    consider every node, e.g. storing only the maximum
    #                    duration out of several temporal coverages.

    # Looked up once; reused for the data_centers and facet_data_center defaults.
    pdc_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC]

    # XPaths shared by several selectors below.
    bounding_box    = './/gmd:extent//gmd:EX_GeographicBoundingBox'
    temporal_extent = './/gmd:EX_TemporalExtent/gmd:extent'

    PDC = {
      authoritative_id: { xpaths: ['.//oai:header/oai:identifier'], multivalue: false },
      title: { xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'], multivalue: false },
      summary: { xpaths: ['.//gmd:abstract/gco:CharacterString'], multivalue: false },
      data_centers: { xpaths: [''], default_values: [pdc_names[:long_name]], multivalue: false },
      authors: {
        xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
        multivalue: true
      },
      keywords: { xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'], multivalue: true },
      last_revision_date: {
        xpaths: ['.//oai:header/oai:datestamp'],
        # The default is computed once, when this file is loaded. date_str formats the date into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
        multivalue: false,
        format: Helpers::SolrFormat::HTTP_URL_FORMAT
      },
      spatial_coverages: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY },
      spatial: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_INDEX },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: { xpaths: [temporal_extent], multivalue: true, format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING },
      temporal_coverages: {
        xpaths: [temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{pdc_names[:long_name]} | #{pdc_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Selector table for RDA records (the xpaths below use the oai: and dif:
    # prefixes). Every key is meant to match a field in the solr schema; the
    # value (a "selector") describes how that field is obtained:
    #
    #   xpaths         - XPath expressions used to select nodes (required)
    #   multivalue     - whether the field holds several values (required)
    #   default_values - applied when none of the xpaths resolves to a value (optional)
    #   format         - formatter applied to the selected field (optional)
    #   reduce         - combines the results of several nodes into one value (optional)

    # Looked up once; reused for the data_centers and facet_data_center defaults.
    rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]

    # XPaths shared by several selectors below.
    spatial_coverage  = './/dif:Spatial_Coverage'
    temporal_coverage = './/dif:Temporal_Coverage'

    RDA = {
      authoritative_id: { xpaths: ['.//oai:header/oai:identifier'], multivalue: false },
      title: { xpaths: ['.//dif:Entry_Title'], multivalue: false },
      summary: { xpaths: ['.//dif:Summary'], multivalue: false },
      data_centers: { xpaths: [''], default_values: [rda_names[:long_name]], multivalue: false },
      authors: { xpaths: [''], multivalue: true },
      keywords: {
        # Listed from the most specific parameter element to the most general.
        xpaths: %w[Variable_Level_1 Term Topic Category].map { |tag| ".//dif:Parameters/dif:#{tag}" },
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//dif:Last_DIF_Revision_Date'],
        # The default is computed once, when this file is loaded. date_str formats the date into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: { xpaths: ['.//dif:Related_URL/dif:URL'], multivalue: false },
      spatial_coverages: { xpaths: [spatial_coverage], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY },
      spatial: { xpaths: [spatial_coverage], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_INDEX },
      spatial_area: {
        xpaths: [spatial_coverage],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: { xpaths: [temporal_coverage], multivalue: true, format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING },
      temporal_coverages: {
        xpaths: [temporal_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [temporal_coverage],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{rda_names[:long_name]} | #{rda_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [temporal_coverage],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Selector table for tDAR records (the xpaths below use the atom: and
    # georss: prefixes). Every key is meant to match a field in the solr
    # schema; the value (a "selector") describes how that field is obtained:
    #
    #   xpaths         - XPath expressions used to select nodes (required)
    #   multivalue     - whether the field holds several values (required)
    #   default_values - applied when none of the xpaths resolves to a value (optional)
    #   format         - formatter applied to the selected field (optional)
    #   reduce         - combines the results of several nodes into one value (optional)
    TDAR = {
      authoritative_id: {
        xpaths: ['.//atom:link/@href'],
        multivalue: false,
        # Builds "TDAR-<segment>" from the fifth '/'-separated piece (index 4)
        # of the link href. Interpolation is deliberate: `<<` binds tighter
        # than `||`, so writing `'TDAR-' << segment || ''` never applies the
        # fallback and raises TypeError when the href has fewer than five
        # pieces. Interpolating a nil segment simply yields "TDAR-".
        format: proc do |node|
          "TDAR-#{node.text.split('/')[4]}"
        end
      },
      title: {
        xpaths: ['.//atom:title'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//atom:summary'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ['.//atom:author/atom:name'],
        multivalue: true
      },
      keywords: {
        xpaths: [''],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::KEYWORDS
      },
      last_revision_date: {
        xpaths: ['.//atom:updated'],
        # The default is computed once, when this file is loaded. date_str formats the date into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//atom:link/@href'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: ['.//georss:box'],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: ['.//georss:box'],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: ['.//georss:box'],
        multivalue: false,
        reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
        format: Helpers::TdarFormat::SPATIAL_AREA
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: ['.//georss:box'],
        multivalue: true,
        format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        # No xpath is given, so this selector relies on its default value.
        xpaths: [''],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'search_solr_tools'
|
2
|
+
|
3
|
+
module SearchSolrTools
  module Selectors
    # Selector table for USGS ISO records. Every key is meant to match a field
    # in the solr schema; the value (a "selector") describes how that field is
    # obtained from the ISO document:
    #
    #   xpaths         - XPath expressions used to select nodes (required)
    #   multivalue     - whether the field holds several values (required)
    #   default_values - applied when none of the xpaths resolves to a value (optional)
    #   format         - formatter applied to the selected field (optional)
    #   reduce         - combines the results of several nodes into one value (optional)

    # Looked up once; reused for the data_centers and facet_data_center defaults.
    usgs_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS]

    # XPaths shared by several selectors below.
    bounding_box = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    # Citation dates whose date type code is 'Time Period'.
    time_period  = ".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"

    USGS = {
      authoritative_id: { xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'], multivalue: false },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: { xpaths: [''], default_values: [usgs_names[:long_name]], multivalue: false },
      authors: {
        xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:DateTime'],
        # The default is computed once, when this file is loaded. date_str formats the date into ISO8601 as in
        # http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
        multivalue: false
      },
      spatial_coverages: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY },
      spatial: { xpaths: [bounding_box], multivalue: true, format: Helpers::IsoToSolrFormat::SPATIAL_INDEX },
      spatial_area: {
        xpaths: [bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: { xpaths: [time_period], multivalue: true, format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING },
      temporal_coverages: { xpaths: [time_period], multivalue: true, format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING },
      temporal_duration: {
        xpaths: [time_period],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::UsgsFormat::TEMPORAL_DURATION
      },
      sensors: { xpaths: [''], multivalue: true },
      source: { xpaths: [''], default_values: ['ADE'], multivalue: false },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{usgs_names[:long_name]} | #{usgs_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [time_period],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }
  end
end
|