search_solr_tools 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +88 -0
  3. data/COPYING +674 -0
  4. data/README.md +203 -0
  5. data/bin/search_solr_tools +87 -0
  6. data/lib/search_solr_tools.rb +8 -0
  7. data/lib/search_solr_tools/config/environments.rb +12 -0
  8. data/lib/search_solr_tools/config/environments.yaml +73 -0
  9. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
  10. data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
  11. data/lib/search_solr_tools/harvesters/base.rb +183 -0
  12. data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
  13. data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
  14. data/lib/search_solr_tools/harvesters/echo.rb +50 -0
  15. data/lib/search_solr_tools/harvesters/eol.rb +53 -0
  16. data/lib/search_solr_tools/harvesters/ices.rb +55 -0
  17. data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
  18. data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
  19. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
  20. data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
  21. data/lib/search_solr_tools/harvesters/oai.rb +59 -0
  22. data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
  23. data/lib/search_solr_tools/harvesters/rda.rb +33 -0
  24. data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
  25. data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
  26. data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
  27. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
  28. data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
  29. data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
  32. data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
  33. data/lib/search_solr_tools/helpers/selectors.rb +20 -0
  34. data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
  35. data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
  36. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
  37. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
  38. data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
  39. data/lib/search_solr_tools/selectors/cisl.rb +112 -0
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
  42. data/lib/search_solr_tools/selectors/nmi.rb +106 -0
  43. data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
  44. data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
  45. data/lib/search_solr_tools/selectors/rda.rb +106 -0
  46. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
  47. data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
  48. data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
  49. data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
  50. data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
  51. data/lib/search_solr_tools/version.rb +3 -0
  52. data/search_solr_tools.gemspec +45 -0
  53. metadata +345 -0
@@ -0,0 +1,107 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # Selectors for the NODC data center. Each key should map to a field in the
6
+ # solr schema; its entry (a "selector") picks nodes from the ISO document with
7
+ # xpaths, applies default_values if none of the xpaths resolved to a value,
8
+ # and formats the field. xpaths and multivalue are required; default_values,
9
+ # format and reduce are optional.
10
+ NODC = {
11
+ authoritative_id: {
12
+ xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
13
+ multivalue: false
14
+ },
15
+ title: {
16
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
17
+ multivalue: false
18
+ },
19
+ summary: {
20
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
21
+ multivalue: false
22
+ },
23
+ data_centers: {
24
+ xpaths: [''], # empty xpath: the value is expected to come from default_values - TODO confirm
25
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]],
26
+ multivalue: false
27
+ },
28
+ authors: {
29
+ xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
30
+ multivalue: true
31
+ },
32
+ keywords: {
33
+ xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
34
+ './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
35
+ multivalue: true
36
+ },
37
+ last_revision_date: {
38
+ xpaths: ['.//gmd:dateStamp/gco:Date'],
39
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html; DateTime.now is evaluated once, when this hash is built
40
+ multivalue: false,
41
+ format: Helpers::SolrFormat::DATE
42
+ },
43
+ dataset_url: {
44
+ xpaths: ['.//gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL', # XPath contains() is case-sensitive, hence the separate "FTP" variant below
45
+ './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'],
46
+ multivalue: false
47
+ },
48
+ spatial_coverages: {
49
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
50
+ multivalue: true,
51
+ format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
52
+ },
53
+ spatial: {
54
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
55
+ multivalue: true,
56
+ format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
57
+ },
58
+ spatial_area: {
59
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
60
+ multivalue: false,
61
+ reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
62
+ format: Helpers::IsoToSolrFormat::SPATIAL_AREA
63
+ },
64
+ temporal_coverages: {
65
+ xpaths: ['.//gmd:EX_TemporalExtent'],
66
+ multivalue: true,
67
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
68
+ },
69
+ temporal_duration: {
70
+ xpaths: ['.//gmd:EX_TemporalExtent'],
71
+ multivalue: false,
72
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
73
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
74
+ },
75
+ temporal: {
76
+ xpaths: ['.//gmd:EX_TemporalExtent'],
77
+ multivalue: true,
78
+ format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
79
+ },
80
+ sensors: {
81
+ xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
82
+ multivalue: true
83
+ },
84
+ source: {
85
+ xpaths: [''],
86
+ default_values: ['ADE'],
87
+ multivalue: false
88
+ },
89
+ facet_data_center: {
90
+ xpaths: [''],
91
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:short_name]}"],
92
+ multivalue: false
93
+ },
94
+ facet_spatial_scope: {
95
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
96
+ multivalue: true,
97
+ format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
+ },
99
+ facet_temporal_duration: {
100
+ xpaths: ['.//gmd:EX_TemporalExtent'],
101
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
+ format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
+ multivalue: true
104
+ }
105
+ }
106
+ end
107
+ end
@@ -0,0 +1,108 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # Selectors for the PDC data center. Each key should map to a field in the
6
+ # solr schema; its entry (a "selector") picks nodes from the ISO document with
7
+ # xpaths, applies default_values if none of the xpaths resolved to a value,
8
+ # and formats the field. xpaths and multivalue are required; default_values,
9
+ # format, and reduce are optional.
10
+ #
11
+ # reduce takes the formatted result of multiple nodes and produces a single
12
+ # result. This is for fields that are not multivalued, but whose value
13
+ # should consider information from all the nodes (for example, storing
14
+ # only the maximum duration from multiple temporal coverage fields).
15
+ # NOTE(review): spatial_area reduces with MAX_SPATIAL_AREA; the name suggests a maximum, not a sum - confirm.
16
+ PDC = {
17
+ authoritative_id: {
18
+ xpaths: ['.//oai:header/oai:identifier'],
19
+ multivalue: false
20
+ },
21
+ title: {
22
+ xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'],
23
+ multivalue: false
24
+ },
25
+ summary: {
26
+ xpaths: ['.//gmd:abstract/gco:CharacterString'],
27
+ multivalue: false
28
+ },
29
+ data_centers: {
30
+ xpaths: [''], # empty xpath: the value is expected to come from default_values - TODO confirm
31
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]],
32
+ multivalue: false
33
+ },
34
+ authors: {
35
+ xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
36
+ multivalue: true
37
+ },
38
+ keywords: {
39
+ xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'],
40
+ multivalue: true
41
+ },
42
+ last_revision_date: {
43
+ xpaths: ['.//oai:header/oai:datestamp'],
44
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html; DateTime.now is evaluated once, when this hash is built
45
+ multivalue: false,
46
+ format: Helpers::SolrFormat::DATE
47
+ },
48
+ dataset_url: {
49
+ xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
50
+ multivalue: false,
51
+ format: Helpers::SolrFormat::HTTP_URL_FORMAT
52
+ },
53
+ spatial_coverages: {
54
+ xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
55
+ multivalue: true,
56
+ format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
57
+ },
58
+ spatial: {
59
+ xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
60
+ multivalue: true,
61
+ format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
62
+ },
63
+ spatial_area: {
64
+ xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
65
+ multivalue: false,
66
+ reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
67
+ format: Helpers::IsoToSolrFormat::SPATIAL_AREA
68
+ },
69
+ temporal: {
70
+ xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
71
+ multivalue: true,
72
+ format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
73
+ },
74
+ temporal_coverages: {
75
+ xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
76
+ multivalue: true,
77
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
78
+ },
79
+ temporal_duration: {
80
+ xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
81
+ multivalue: false,
82
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
83
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
84
+ },
85
+ source: {
86
+ xpaths: [''],
87
+ default_values: ['ADE'],
88
+ multivalue: false
89
+ },
90
+ facet_data_center: {
91
+ xpaths: [''],
92
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:short_name]}"],
93
+ multivalue: false
94
+ },
95
+ facet_spatial_scope: {
96
+ xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
97
+ multivalue: true,
98
+ format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
+ },
100
+ facet_temporal_duration: {
101
+ xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
102
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
+ format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
+ multivalue: true
105
+ }
106
+ }
107
+ end
108
+ end
@@ -0,0 +1,106 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # Selectors for the RDA data center. Each key should map to a field in the
6
+ # solr schema; its entry (a "selector") picks nodes from the harvested document
7
+ # (the xpaths use the oai: and dif: prefixes), applies default_values if none
8
+ # of the xpaths resolved to a value, and formats the field. xpaths and
9
+ # multivalue are required; default_values, format and reduce are optional.
10
+ RDA = {
11
+ authoritative_id: {
12
+ xpaths: ['.//oai:header/oai:identifier'],
13
+ multivalue: false
14
+ },
15
+ title: {
16
+ xpaths: ['.//dif:Entry_Title'],
17
+ multivalue: false
18
+ },
19
+ summary: {
20
+ xpaths: ['.//dif:Summary'],
21
+ multivalue: false
22
+ },
23
+ data_centers: {
24
+ xpaths: [''], # empty xpath: the value is expected to come from default_values - TODO confirm
25
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]],
26
+ multivalue: false
27
+ },
28
+ authors: {
29
+ xpaths: [''], # no author xpath is configured, and no default_values either
30
+ multivalue: true
31
+ },
32
+ keywords: {
33
+ xpaths: [
34
+ './/dif:Parameters/dif:Category',
35
+ './/dif:Parameters/dif:Topic',
36
+ './/dif:Parameters/dif:Term',
37
+ './/dif:Parameters/dif:Variable_Level_1'
38
+ ].reverse, # reversed: Variable_Level_1 comes first, Category last
39
+ multivalue: true
40
+ },
41
+ last_revision_date: {
42
+ xpaths: ['.//dif:Last_DIF_Revision_Date'],
43
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html; DateTime.now is evaluated once, when this hash is built
44
+ multivalue: false,
45
+ format: Helpers::SolrFormat::DATE
46
+ },
47
+ dataset_url: {
48
+ xpaths: ['.//dif:Related_URL/dif:URL'],
49
+ multivalue: false
50
+ },
51
+ spatial_coverages: {
52
+ xpaths: ['.//dif:Spatial_Coverage'],
53
+ multivalue: true,
54
+ format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
55
+ },
56
+ spatial: {
57
+ xpaths: ['.//dif:Spatial_Coverage'],
58
+ multivalue: true,
59
+ format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
60
+ },
61
+ spatial_area: {
62
+ xpaths: ['.//dif:Spatial_Coverage'],
63
+ multivalue: false,
64
+ reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
65
+ format: Helpers::IsoToSolrFormat::SPATIAL_AREA
66
+ },
67
+ temporal: {
68
+ xpaths: ['.//dif:Temporal_Coverage'],
69
+ multivalue: true,
70
+ format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
71
+ },
72
+ temporal_coverages: {
73
+ xpaths: ['.//dif:Temporal_Coverage'],
74
+ multivalue: true,
75
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
76
+ },
77
+ temporal_duration: {
78
+ xpaths: ['.//dif:Temporal_Coverage'],
79
+ multivalue: false,
80
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
81
+ format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
82
+ },
83
+ source: {
84
+ xpaths: [''],
85
+ default_values: ['ADE'],
86
+ multivalue: false
87
+ },
88
+ facet_data_center: {
89
+ xpaths: [''],
90
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:short_name]}"],
91
+ multivalue: false
92
+ },
93
+ facet_spatial_scope: {
94
+ xpaths: ['.//dif:Spatial_Coverage'],
95
+ multivalue: true,
96
+ format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
97
+ },
98
+ facet_temporal_duration: {
99
+ xpaths: ['.//dif:Temporal_Coverage'],
100
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
101
+ format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
102
+ multivalue: true
103
+ }
104
+ }
105
+ end
106
+ end
@@ -0,0 +1,89 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # The hash contains keys that should map to the fields in the solr schema,
6
+ # the keys are called selectors and are in charge of selecting the nodes
7
+ # from the ISO document, applying the default value if none of the xpaths
8
+ # resolved to a value and formatting the field. xpaths and multivalue are
9
+ # required, default_value and format are optional
10
+ TDAR = {
11
+ authoritative_id: {
12
+ xpaths: ['.//atom:link/@href'],
13
+ multivalue: false,
14
+ format: proc do |node|
15
+ 'TDAR-' << node.text.split('/')[4] || ''
16
+ end
17
+ },
18
+ title: {
19
+ xpaths: ['.//atom:title'],
20
+ multivalue: false
21
+ },
22
+ summary: {
23
+ xpaths: ['.//atom:summary'],
24
+ multivalue: false
25
+ },
26
+ data_centers: {
27
+ xpaths: [''],
28
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]],
29
+ multivalue: false
30
+ },
31
+ authors: {
32
+ xpaths: ['.//atom:author/atom:name'],
33
+ multivalue: true
34
+ },
35
+ keywords: {
36
+ xpaths: [''],
37
+ multivalue: true,
38
+ format: Helpers::IsoToSolrFormat::KEYWORDS
39
+ },
40
+ last_revision_date: {
41
+ xpaths: ['.//atom:updated'],
42
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
43
+ multivalue: false,
44
+ format: Helpers::SolrFormat::DATE
45
+ },
46
+ dataset_url: {
47
+ xpaths: ['.//atom:link/@href'],
48
+ multivalue: false
49
+ },
50
+ spatial_coverages: {
51
+ xpaths: ['.//georss:box'],
52
+ multivalue: true,
53
+ format: Helpers::TdarFormat::SPATIAL_DISPLAY
54
+ },
55
+ spatial: {
56
+ xpaths: ['.//georss:box'],
57
+ multivalue: true,
58
+ format: Helpers::TdarFormat::SPATIAL_INDEX
59
+ },
60
+ spatial_area: {
61
+ xpaths: ['.//georss:box'],
62
+ multivalue: false,
63
+ reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
64
+ format: Helpers::TdarFormat::SPATIAL_AREA
65
+ },
66
+ source: {
67
+ xpaths: [''],
68
+ default_values: ['ADE'],
69
+ multivalue: false
70
+ },
71
+ facet_data_center: {
72
+ xpaths: [''],
73
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:short_name]}"],
74
+ multivalue: false
75
+ },
76
+ facet_spatial_scope: {
77
+ xpaths: ['.//georss:box'],
78
+ multivalue: true,
79
+ format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
80
+ },
81
+ facet_temporal_duration: {
82
+ xpaths: [''],
83
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
84
+ format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
85
+ multivalue: true
86
+ }
87
+ }
88
+ end
89
+ end
@@ -0,0 +1,105 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ # Selectors for the USGS data center. Each key should map to a field in the
6
+ # solr schema; its entry (a "selector") picks nodes from the ISO document with
7
+ # xpaths, applies default_values if none of the xpaths resolved to a value,
8
+ # and formats the field. xpaths and multivalue are required; default_values,
9
+ # format and reduce are optional.
10
+ USGS = {
11
+ authoritative_id: {
12
+ xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
13
+ multivalue: false
14
+ },
15
+ title: {
16
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
17
+ multivalue: false
18
+ },
19
+ summary: {
20
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
21
+ multivalue: false
22
+ },
23
+ data_centers: {
24
+ xpaths: [''], # empty xpath: the value is expected to come from default_values - TODO confirm
25
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]],
26
+ multivalue: false
27
+ },
28
+ authors: {
29
+ xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
30
+ multivalue: true
31
+ },
32
+ keywords: {
33
+ xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
34
+ multivalue: true
35
+ },
36
+ last_revision_date: {
37
+ xpaths: ['.//gmd:dateStamp/gco:DateTime'],
38
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html; DateTime.now is evaluated once, when this hash is built
39
+ multivalue: false,
40
+ format: Helpers::SolrFormat::DATE
41
+ },
42
+ dataset_url: {
43
+ xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
44
+ multivalue: false
45
+ },
46
+ spatial_coverages: {
47
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
48
+ multivalue: true,
49
+ format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
50
+ },
51
+ spatial: {
52
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
53
+ multivalue: true,
54
+ format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
55
+ },
56
+ spatial_area: {
57
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
58
+ multivalue: false,
59
+ reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
60
+ format: Helpers::IsoToSolrFormat::SPATIAL_AREA
61
+ },
62
+ temporal: {
63
+ xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
64
+ multivalue: true,
65
+ format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING
66
+ },
67
+ temporal_coverages: {
68
+ xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
69
+ multivalue: true,
70
+ format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING
71
+ },
72
+ temporal_duration: {
73
+ xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
74
+ multivalue: false,
75
+ reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
76
+ format: Helpers::UsgsFormat::TEMPORAL_DURATION
77
+ },
78
+ sensors: {
79
+ xpaths: [''], # no sensor xpath is configured, and no default_values either
80
+ multivalue: true
81
+ },
82
+ source: {
83
+ xpaths: [''],
84
+ default_values: ['ADE'],
85
+ multivalue: false
86
+ },
87
+ facet_data_center: {
88
+ xpaths: [''],
89
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:short_name]}"],
90
+ multivalue: false
91
+ },
92
+ facet_spatial_scope: {
93
+ xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
94
+ multivalue: true,
95
+ format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
96
+ },
97
+ facet_temporal_duration: {
98
+ xpaths: [".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"],
99
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
100
+ format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
101
+ multivalue: true
102
+ }
103
+ }
104
+ end
105
+ end