search_solr_tools 6.1.0 → 6.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/bin/search_solr_tools +1 -13
  4. data/lib/search_solr_tools/config/environments.yaml +0 -32
  5. data/lib/search_solr_tools/harvesters/base.rb +0 -1
  6. data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
  7. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
  8. data/lib/search_solr_tools/version.rb +1 -1
  9. data/lib/search_solr_tools.rb +1 -2
  10. metadata +2 -44
  11. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  12. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  13. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  14. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  15. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  16. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  17. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  18. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  19. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  20. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  21. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  22. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  23. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  24. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  25. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  26. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  27. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  28. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  29. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  32. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  33. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  34. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  35. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  36. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  37. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  38. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  39. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  42. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  43. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  44. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  45. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  46. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  47. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  48. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  49. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  50. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  51. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  52. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/data_one_format'
2
- require_relative '../helpers/solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- DATA_ONE = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end
@@ -1,112 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema.
7
- # The keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default values if none of the xpaths
9
- # resolved to a value, and formatting the field. xpaths and multivalue are
10
- # required; default_values, reduce and format are optional.
11
- ECHO = {
12
- authoritative_id: {
13
- xpaths: ['.//@echo_dataset_id'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//Collection/LongName'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//Collection/Description'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//Collection/ScienceKeywords/ScienceKeyword'],
35
- multivalue: true,
36
- format: Helpers::IsoToSolrFormat::KEYWORDS
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//Collection/LastUpdate'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//Collection/OnlineResources/OnlineResource[contains(./Type/text(),"static URL")]/URL',
46
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(), "VIEW RELATED INFORMATION")]/URL',
47
- './/Collection/OnlineAccessURLs/OnlineAccessURL/[contains(./URLDescription/text(), "Data Access")]/URL',
48
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"Guide Document for this product at NSIDC")]/URL',
49
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"DOI URL")]/URL',
50
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"ECSCollGuide")]/URL',
51
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"GET DATA : ON-LINE ARCHIVE")]/URL',
52
- './/Collection/OnlineResources/OnlineResource/URL',
53
- './/Collection/OnlineAccessURLs/OnlineAccessURL/URL'],
54
- default_values: ['https://earthdata.nasa.gov/echo'],
55
- multivalue: false
56
- },
57
- spatial_coverages: {
58
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
61
- },
62
- spatial: {
63
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
64
- multivalue: true,
65
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
66
- },
67
- spatial_area: {
68
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
69
- multivalue: false,
70
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
71
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- temporal: {
85
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
86
- multivalue: true,
87
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
88
- },
89
- source: {
90
- xpaths: [''],
91
- default_values: ['ADE'],
92
- multivalue: false
93
- },
94
- facet_data_center: {
95
- xpaths: [''],
96
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:short_name]}"],
97
- multivalue: false
98
- },
99
- facet_spatial_scope: {
100
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
101
- multivalue: true,
102
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
103
- },
104
- facet_temporal_duration: {
105
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
106
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
107
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
108
- multivalue: true
109
- }
110
- }
111
- end
112
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema.
7
- # The keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default values if none of the xpaths
9
- # resolved to a value, and formatting the field. xpaths and multivalue are
10
- # required; default_values, reduce and format are optional.
11
- ICES = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
46
- multivalue: false,
47
- format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: false,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end
@@ -1,90 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/ncdc_paleo_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- NCDC_PALEO = {
7
- title: {
8
- xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
9
- multivalue: false
10
- },
11
- summary: {
12
- xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
13
- multivalue: false
14
- },
15
- data_centers: {
16
- xpaths: [''],
17
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
18
- multivalue: false
19
- },
20
- authors: {
21
- xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
22
- multivalue: true,
23
- format: Helpers::NcdcPaleoFormat.method(:author)
24
- },
25
- keywords: {
26
- xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
27
- multivalue: true
28
- },
29
- last_revision_date: {
30
- xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
31
- default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
32
- multivalue: false,
33
- format: Helpers::SolrFormat::DATE
34
- },
35
- spatial_coverages: {
36
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
37
- multivalue: true,
38
- format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
39
- },
40
- spatial: {
41
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
42
- multivalue: true,
43
- format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
44
- },
45
- spatial_area: {
46
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
47
- multivalue: false,
48
- reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
49
- format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
50
- },
51
- temporal: {
52
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
53
- multivalue: true,
54
- format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
55
- },
56
- temporal_coverages: {
57
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
58
- multivalue: true,
59
- format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
60
- },
61
- temporal_duration: {
62
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
63
- multivalue: false,
64
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
65
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
66
- },
67
- source: {
68
- xpaths: [''],
69
- default_values: ['ADE'],
70
- multivalue: false
71
- },
72
- facet_data_center: {
73
- xpaths: [''],
74
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
75
- multivalue: false
76
- },
77
- facet_spatial_scope: {
78
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
79
- multivalue: true,
80
- format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
81
- },
82
- facet_temporal_duration: {
83
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
84
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
85
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
86
- multivalue: true
87
- }
88
- }
89
- end
90
- end
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema.
7
- # The keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default values if none of the xpaths
9
- # resolved to a value, and formatting the field. xpaths and multivalue are
10
- # required; default_values, reduce and format are optional.
11
- NMI = {
12
- authoritative_id: {
13
- xpaths: ['.//oai:header/oai:identifier'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//dif:Entry_Title'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//dif:Summary'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: [
35
- './/dif:Parameters/dif:Category',
36
- './/dif:Parameters/dif:Topic',
37
- './/dif:Parameters/dif:Term',
38
- './/dif:Parameters/dif:Variable_Level_1'
39
- ].reverse,
40
- multivalue: true
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//dif:Related_URL/dif:URL'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//dif:Spatial_Coverage'],
54
- multivalue: true,
55
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: false,
65
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
67
- },
68
- temporal: {
69
- xpaths: ['.//dif:Temporal_Coverage'],
70
- multivalue: true,
71
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- source: {
85
- xpaths: [''],
86
- default_values: ['ADE'],
87
- multivalue: false
88
- },
89
- facet_data_center: {
90
- xpaths: [''],
91
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:short_name]}"],
92
- multivalue: false
93
- },
94
- facet_spatial_scope: {
95
- xpaths: ['.//dif:Spatial_Coverage'],
96
- multivalue: true,
97
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
- },
99
- facet_temporal_duration: {
100
- xpaths: ['.//dif:Temporal_Coverage'],
101
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
- multivalue: true
104
- }
105
- }
106
- end
107
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema.
7
- # The keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default values if none of the xpaths
9
- # resolved to a value, and formatting the field. xpaths and multivalue are
10
- # required; default_values, reduce and format are optional.
11
- NODC = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL',
46
- './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'],
47
- multivalue: false
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: true,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end