search_solr_tools 6.1.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,22 +0,0 @@
1
- Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
2
-
3
- module SearchSolrTools
4
- module Helpers
5
- # This hash grabs all the selector files inside the selectors directory,
6
- # to add a new source we need to create a selector file and add it to this hash.
7
- SELECTORS = {
8
- adc: Selectors::ADC,
9
- data_one: Selectors::DATA_ONE,
10
- echo: Selectors::ECHO,
11
- ices: Selectors::ICES,
12
- nmi: Selectors::NMI,
13
- ncdc_paleo: Selectors::NCDC_PALEO,
14
- nodc: Selectors::NODC,
15
- pdc: Selectors::PDC,
16
- r2r: Selectors::R2R,
17
- rda: Selectors::RDA,
18
- tdar: Selectors::TDAR,
19
- usgs: Selectors::USGS
20
- }
21
- end
22
- end
@@ -1,70 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- # Special formatter for dealing with temporal metadata issues in the TDAR feed
8
- class TdarFormat < IsoToSolrFormat
9
- SPATIAL_DISPLAY = proc { |node| TdarFormat.spatial_display_str(node) }
10
- SPATIAL_INDEX = proc { |node| TdarFormat.spatial_index_str(node) }
11
- FACET_SPATIAL_SCOPE = proc { |node| TdarFormat.get_spatial_scope_facet(node) }
12
-
13
- TEMPORAL_INDEX_STRING = proc { |node| TdarFormat.temporal_index_str(node) }
14
- TEMPORAL_DISPLAY_STRING = proc { |node| TdarFormat.temporal_display_str(node) }
15
- TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| TdarFormat.temporal_display_str(node, true) }
16
- TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration(node) }
17
- FACET_TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration_facet(node) }
18
-
19
- def self.get_spatial_scope_facet(node)
20
- box = bounding_box(node)
21
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
22
- end
23
-
24
- def self.date_range(temporal_node, formatted = false)
25
- xpath = '.'
26
- namespaces = IsoNamespaces.namespaces(temporal_node)
27
-
28
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
29
- date_str = temporal_node.at_xpath(xpath, namespaces).text
30
-
31
- super if temporal_node_count != 1
32
-
33
- case date_str
34
- when /^[0-9]{4}$/
35
- year_to_range(date_str)
36
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/
37
- single_date_to_range(date_str)
38
- else
39
- super
40
- end
41
- end
42
-
43
- def self.single_date_to_range(date)
44
- {
45
- start: date,
46
- end: date
47
- }
48
- end
49
-
50
- def self.year_to_range(year)
51
- {
52
- start: "#{year}-01-01",
53
- end: "#{year}-12-31"
54
- }
55
- end
56
-
57
- # Bounding box is defined by two coordinates to create a point.
58
- # Create a bounding box from this point.
59
- def self.bounding_box(node)
60
- point = node.text.split(' ')
61
- {
62
- west: point[1],
63
- south: point[0],
64
- east: point[3],
65
- north: point[2]
66
- }
67
- end
68
- end
69
- end
70
- end
@@ -1,50 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Special formatter for dealing with temporal metadata issues in the USGS feed
7
- class UsgsFormat < IsoToSolrFormat
8
- TEMPORAL_INDEX_STRING = proc { |node| UsgsFormat.temporal_index_str(node) }
9
- TEMPORAL_DISPLAY_STRING = proc { |node| UsgsFormat.temporal_display_str(node) }
10
- TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration(node) }
11
- FACET_TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration_facet(node) }
12
-
13
- # for USGS, a single date entry (i.e., missing either start or end date, and
14
- # the value that is present is not clearly labeled) means the whole year if
15
- # just a year is given, or just a single day if just a single day is given
16
- def self.date_range(temporal_node, formatted = false)
17
- xpath = './/gco:Date'
18
- namespaces = IsoNamespaces.namespaces(temporal_node)
19
-
20
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
21
- date_str = temporal_node.at_xpath(xpath, namespaces).text
22
-
23
- super if temporal_node_count != 1
24
-
25
- case date_str
26
- when /^[0-9]{4}$/
27
- year_to_range(date_str)
28
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
29
- single_date_to_range(date_str)
30
- else
31
- super
32
- end
33
- end
34
-
35
- def self.single_date_to_range(date)
36
- {
37
- start: date,
38
- end: date
39
- }
40
- end
41
-
42
- def self.year_to_range(year)
43
- {
44
- start: "#{year}-01-01",
45
- end: "#{year}-12-31"
46
- }
47
- end
48
- end
49
- end
50
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/data_one_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- ADC = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/data_one_format'
2
- require_relative '../helpers/solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- DATA_ONE = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end
@@ -1,112 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional
11
- ECHO = {
12
- authoritative_id: {
13
- xpaths: ['.//@echo_dataset_id'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//Collection/LongName'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//Collection/Description'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//Collection/ScienceKeywords/ScienceKeyword'],
35
- multivalue: true,
36
- format: Helpers::IsoToSolrFormat::KEYWORDS
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//Collection/LastUpdate'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//Collection/OnlineResources/OnlineResource[contains(./Type/text(),"static URL")]/URL',
46
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(), "VIEW RELATED INFORMATION")]/URL',
47
- './/Collection/OnlineAccessURLs/OnlineAccessURL/[contains(./URLDescription/text(), "Data Access")]/URL',
48
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"Guide Document for this product at NSIDC")]/URL',
49
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"DOI URL")]/URL',
50
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"ECSCollGuide")]/URL',
51
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"GET DATA : ON-LINE ARCHIVE")]/URL',
52
- './/Collection/OnlineResources/OnlineResource/URL',
53
- './/Collection/OnlineAccessURLs/OnlineAccessURL/URL'],
54
- default_values: ['https://earthdata.nasa.gov/echo'],
55
- multivalue: false
56
- },
57
- spatial_coverages: {
58
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
61
- },
62
- spatial: {
63
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
64
- multivalue: true,
65
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
66
- },
67
- spatial_area: {
68
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
69
- multivalue: false,
70
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
71
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- temporal: {
85
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
86
- multivalue: true,
87
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
88
- },
89
- source: {
90
- xpaths: [''],
91
- default_values: ['ADE'],
92
- multivalue: false
93
- },
94
- facet_data_center: {
95
- xpaths: [''],
96
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:short_name]}"],
97
- multivalue: false
98
- },
99
- facet_spatial_scope: {
100
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
101
- multivalue: true,
102
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
103
- },
104
- facet_temporal_duration: {
105
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
106
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
107
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
108
- multivalue: true
109
- }
110
- }
111
- end
112
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional
11
- ICES = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
46
- multivalue: false,
47
- format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: false,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end
@@ -1,90 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/ncdc_paleo_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- NCDC_PALEO = {
7
- title: {
8
- xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
9
- multivalue: false
10
- },
11
- summary: {
12
- xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
13
- multivalue: false
14
- },
15
- data_centers: {
16
- xpaths: [''],
17
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
18
- multivalue: false
19
- },
20
- authors: {
21
- xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
22
- multivalue: true,
23
- format: Helpers::NcdcPaleoFormat.method(:author)
24
- },
25
- keywords: {
26
- xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
27
- multivalue: true
28
- },
29
- last_revision_date: {
30
- xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
31
- default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
32
- multivalue: false,
33
- format: Helpers::SolrFormat::DATE
34
- },
35
- spatial_coverages: {
36
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
37
- multivalue: true,
38
- format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
39
- },
40
- spatial: {
41
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
42
- multivalue: true,
43
- format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
44
- },
45
- spatial_area: {
46
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
47
- multivalue: false,
48
- reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
49
- format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
50
- },
51
- temporal: {
52
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
53
- multivalue: true,
54
- format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
55
- },
56
- temporal_coverages: {
57
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
58
- multivalue: true,
59
- format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
60
- },
61
- temporal_duration: {
62
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
63
- multivalue: false,
64
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
65
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
66
- },
67
- source: {
68
- xpaths: [''],
69
- default_values: ['ADE'],
70
- multivalue: false
71
- },
72
- facet_data_center: {
73
- xpaths: [''],
74
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
75
- multivalue: false
76
- },
77
- facet_spatial_scope: {
78
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
79
- multivalue: true,
80
- format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
81
- },
82
- facet_temporal_duration: {
83
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
84
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
85
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
86
- multivalue: true
87
- }
88
- }
89
- end
90
- end