search_solr_tools 6.1.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,22 +0,0 @@
1
- Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
2
-
3
- module SearchSolrTools
4
- module Helpers
5
- # This hash grabs all the selector files inside the selectors directory,
6
- # to add a new source we need to create a selector file and add it to this hash.
7
- SELECTORS = {
8
- adc: Selectors::ADC,
9
- data_one: Selectors::DATA_ONE,
10
- echo: Selectors::ECHO,
11
- ices: Selectors::ICES,
12
- nmi: Selectors::NMI,
13
- ncdc_paleo: Selectors::NCDC_PALEO,
14
- nodc: Selectors::NODC,
15
- pdc: Selectors::PDC,
16
- r2r: Selectors::R2R,
17
- rda: Selectors::RDA,
18
- tdar: Selectors::TDAR,
19
- usgs: Selectors::USGS
20
- }
21
- end
22
- end
@@ -1,70 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- # Special formatter for dealing with temporal metadata issues in the TDAR feed
8
- class TdarFormat < IsoToSolrFormat
9
- SPATIAL_DISPLAY = proc { |node| TdarFormat.spatial_display_str(node) }
10
- SPATIAL_INDEX = proc { |node| TdarFormat.spatial_index_str(node) }
11
- FACET_SPATIAL_SCOPE = proc { |node| TdarFormat.get_spatial_scope_facet(node) }
12
-
13
- TEMPORAL_INDEX_STRING = proc { |node| TdarFormat.temporal_index_str(node) }
14
- TEMPORAL_DISPLAY_STRING = proc { |node| TdarFormat.temporal_display_str(node) }
15
- TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| TdarFormat.temporal_display_str(node, true) }
16
- TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration(node) }
17
- FACET_TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration_facet(node) }
18
-
19
- def self.get_spatial_scope_facet(node)
20
- box = bounding_box(node)
21
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
22
- end
23
-
24
- def self.date_range(temporal_node, formatted = false)
25
- xpath = '.'
26
- namespaces = IsoNamespaces.namespaces(temporal_node)
27
-
28
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
29
- date_str = temporal_node.at_xpath(xpath, namespaces).text
30
-
31
- super if temporal_node_count != 1
32
-
33
- case date_str
34
- when /^[0-9]{4}$/
35
- year_to_range(date_str)
36
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/
37
- single_date_to_range(date_str)
38
- else
39
- super
40
- end
41
- end
42
-
43
- def self.single_date_to_range(date)
44
- {
45
- start: date,
46
- end: date
47
- }
48
- end
49
-
50
- def self.year_to_range(year)
51
- {
52
- start: "#{year}-01-01",
53
- end: "#{year}-12-31"
54
- }
55
- end
56
-
57
- # Bounding box is defined by two coordinates to create a point.
58
- # Create a bounding box from this point.
59
- def self.bounding_box(node)
60
- point = node.text.split(' ')
61
- {
62
- west: point[1],
63
- south: point[0],
64
- east: point[3],
65
- north: point[2]
66
- }
67
- end
68
- end
69
- end
70
- end
@@ -1,50 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Special formatter for dealing with temporal metadata issues in the USGS feed
7
- class UsgsFormat < IsoToSolrFormat
8
- TEMPORAL_INDEX_STRING = proc { |node| UsgsFormat.temporal_index_str(node) }
9
- TEMPORAL_DISPLAY_STRING = proc { |node| UsgsFormat.temporal_display_str(node) }
10
- TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration(node) }
11
- FACET_TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration_facet(node) }
12
-
13
- # for USGS, a single date entry (i.e., missing either start or end date, and
14
- # the value that is present is not clearly labeled) means the whole year if
15
- # just a year is given, or just a single day if just a single day is given
16
- def self.date_range(temporal_node, formatted = false)
17
- xpath = './/gco:Date'
18
- namespaces = IsoNamespaces.namespaces(temporal_node)
19
-
20
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
21
- date_str = temporal_node.at_xpath(xpath, namespaces).text
22
-
23
- super if temporal_node_count != 1
24
-
25
- case date_str
26
- when /^[0-9]{4}$/
27
- year_to_range(date_str)
28
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
29
- single_date_to_range(date_str)
30
- else
31
- super
32
- end
33
- end
34
-
35
- def self.single_date_to_range(date)
36
- {
37
- start: date,
38
- end: date
39
- }
40
- end
41
-
42
- def self.year_to_range(year)
43
- {
44
- start: "#{year}-01-01",
45
- end: "#{year}-12-31"
46
- }
47
- end
48
- end
49
- end
50
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/data_one_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- ADC = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/data_one_format'
2
- require_relative '../helpers/solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- DATA_ONE = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end
@@ -1,112 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional
11
- ECHO = {
12
- authoritative_id: {
13
- xpaths: ['.//@echo_dataset_id'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//Collection/LongName'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//Collection/Description'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//Collection/ScienceKeywords/ScienceKeyword'],
35
- multivalue: true,
36
- format: Helpers::IsoToSolrFormat::KEYWORDS
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//Collection/LastUpdate'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//Collection/OnlineResources/OnlineResource[contains(./Type/text(),"static URL")]/URL',
46
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(), "VIEW RELATED INFORMATION")]/URL',
47
- './/Collection/OnlineAccessURLs/OnlineAccessURL/[contains(./URLDescription/text(), "Data Access")]/URL',
48
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"Guide Document for this product at NSIDC")]/URL',
49
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"DOI URL")]/URL',
50
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"ECSCollGuide")]/URL',
51
- './/Collection/OnlineResources/OnlineResource[contains(./Type/text(),"GET DATA : ON-LINE ARCHIVE")]/URL',
52
- './/Collection/OnlineResources/OnlineResource/URL',
53
- './/Collection/OnlineAccessURLs/OnlineAccessURL/URL'],
54
- default_values: ['https://earthdata.nasa.gov/echo'],
55
- multivalue: false
56
- },
57
- spatial_coverages: {
58
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
61
- },
62
- spatial: {
63
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
64
- multivalue: true,
65
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
66
- },
67
- spatial_area: {
68
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
69
- multivalue: false,
70
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
71
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- temporal: {
85
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
86
- multivalue: true,
87
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
88
- },
89
- source: {
90
- xpaths: [''],
91
- default_values: ['ADE'],
92
- multivalue: false
93
- },
94
- facet_data_center: {
95
- xpaths: [''],
96
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:short_name]}"],
97
- multivalue: false
98
- },
99
- facet_spatial_scope: {
100
- xpaths: ['.//Collection/Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle'],
101
- multivalue: true,
102
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
103
- },
104
- facet_temporal_duration: {
105
- xpaths: ['.//Collection/Temporal/RangeDateTime'],
106
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
107
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
108
- multivalue: true
109
- }
110
- }
111
- end
112
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional
11
- ICES = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
46
- multivalue: false,
47
- format: Helpers::IsoToSolrFormat::ICES_DATASET_URL
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: false,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end
@@ -1,90 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/ncdc_paleo_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- NCDC_PALEO = {
7
- title: {
8
- xpaths: ['/rdf:RDF/rdf:Description/dc:title'],
9
- multivalue: false
10
- },
11
- summary: {
12
- xpaths: ['/rdf:RDF/rdf:Description/dc:description'],
13
- multivalue: false
14
- },
15
- data_centers: {
16
- xpaths: [''],
17
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]],
18
- multivalue: false
19
- },
20
- authors: {
21
- xpaths: ['/rdf:RDF/rdf:Description/dc:creator'],
22
- multivalue: true,
23
- format: Helpers::NcdcPaleoFormat.method(:author)
24
- },
25
- keywords: {
26
- xpaths: ['/rdf:RDF/rdf:Description/dc:subject'],
27
- multivalue: true
28
- },
29
- last_revision_date: {
30
- xpaths: ['/rdf:RDF/rdf:Description/dc:date'],
31
- default_values: [''], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
32
- multivalue: false,
33
- format: Helpers::SolrFormat::DATE
34
- },
35
- spatial_coverages: {
36
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
37
- multivalue: true,
38
- format: Helpers::NcdcPaleoFormat.method(:spatial_display_str)
39
- },
40
- spatial: {
41
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
42
- multivalue: true,
43
- format: Helpers::NcdcPaleoFormat.method(:spatial_index_str)
44
- },
45
- spatial_area: {
46
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
47
- multivalue: false,
48
- reduce: Helpers::NcdcPaleoFormat.method(:get_max_spatial_area),
49
- format: Helpers::NcdcPaleoFormat.method(:spatial_area_str)
50
- },
51
- temporal: {
52
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
53
- multivalue: true,
54
- format: Helpers::NcdcPaleoFormat.method(:temporal_index_str)
55
- },
56
- temporal_coverages: {
57
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
58
- multivalue: true,
59
- format: Helpers::NcdcPaleoFormat.method(:temporal_display_str)
60
- },
61
- temporal_duration: {
62
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
63
- multivalue: false,
64
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
65
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration)
66
- },
67
- source: {
68
- xpaths: [''],
69
- default_values: ['ADE'],
70
- multivalue: false
71
- },
72
- facet_data_center: {
73
- xpaths: [''],
74
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NCDC_PALEO][:short_name]}"],
75
- multivalue: false
76
- },
77
- facet_spatial_scope: {
78
- xpaths: ['/rdf:RDF/rdf:Description/ows:WGS84BoundingBox'],
79
- multivalue: true,
80
- format: Helpers::NcdcPaleoFormat.method(:get_spatial_scope_facet)
81
- },
82
- facet_temporal_duration: {
83
- xpaths: ['/rdf:RDF/rdf:Description/dc:coverage'],
84
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
85
- format: Helpers::NcdcPaleoFormat.method(:get_temporal_duration_facet),
86
- multivalue: true
87
- }
88
- }
89
- end
90
- end