search_solr_tools 6.1.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional.
11
- NMI = {
12
- authoritative_id: {
13
- xpaths: ['.//oai:header/oai:identifier'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//dif:Entry_Title'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//dif:Summary'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: [
35
- './/dif:Parameters/dif:Category',
36
- './/dif:Parameters/dif:Topic',
37
- './/dif:Parameters/dif:Term',
38
- './/dif:Parameters/dif:Variable_Level_1'
39
- ].reverse,
40
- multivalue: true
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//dif:Related_URL/dif:URL'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//dif:Spatial_Coverage'],
54
- multivalue: true,
55
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: false,
65
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
67
- },
68
- temporal: {
69
- xpaths: ['.//dif:Temporal_Coverage'],
70
- multivalue: true,
71
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- source: {
85
- xpaths: [''],
86
- default_values: ['ADE'],
87
- multivalue: false
88
- },
89
- facet_data_center: {
90
- xpaths: [''],
91
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:short_name]}"],
92
- multivalue: false
93
- },
94
- facet_spatial_scope: {
95
- xpaths: ['.//dif:Spatial_Coverage'],
96
- multivalue: true,
97
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
- },
99
- facet_temporal_duration: {
100
- xpaths: ['.//dif:Temporal_Coverage'],
101
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
- multivalue: true
104
- }
105
- }
106
- end
107
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional
11
- NODC = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL',
46
- './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'],
47
- multivalue: false
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: true,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end
@@ -1,109 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value, format, and reduce are optional.
11
- #
12
- # reduce takes the formatted result of multiple nodes and produces a single
13
- # result. This is for fields that are not multivalued, but their value
14
- # should consider information from all the nodes (for example, storing
15
- # only the maximum duration from multiple temporal coverage fields, taking
16
- # the sum of multiple spatial areas)
17
- PDC = {
18
- authoritative_id: {
19
- xpaths: ['.//oai:header/oai:identifier'],
20
- multivalue: false
21
- },
22
- title: {
23
- xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'],
24
- multivalue: false
25
- },
26
- summary: {
27
- xpaths: ['.//gmd:abstract/gco:CharacterString'],
28
- multivalue: false
29
- },
30
- data_centers: {
31
- xpaths: [''],
32
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]],
33
- multivalue: false
34
- },
35
- authors: {
36
- xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
37
- multivalue: true
38
- },
39
- keywords: {
40
- xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'],
41
- multivalue: true
42
- },
43
- last_revision_date: {
44
- xpaths: ['.//oai:header/oai:datestamp'],
45
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
46
- multivalue: false,
47
- format: Helpers::SolrFormat::DATE
48
- },
49
- dataset_url: {
50
- xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
51
- multivalue: false,
52
- format: Helpers::SolrFormat::HTTP_URL_FORMAT
53
- },
54
- spatial_coverages: {
55
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
58
- },
59
- spatial: {
60
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
61
- multivalue: true,
62
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
63
- },
64
- spatial_area: {
65
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
66
- multivalue: false,
67
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
68
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
69
- },
70
- temporal: {
71
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
72
- multivalue: true,
73
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
74
- },
75
- temporal_coverages: {
76
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
77
- multivalue: true,
78
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
79
- },
80
- temporal_duration: {
81
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
82
- multivalue: false,
83
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
84
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
85
- },
86
- source: {
87
- xpaths: [''],
88
- default_values: ['ADE'],
89
- multivalue: false
90
- },
91
- facet_data_center: {
92
- xpaths: [''],
93
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:short_name]}"],
94
- multivalue: false
95
- },
96
- facet_spatial_scope: {
97
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
98
- multivalue: true,
99
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
100
- },
101
- facet_temporal_duration: {
102
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
103
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
104
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
105
- multivalue: true
106
- }
107
- }
108
- end
109
- end
@@ -1,115 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/r2r_format'
4
-
5
- module SearchSolrTools
6
- module Selectors
7
- # The hash contains keys that should map to the fields in the solr schema,
8
- # the keys are called selectors and are in charge of selecting the nodes
9
- # from the ISO document, applying the default value if none of the xpaths
10
- # resolved to a value and formatting the field. xpaths and multivalue are
11
- # required, default_value, format, and reduce are optional.
12
- #
13
- # reduce takes the formatted result of multiple nodes and produces a single
14
- # result. This is for fields that are not multivalued, but their value
15
- # should consider information from all the nodes (for example, storing
16
- # only the maximum duration from multiple temporal coverage fields, taking
17
- # the sum of multiple spatial areas)
18
- R2R = {
19
- authoritative_id: {
20
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
21
- multivalue: false
22
- },
23
- title: {
24
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor'],
25
- multivalue: false
26
- },
27
- summary: {
28
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
29
- multivalue: false
30
- },
31
- data_centers: {
32
- xpaths: [''],
33
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]],
34
- multivalue: false
35
- },
36
- authors: {
37
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='contributor']]/gmd:individualName/gmx:Anchor"],
38
- multivalue: true
39
- },
40
- keywords: {
41
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
42
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
43
- multivalue: true
44
- },
45
- last_revision_date: {
46
- xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
47
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
48
- multivalue: false,
49
- format: Helpers::SolrFormat::DATE
50
- },
51
- dataset_url: {
52
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href'],
53
- multivalue: false,
54
- format: Helpers::IsoToSolrFormat::DATASET_URL
55
- },
56
- spatial_coverages: {
57
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
58
- multivalue: true,
59
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
60
- },
61
- spatial: {
62
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
63
- multivalue: true,
64
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
65
- },
66
- spatial_area: {
67
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
68
- multivalue: false,
69
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
70
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
71
- },
72
- temporal_coverages: {
73
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
74
- multivalue: false,
75
- format: Helpers::R2RFormat::TEMPORAL_DISPLAY_STRING
76
- },
77
- temporal_duration: {
78
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
79
- multivalue: false,
80
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
81
- format: Helpers::R2RFormat::TEMPORAL_DURATION
82
- },
83
- temporal: {
84
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
85
- multivalue: false,
86
- format: Helpers::R2RFormat::TEMPORAL_INDEX_STRING
87
- },
88
- sensors: {
89
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:type/gmx:Anchor'],
90
- multivalue: true
91
- },
92
- source: {
93
- xpaths: [''],
94
- default_values: ['ADE'],
95
- multivalue: false
96
- },
97
- facet_data_center: {
98
- xpaths: [''],
99
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:short_name]}"],
100
- multivalue: false
101
- },
102
- facet_spatial_scope: {
103
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
104
- multivalue: true,
105
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
106
- },
107
- facet_temporal_duration: {
108
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
109
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
110
- format: Helpers::R2RFormat::FACET_TEMPORAL_DURATION,
111
- multivalue: true
112
- }
113
- }
114
- end
115
- end
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required, default_value and format are optional.
11
- RDA = {
12
- authoritative_id: {
13
- xpaths: ['.//oai:header/oai:identifier'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//dif:Entry_Title'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//dif:Summary'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: [
35
- './/dif:Parameters/dif:Category',
36
- './/dif:Parameters/dif:Topic',
37
- './/dif:Parameters/dif:Term',
38
- './/dif:Parameters/dif:Variable_Level_1'
39
- ].reverse,
40
- multivalue: true
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//dif:Related_URL/dif:URL'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//dif:Spatial_Coverage'],
54
- multivalue: true,
55
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: false,
65
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
67
- },
68
- temporal: {
69
- xpaths: ['.//dif:Temporal_Coverage'],
70
- multivalue: true,
71
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- source: {
85
- xpaths: [''],
86
- default_values: ['ADE'],
87
- multivalue: false
88
- },
89
- facet_data_center: {
90
- xpaths: [''],
91
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:short_name]}"],
92
- multivalue: false
93
- },
94
- facet_spatial_scope: {
95
- xpaths: ['.//dif:Spatial_Coverage'],
96
- multivalue: true,
97
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
- },
99
- facet_temporal_duration: {
100
- xpaths: ['.//dif:Temporal_Coverage'],
101
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
- multivalue: true
104
- }
105
- }
106
- end
107
- end
@@ -1,91 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/tdar_format'
4
-
5
- module SearchSolrTools
6
- module Selectors
7
- # The hash contains keys that should map to the fields in the solr schema,
8
- # the keys are called selectors and are in charge of selecting the nodes
9
- # from the ISO document, applying the default value if none of the xpaths
10
- # resolved to a value and formatting the field. xpaths and multivalue are
11
- # required, default_value and format are optional
12
- TDAR = {
13
- authoritative_id: {
14
- xpaths: ['.//atom:link/@href'],
15
- multivalue: false,
16
- format: proc do |node|
17
- 'TDAR-' << node.text.split('/')[4] || ''
18
- end
19
- },
20
- title: {
21
- xpaths: ['.//atom:title'],
22
- multivalue: false
23
- },
24
- summary: {
25
- xpaths: ['.//atom:summary'],
26
- multivalue: false
27
- },
28
- data_centers: {
29
- xpaths: [''],
30
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]],
31
- multivalue: false
32
- },
33
- authors: {
34
- xpaths: ['.//atom:author/atom:name'],
35
- multivalue: true
36
- },
37
- keywords: {
38
- xpaths: [''],
39
- multivalue: true,
40
- format: Helpers::IsoToSolrFormat::KEYWORDS
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//atom:updated'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//atom:link/@href'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//georss:box'],
54
- multivalue: true,
55
- format: Helpers::TdarFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//georss:box'],
59
- multivalue: true,
60
- format: Helpers::TdarFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//georss:box'],
64
- multivalue: false,
65
- reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::TdarFormat::SPATIAL_AREA
67
- },
68
- source: {
69
- xpaths: [''],
70
- default_values: ['ADE'],
71
- multivalue: false
72
- },
73
- facet_data_center: {
74
- xpaths: [''],
75
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:short_name]}"],
76
- multivalue: false
77
- },
78
- facet_spatial_scope: {
79
- xpaths: ['.//georss:box'],
80
- multivalue: true,
81
- format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
82
- },
83
- facet_temporal_duration: {
84
- xpaths: [''],
85
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
86
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
87
- multivalue: true
88
- }
89
- }
90
- end
91
- end