search_solr_tools 6.1.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required; default_values, format, and reduce are optional.
11
- NMI = {
12
- authoritative_id: {
13
- xpaths: ['.//oai:header/oai:identifier'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//dif:Entry_Title'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//dif:Summary'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: [
35
- './/dif:Parameters/dif:Category',
36
- './/dif:Parameters/dif:Topic',
37
- './/dif:Parameters/dif:Term',
38
- './/dif:Parameters/dif:Variable_Level_1'
39
- ].reverse,
40
- multivalue: true
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//dif:Related_URL/dif:URL'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//dif:Spatial_Coverage'],
54
- multivalue: true,
55
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: false,
65
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
67
- },
68
- temporal: {
69
- xpaths: ['.//dif:Temporal_Coverage'],
70
- multivalue: true,
71
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- source: {
85
- xpaths: [''],
86
- default_values: ['ADE'],
87
- multivalue: false
88
- },
89
- facet_data_center: {
90
- xpaths: [''],
91
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NMI][:short_name]}"],
92
- multivalue: false
93
- },
94
- facet_spatial_scope: {
95
- xpaths: ['.//dif:Spatial_Coverage'],
96
- multivalue: true,
97
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
- },
99
- facet_temporal_duration: {
100
- xpaths: ['.//dif:Temporal_Coverage'],
101
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
- multivalue: true
104
- }
105
- }
106
- end
107
- end
@@ -1,108 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required; default_values, format, and reduce are optional.
11
- NODC = {
12
- authoritative_id: {
13
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='principalInvestigator']]/gmd:individualName/gco:CharacterString"],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
35
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
36
- multivalue: true
37
- },
38
- last_revision_date: {
39
- xpaths: ['.//gmd:dateStamp/gco:Date'],
40
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
41
- multivalue: false,
42
- format: Helpers::SolrFormat::DATE
43
- },
44
- dataset_url: {
45
- xpaths: ['.//gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"ftp")]/gmd:linkage/gmd:URL',
46
- './/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:protocol/gco:CharacterString/text(),"FTP")]/gmd:linkage/gmd:URL'],
47
- multivalue: false
48
- },
49
- spatial_coverages: {
50
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
51
- multivalue: true,
52
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
53
- },
54
- spatial: {
55
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
58
- },
59
- spatial_area: {
60
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
61
- multivalue: false,
62
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
63
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
64
- },
65
- temporal_coverages: {
66
- xpaths: ['.//gmd:EX_TemporalExtent'],
67
- multivalue: true,
68
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING_FORMATTED
69
- },
70
- temporal_duration: {
71
- xpaths: ['.//gmd:EX_TemporalExtent'],
72
- multivalue: false,
73
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
74
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
75
- },
76
- temporal: {
77
- xpaths: ['.//gmd:EX_TemporalExtent'],
78
- multivalue: true,
79
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
80
- },
81
- sensors: {
82
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
83
- multivalue: true
84
- },
85
- source: {
86
- xpaths: [''],
87
- default_values: ['ADE'],
88
- multivalue: false
89
- },
90
- facet_data_center: {
91
- xpaths: [''],
92
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NODC][:short_name]}"],
93
- multivalue: false
94
- },
95
- facet_spatial_scope: {
96
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
97
- multivalue: true,
98
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
99
- },
100
- facet_temporal_duration: {
101
- xpaths: ['.//gmd:EX_TemporalExtent'],
102
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
103
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
104
- multivalue: true
105
- }
106
- }
107
- end
108
- end
@@ -1,109 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required; default_values, format, and reduce are optional.
11
- #
12
- # reduce takes the formatted result of multiple nodes and produces a single
13
- # result. This is for fields that are not multivalued, but their value
14
- # should consider information from all the nodes (for example, storing
15
- # only the maximum duration from multiple temporal coverage fields, taking
16
- # the sum of multiple spatial areas)
17
- PDC = {
18
- authoritative_id: {
19
- xpaths: ['.//oai:header/oai:identifier'],
20
- multivalue: false
21
- },
22
- title: {
23
- xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'],
24
- multivalue: false
25
- },
26
- summary: {
27
- xpaths: ['.//gmd:abstract/gco:CharacterString'],
28
- multivalue: false
29
- },
30
- data_centers: {
31
- xpaths: [''],
32
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]],
33
- multivalue: false
34
- },
35
- authors: {
36
- xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
37
- multivalue: true
38
- },
39
- keywords: {
40
- xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'],
41
- multivalue: true
42
- },
43
- last_revision_date: {
44
- xpaths: ['.//oai:header/oai:datestamp'],
45
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
46
- multivalue: false,
47
- format: Helpers::SolrFormat::DATE
48
- },
49
- dataset_url: {
50
- xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
51
- multivalue: false,
52
- format: Helpers::SolrFormat::HTTP_URL_FORMAT
53
- },
54
- spatial_coverages: {
55
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
56
- multivalue: true,
57
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
58
- },
59
- spatial: {
60
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
61
- multivalue: true,
62
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
63
- },
64
- spatial_area: {
65
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
66
- multivalue: false,
67
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
68
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
69
- },
70
- temporal: {
71
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
72
- multivalue: true,
73
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
74
- },
75
- temporal_coverages: {
76
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
77
- multivalue: true,
78
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
79
- },
80
- temporal_duration: {
81
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
82
- multivalue: false,
83
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
84
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
85
- },
86
- source: {
87
- xpaths: [''],
88
- default_values: ['ADE'],
89
- multivalue: false
90
- },
91
- facet_data_center: {
92
- xpaths: [''],
93
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC][:short_name]}"],
94
- multivalue: false
95
- },
96
- facet_spatial_scope: {
97
- xpaths: ['.//gmd:extent//gmd:EX_GeographicBoundingBox'],
98
- multivalue: true,
99
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
100
- },
101
- facet_temporal_duration: {
102
- xpaths: ['.//gmd:EX_TemporalExtent/gmd:extent'],
103
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
104
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
105
- multivalue: true
106
- }
107
- }
108
- end
109
- end
@@ -1,115 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/r2r_format'
4
-
5
- module SearchSolrTools
6
- module Selectors
7
- # The hash contains keys that should map to the fields in the solr schema,
8
- # the keys are called selectors and are in charge of selecting the nodes
9
- # from the ISO document, applying the default value if none of the xpaths
10
- # resolved to a value and formatting the field. xpaths and multivalue are
11
- # required; default_values, format, and reduce are optional.
12
- #
13
- # reduce takes the formatted result of multiple nodes and produces a single
14
- # result. This is for fields that are not multivalued, but their value
15
- # should consider information from all the nodes (for example, storing
16
- # only the maximum duration from multiple temporal coverage fields, taking
17
- # the sum of multiple spatial areas)
18
- R2R = {
19
- authoritative_id: {
20
- xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
21
- multivalue: false
22
- },
23
- title: {
24
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor'],
25
- multivalue: false
26
- },
27
- summary: {
28
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
29
- multivalue: false
30
- },
31
- data_centers: {
32
- xpaths: [''],
33
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]],
34
- multivalue: false
35
- },
36
- authors: {
37
- xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='contributor']]/gmd:individualName/gmx:Anchor"],
38
- multivalue: true
39
- },
40
- keywords: {
41
- xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
42
- './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
43
- multivalue: true
44
- },
45
- last_revision_date: {
46
- xpaths: ['.//gmd:dateStamp/gco:Date', './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
47
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
48
- multivalue: false,
49
- format: Helpers::SolrFormat::DATE
50
- },
51
- dataset_url: {
52
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href'],
53
- multivalue: false,
54
- format: Helpers::IsoToSolrFormat::DATASET_URL
55
- },
56
- spatial_coverages: {
57
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
58
- multivalue: true,
59
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
60
- },
61
- spatial: {
62
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
63
- multivalue: true,
64
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
65
- },
66
- spatial_area: {
67
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
68
- multivalue: false,
69
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
70
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
71
- },
72
- temporal_coverages: {
73
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
74
- multivalue: false,
75
- format: Helpers::R2RFormat::TEMPORAL_DISPLAY_STRING
76
- },
77
- temporal_duration: {
78
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
79
- multivalue: false,
80
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
81
- format: Helpers::R2RFormat::TEMPORAL_DURATION
82
- },
83
- temporal: {
84
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
85
- multivalue: false,
86
- format: Helpers::R2RFormat::TEMPORAL_INDEX_STRING
87
- },
88
- sensors: {
89
- xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:type/gmx:Anchor'],
90
- multivalue: true
91
- },
92
- source: {
93
- xpaths: [''],
94
- default_values: ['ADE'],
95
- multivalue: false
96
- },
97
- facet_data_center: {
98
- xpaths: [''],
99
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R][:short_name]}"],
100
- multivalue: false
101
- },
102
- facet_spatial_scope: {
103
- xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'],
104
- multivalue: true,
105
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
106
- },
107
- facet_temporal_duration: {
108
- xpaths: ['.//gmd:EX_Extent[@id="temporalExtent"]'],
109
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
110
- format: Helpers::R2RFormat::FACET_TEMPORAL_DURATION,
111
- multivalue: true
112
- }
113
- }
114
- end
115
- end
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- # The hash contains keys that should map to the fields in the solr schema,
7
- # the keys are called selectors and are in charge of selecting the nodes
8
- # from the ISO document, applying the default value if none of the xpaths
9
- # resolved to a value and formatting the field. xpaths and multivalue are
10
- # required; default_values, format, and reduce are optional.
11
- RDA = {
12
- authoritative_id: {
13
- xpaths: ['.//oai:header/oai:identifier'],
14
- multivalue: false
15
- },
16
- title: {
17
- xpaths: ['.//dif:Entry_Title'],
18
- multivalue: false
19
- },
20
- summary: {
21
- xpaths: ['.//dif:Summary'],
22
- multivalue: false
23
- },
24
- data_centers: {
25
- xpaths: [''],
26
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]],
27
- multivalue: false
28
- },
29
- authors: {
30
- xpaths: [''],
31
- multivalue: true
32
- },
33
- keywords: {
34
- xpaths: [
35
- './/dif:Parameters/dif:Category',
36
- './/dif:Parameters/dif:Topic',
37
- './/dif:Parameters/dif:Term',
38
- './/dif:Parameters/dif:Variable_Level_1'
39
- ].reverse,
40
- multivalue: true
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//dif:Related_URL/dif:URL'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//dif:Spatial_Coverage'],
54
- multivalue: true,
55
- format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: false,
65
- reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::IsoToSolrFormat::SPATIAL_AREA
67
- },
68
- temporal: {
69
- xpaths: ['.//dif:Temporal_Coverage'],
70
- multivalue: true,
71
- format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
72
- },
73
- temporal_coverages: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
77
- },
78
- temporal_duration: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: false,
81
- reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
82
- format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
83
- },
84
- source: {
85
- xpaths: [''],
86
- default_values: ['ADE'],
87
- multivalue: false
88
- },
89
- facet_data_center: {
90
- xpaths: [''],
91
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:short_name]}"],
92
- multivalue: false
93
- },
94
- facet_spatial_scope: {
95
- xpaths: ['.//dif:Spatial_Coverage'],
96
- multivalue: true,
97
- format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
98
- },
99
- facet_temporal_duration: {
100
- xpaths: ['.//dif:Temporal_Coverage'],
101
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
102
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
103
- multivalue: true
104
- }
105
- }
106
- end
107
- end
@@ -1,91 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/tdar_format'
4
-
5
- module SearchSolrTools
6
- module Selectors
7
- # The hash contains keys that should map to the fields in the solr schema,
8
- # the keys are called selectors and are in charge of selecting the nodes
9
- # from the ISO document, applying the default value if none of the xpaths
10
- # resolved to a value and formatting the field. xpaths and multivalue are
11
- # required; default_values, format, and reduce are optional.
12
- TDAR = {
13
- authoritative_id: {
14
- xpaths: ['.//atom:link/@href'],
15
- multivalue: false,
16
- format: proc do |node|
17
- 'TDAR-' << node.text.split('/')[4] || ''
18
- end
19
- },
20
- title: {
21
- xpaths: ['.//atom:title'],
22
- multivalue: false
23
- },
24
- summary: {
25
- xpaths: ['.//atom:summary'],
26
- multivalue: false
27
- },
28
- data_centers: {
29
- xpaths: [''],
30
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]],
31
- multivalue: false
32
- },
33
- authors: {
34
- xpaths: ['.//atom:author/atom:name'],
35
- multivalue: true
36
- },
37
- keywords: {
38
- xpaths: [''],
39
- multivalue: true,
40
- format: Helpers::IsoToSolrFormat::KEYWORDS
41
- },
42
- last_revision_date: {
43
- xpaths: ['.//atom:updated'],
44
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
45
- multivalue: false,
46
- format: Helpers::SolrFormat::DATE
47
- },
48
- dataset_url: {
49
- xpaths: ['.//atom:link/@href'],
50
- multivalue: false
51
- },
52
- spatial_coverages: {
53
- xpaths: ['.//georss:box'],
54
- multivalue: true,
55
- format: Helpers::TdarFormat::SPATIAL_DISPLAY
56
- },
57
- spatial: {
58
- xpaths: ['.//georss:box'],
59
- multivalue: true,
60
- format: Helpers::TdarFormat::SPATIAL_INDEX
61
- },
62
- spatial_area: {
63
- xpaths: ['.//georss:box'],
64
- multivalue: false,
65
- reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
66
- format: Helpers::TdarFormat::SPATIAL_AREA
67
- },
68
- source: {
69
- xpaths: [''],
70
- default_values: ['ADE'],
71
- multivalue: false
72
- },
73
- facet_data_center: {
74
- xpaths: [''],
75
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:short_name]}"],
76
- multivalue: false
77
- },
78
- facet_spatial_scope: {
79
- xpaths: ['.//georss:box'],
80
- multivalue: true,
81
- format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
82
- },
83
- facet_temporal_duration: {
84
- xpaths: [''],
85
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
86
- format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
87
- multivalue: true
88
- }
89
- }
90
- end
91
- end