search_solr_tools 6.1.0 → 6.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/bin/search_solr_tools +1 -13
  4. data/lib/search_solr_tools/config/environments.yaml +0 -32
  5. data/lib/search_solr_tools/harvesters/base.rb +0 -1
  6. data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
  7. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
  8. data/lib/search_solr_tools/version.rb +1 -1
  9. data/lib/search_solr_tools.rb +1 -2
  10. metadata +2 -44
  11. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  12. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  13. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  14. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  15. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  16. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  17. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  18. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  19. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  20. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  21. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  22. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  23. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  24. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  25. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  26. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  27. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  28. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  29. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  32. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  33. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  34. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  35. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  36. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  37. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  38. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  39. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  42. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  43. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  44. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  45. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  46. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  47. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  48. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  49. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  50. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  51. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  52. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,109 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
module SearchSolrTools
  module Selectors
    # Lookup shared by the data_centers and facet_data_center selectors.
    pdc_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:PDC]
    # Every spatial selector reads the same bounding-box nodes.
    pdc_bounding_box = './/gmd:extent//gmd:EX_GeographicBoundingBox'
    # Every temporal selector reads the same temporal-extent nodes.
    pdc_temporal_extent = './/gmd:EX_TemporalExtent/gmd:extent'

    # Selector table for the PDC data center. The keys should map to the
    # fields in the solr schema; each value (a "selector") is in charge of
    # selecting the nodes from the ISO document, applying the default values
    # if none of the xpaths resolved to a value, and formatting the field.
    #
    #   xpaths         - required
    #   multivalue     - required
    #   default_values - optional
    #   format         - optional
    #   reduce         - optional; takes the formatted result of multiple nodes
    #                    and produces a single result. This is for fields that
    #                    are not multivalued, but whose value should consider
    #                    information from all the nodes (for example, storing
    #                    only the maximum duration from multiple temporal
    #                    coverage fields).
    #
    # The table is frozen so the shared constant cannot gain or lose fields
    # at runtime.
    PDC = {
      authoritative_id: {
        xpaths: ['.//oai:header/oai:identifier'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:citation//gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [pdc_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ['.//gmd:identificationInfo//gmd:citedResponsibleParty//gmd:individualName/gco:CharacterString'],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords//gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//oai:header/oai:datestamp'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE: this default is computed once, when the constant is defined,
        # not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:dataSetURI/gco:CharacterString'],
        multivalue: false,
        format: Helpers::SolrFormat::HTTP_URL_FORMAT
      },
      spatial_coverages: {
        xpaths: [pdc_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [pdc_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [pdc_bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [pdc_temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [pdc_temporal_extent],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [pdc_temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{pdc_names[:long_name]} | #{pdc_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [pdc_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [pdc_temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
@@ -1,115 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/r2r_format'
4
-
5
module SearchSolrTools
  module Selectors
    # Lookup shared by the data_centers and facet_data_center selectors.
    r2r_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:R2R]
    # Every spatial selector reads the same bounding-box nodes.
    r2r_bounding_box = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    # Every temporal selector reads the same extent node.
    r2r_temporal_extent = './/gmd:EX_Extent[@id="temporalExtent"]'

    # Selector table for the R2R data center. The keys should map to the
    # fields in the solr schema; each value (a "selector") is in charge of
    # selecting the nodes from the ISO document, applying the default values
    # if none of the xpaths resolved to a value, and formatting the field.
    #
    #   xpaths         - required
    #   multivalue     - required
    #   default_values - optional
    #   format         - optional
    #   reduce         - optional; takes the formatted result of multiple nodes
    #                    and produces a single result. This is for fields that
    #                    are not multivalued, but whose value should consider
    #                    information from all the nodes (for example, storing
    #                    only the maximum duration from multiple temporal
    #                    coverage fields).
    #
    # The table is frozen so the shared constant cannot gain or lose fields
    # at runtime.
    R2R = {
      authoritative_id: {
        xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [r2r_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [".//gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='contributor']]/gmd:individualName/gmx:Anchor"],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString',
                 './/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:Date',
                 './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:DateTime'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE: this default is computed once, when the constant is defined,
        # not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href'],
        multivalue: false,
        format: Helpers::IsoToSolrFormat::DATASET_URL
      },
      spatial_coverages: {
        xpaths: [r2r_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [r2r_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [r2r_bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal_coverages: {
        xpaths: [r2r_temporal_extent],
        multivalue: false,
        format: Helpers::R2RFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [r2r_temporal_extent],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::R2RFormat::TEMPORAL_DURATION
      },
      temporal: {
        xpaths: [r2r_temporal_extent],
        multivalue: false,
        format: Helpers::R2RFormat::TEMPORAL_INDEX_STRING
      },
      sensors: {
        xpaths: ['.//gmi:acquisitionInformation/gmi:MI_AcquisitionInformation/gmi:instrument/gmi:MI_Instrument/gmi:type/gmx:Anchor'],
        multivalue: true
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{r2r_names[:long_name]} | #{r2r_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [r2r_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [r2r_temporal_extent],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::R2RFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
-
4
module SearchSolrTools
  module Selectors
    # Lookup shared by the data_centers and facet_data_center selectors.
    rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]
    # Every spatial selector reads the same coverage nodes.
    rda_spatial_coverage = './/dif:Spatial_Coverage'
    # Every temporal selector reads the same coverage nodes.
    rda_temporal_coverage = './/dif:Temporal_Coverage'

    # Selector table for the RDA data center. The keys should map to the
    # fields in the solr schema; each value (a "selector") is in charge of
    # selecting the nodes from the source document, applying the default
    # values if none of the xpaths resolved to a value, and formatting the
    # field.
    #
    #   xpaths         - required
    #   multivalue     - required
    #   default_values - optional
    #   format         - optional
    #   reduce         - optional; used here by the non-multivalued
    #                    spatial_area and temporal_duration fields
    #
    # The table is frozen so the shared constant cannot gain or lose fields
    # at runtime.
    RDA = {
      authoritative_id: {
        xpaths: ['.//oai:header/oai:identifier'],
        multivalue: false
      },
      title: {
        xpaths: ['.//dif:Entry_Title'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//dif:Summary'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [rda_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [''],
        multivalue: true
      },
      keywords: {
        # The list is stored in the reverse of the order written here, so
        # Variable_Level_1 ends up first and Category last.
        xpaths: [
          './/dif:Parameters/dif:Category',
          './/dif:Parameters/dif:Topic',
          './/dif:Parameters/dif:Term',
          './/dif:Parameters/dif:Variable_Level_1'
        ].reverse,
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//dif:Last_DIF_Revision_Date'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE: this default is computed once, when the constant is defined,
        # not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//dif:Related_URL/dif:URL'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [rda_spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [rda_spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [rda_spatial_coverage],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [rda_temporal_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [rda_temporal_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [rda_temporal_coverage],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::IsoToSolrFormat::TEMPORAL_DURATION
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{rda_names[:long_name]} | #{rda_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [rda_spatial_coverage],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [rda_temporal_coverage],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
@@ -1,91 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/tdar_format'
4
-
5
module SearchSolrTools
  module Selectors
    # Lookup shared by the data_centers and facet_data_center selectors.
    tdar_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR]
    # Every spatial selector reads the same georss box nodes.
    tdar_box = './/georss:box'

    # Selector table for the TDAR data center. The keys should map to the
    # fields in the solr schema; each value (a "selector") is in charge of
    # selecting the nodes from the source document, applying the default
    # values if none of the xpaths resolved to a value, and formatting the
    # field.
    #
    #   xpaths         - required
    #   multivalue     - required
    #   default_values - optional
    #   format         - optional
    #   reduce         - optional; used here by the non-multivalued
    #                    spatial_area field
    #
    # The table is frozen so the shared constant cannot gain or lose fields
    # at runtime.
    TDAR = {
      authoritative_id: {
        xpaths: ['.//atom:link/@href'],
        multivalue: false,
        # Builds "TDAR-<segment>" from the fifth '/'-separated piece of the
        # link. The previous form, `'TDAR-' << segment || ''`, parsed as
        # `('TDAR-' << segment) || ''`: the fallback could never run and a
        # link with fewer than five pieces raised TypeError. Interpolation
        # renders a missing segment as an empty string instead.
        format: proc { |node| "TDAR-#{node.text.split('/')[4]}" }
      },
      title: {
        xpaths: ['.//atom:title'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//atom:summary'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [tdar_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: ['.//atom:author/atom:name'],
        multivalue: true
      },
      keywords: {
        xpaths: [''],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::KEYWORDS
      },
      last_revision_date: {
        xpaths: ['.//atom:updated'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE: this default is computed once, when the constant is defined,
        # not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//atom:link/@href'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [tdar_box],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [tdar_box],
        multivalue: true,
        format: Helpers::TdarFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [tdar_box],
        multivalue: false,
        reduce: Helpers::TdarFormat::MAX_SPATIAL_AREA,
        format: Helpers::TdarFormat::SPATIAL_AREA
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{tdar_names[:long_name]} | #{tdar_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [tdar_box],
        multivalue: true,
        format: Helpers::TdarFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [''],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
@@ -1,107 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/iso_to_solr_format'
3
- require_relative '../helpers/usgs_format'
4
-
5
module SearchSolrTools
  module Selectors
    # Lookup shared by the data_centers and facet_data_center selectors.
    usgs_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS]
    # Every spatial selector reads the same bounding-box nodes.
    usgs_bounding_box = './/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox'
    # Every temporal selector reads the citation dates typed 'Time Period'.
    usgs_time_period = ".//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date[./gmd:dateType/gmd:CI_DateTypeCode[@codeListValue='Time Period']]/gmd:date"

    # Selector table for the USGS data center. The keys should map to the
    # fields in the solr schema; each value (a "selector") is in charge of
    # selecting the nodes from the ISO document, applying the default values
    # if none of the xpaths resolved to a value, and formatting the field.
    #
    #   xpaths         - required
    #   multivalue     - required
    #   default_values - optional
    #   format         - optional
    #   reduce         - optional; used here by the non-multivalued
    #                    spatial_area and temporal_duration fields
    #
    # The table is frozen so the shared constant cannot gain or lose fields
    # at runtime.
    USGS = {
      authoritative_id: {
        xpaths: ['.//gmd:fileIdentifier/gco:CharacterString'],
        multivalue: false
      },
      title: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString'],
        multivalue: false
      },
      summary: {
        xpaths: ['.//gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'],
        multivalue: false
      },
      data_centers: {
        xpaths: [''],
        default_values: [usgs_names[:long_name]],
        multivalue: false
      },
      authors: {
        xpaths: [".//gmd:contact/gmd:CI_ResponsibleParty[./gmd:role/gmd:CI_RoleCode[@codeListValue='originator']]/gmd:organisationName/gco:CharacterString"],
        multivalue: true
      },
      keywords: {
        xpaths: ['.//gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString'],
        multivalue: true
      },
      last_revision_date: {
        xpaths: ['.//gmd:dateStamp/gco:DateTime'],
        # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
        # NOTE: this default is computed once, when the constant is defined,
        # not each time a record is translated.
        default_values: [Helpers::SolrFormat.date_str(DateTime.now)],
        multivalue: false,
        format: Helpers::SolrFormat::DATE
      },
      dataset_url: {
        xpaths: ['.//gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[contains(./gmd:name/gco:CharacterString/text(),"Summary")]/gmd:linkage/gmd:URL'],
        multivalue: false
      },
      spatial_coverages: {
        xpaths: [usgs_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
      },
      spatial: {
        xpaths: [usgs_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::SPATIAL_INDEX
      },
      spatial_area: {
        xpaths: [usgs_bounding_box],
        multivalue: false,
        reduce: Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
        format: Helpers::IsoToSolrFormat::SPATIAL_AREA
      },
      temporal: {
        xpaths: [usgs_time_period],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_INDEX_STRING
      },
      temporal_coverages: {
        xpaths: [usgs_time_period],
        multivalue: true,
        format: Helpers::UsgsFormat::TEMPORAL_DISPLAY_STRING
      },
      temporal_duration: {
        xpaths: [usgs_time_period],
        multivalue: false,
        reduce: Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
        format: Helpers::UsgsFormat::TEMPORAL_DURATION
      },
      sensors: {
        xpaths: [''],
        multivalue: true
      },
      source: {
        xpaths: [''],
        default_values: ['ADE'],
        multivalue: false
      },
      facet_data_center: {
        xpaths: [''],
        default_values: ["#{usgs_names[:long_name]} | #{usgs_names[:short_name]}"],
        multivalue: false
      },
      facet_spatial_scope: {
        xpaths: [usgs_bounding_box],
        multivalue: true,
        format: Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
      },
      facet_temporal_duration: {
        xpaths: [usgs_time_period],
        default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
        format: Helpers::UsgsFormat::FACET_TEMPORAL_DURATION,
        multivalue: true
      }
    }.freeze
  end
end
@@ -1,89 +0,0 @@
1
- require 'json'
2
- require 'rest-client'
3
- require 'rgeo/geo_json'
4
- require 'rgeo/wkrep/wkt_parser'
5
-
6
- require 'search_solr_tools'
7
- require_relative '../helpers/solr_format'
8
- require_relative '../helpers/translate_temporal_coverage'
9
- require_relative '../helpers/translate_spatial_coverage'
10
-
11
module SearchSolrTools
  module Translators
    # Translates Bcodmo json to solr json format
    class BcodmoJsonToSolr
      # rubocop:disable MethodLength
      # rubocop:disable AbcSize

      # Builds the solr document (a Hash keyed by solr field name) for one
      # dataset.
      #
      # json_doc    - Hash of dataset metadata; read with string keys such as
      #               'dataset_name', 'dataset_nid' and 'people'.
      # json_record - Hash supplying 'id', 'startDate' and 'endDate'.
      # geometry    - GeoJSON feature Hash, see #translate_geometry.
      #
      # When json_doc has a 'people' key its value is fetched with
      # RestClient.get, so this method may perform a network request.
      def translate(json_doc, json_record, geometry)
        originators = fetch_originators(json_doc)
        spatial_values = translate_geometry geometry
        temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages(
          [{ 'start' => json_record['startDate'].to_s, 'end' => json_record['endDate'].to_s }]
        )
        data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO]
        {
          'title' => json_doc['dataset_name'],
          'authoritative_id' => json_record['id'] + json_doc['dataset_nid'],
          'dataset_version' => translate_dataset_version(json_doc['dataset_version']),
          'data_centers' => data_center[:long_name],
          'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
          'summary' => json_doc['dataset_description'].to_s.empty? ? json_doc['dataset_brief_description'] : json_doc['dataset_description'],
          'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
          'temporal_duration' => temporal_coverage_values['temporal_duration'],
          'temporal' => temporal_coverage_values['temporal'],
          'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
          'last_revision_date' => last_revision_date(json_doc['dataset_deployment_version_date']),
          'dataset_url' => json_doc['dataset_url'],
          'source' => 'ADE',
          'facet_spatial_coverage' => spatial_values[:global_facet],
          'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
          'spatial_coverages' => spatial_values[:spatial_display],
          'spatial_area' => spatial_values[:spatial_area],
          'spatial' => spatial_values[:spatial_index],
          'data_access_urls' => json_doc.fetch('dataset_deployment_url', []),
          'authors' => parse_people(originators)
        }
      end
      # rubocop:enable AbcSize
      # rubocop:enable MethodLength

      # Reduces a version label to its digits ('v1.2' becomes '12').
      # Returns nil when the label contains no digits at all.
      def translate_dataset_version(dataset_version)
        version_translation = dataset_version.to_s.gsub(/\D/, '')
        version_translation.empty? ? nil : version_translation
      end

      # Returns the 'person_name' of every entry, or nil (not an empty
      # array) when people_json is empty.
      def parse_people(people_json)
        people_json.map { |entry| entry['person_name'] } unless people_json.empty?
      end

      # Converts a GeoJSON feature Hash into the spatial values used by
      # #translate: :spatial_display, :spatial_index, :spatial_area,
      # :global_facet and :spatial_scope_facet.
      #
      # The feature is decoded from retyped copies, so the caller's Hash is
      # no longer modified (earlier code overwrote its geometry 'type').
      def translate_geometry(wkt_geom)
        declared_type = wkt_geom['geometry']['type']
        # A LineString is read as its individual points; GeoJSON gives both
        # types the same coordinate layout.
        point_type = declared_type == 'LineString' ? 'MultiPoint' : declared_type
        geometry = RGeo::Feature.cast(decode_geometry_as(wkt_geom, point_type), RGeo::Feature::MultiPoint)

        # This feed sometimes returns MultiLineString but wrongly calls them 'LineString'
        # If the above fails, we assume this is why. If the feed gets fixed, this code
        # should still handle that.
        geometry = multi_line_string_points(wkt_geom) if geometry.nil? || geometry.num_geometries.zero?

        {
          spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
          spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
          spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
          global_facet: Helpers::TranslateSpatialCoverage.geojson_to_global_facet([geometry]),
          spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
        }
      end

      private

      # Fetches and parses the people list that json_doc['people'] points to;
      # returns [] when the document has no 'people' key.
      def fetch_originators(json_doc)
        json_doc.key?('people') ? JSON.parse(RestClient.get(json_doc['people'])) : []
      end

      # Formats a parseable date string as '%Y-%m-%dT%H:%M:%SZ'; returns nil
      # for a nil or empty value. The time is formatted as parsed, with no
      # zone conversion.
      # NOTE(review): Time.parse lives in the 'time' stdlib extension, which
      # this file does not require itself -- presumably loaded by a
      # dependency; confirm.
      def last_revision_date(raw_date)
        raw_date.to_s.empty? ? nil : Time.parse(raw_date).strftime('%Y-%m-%dT%H:%M:%SZ')
      end

      # Decodes a copy of the feature whose geometry 'type' is replaced by
      # type, leaving the feature passed in untouched.
      def decode_geometry_as(feature, type)
        retyped = feature.merge('geometry' => feature['geometry'].merge('type' => type))
        RGeo::GeoJSON.decode(retyped).geometry
      end

      # Decodes the feature as an actual MultiLineString and converts it to a
      # MultiPoint, for passing into the spatial helper functions.
      def multi_line_string_points(feature)
        lines = decode_geometry_as(feature, 'MultiLineString')
        factory = RGeo::Geos.factory
        # Coordinates are flattened and regrouped in pairs (x, y).
        points = lines.coordinates.flatten.each_slice(2).map { |x, y| factory.point(x, y) }
        factory.multi_point(points)
      end
    end
  end
end