search_solr_tools 6.1.0 → 6.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'iso8601'
|
3
5
|
|
@@ -7,30 +9,14 @@ require_relative 'facet_configuration'
|
|
7
9
|
module SearchSolrTools
|
8
10
|
module Helpers
|
9
11
|
# Methods for generating formatted values that can be indexed by SOLR
|
10
|
-
# rubocop:disable Metrics/ModuleLength
|
11
12
|
module SolrFormat
|
12
13
|
DATA_CENTER_NAMES = {
|
13
|
-
|
14
|
-
|
15
|
-
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
16
|
-
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
17
|
-
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
18
|
-
GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
|
19
|
-
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
20
|
-
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
21
|
-
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
22
|
-
NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
|
23
|
-
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
|
24
|
-
PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
|
25
|
-
R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
|
26
|
-
RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
|
27
|
-
TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
|
28
|
-
USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
|
29
|
-
}
|
14
|
+
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
|
15
|
+
}.freeze
|
30
16
|
|
31
17
|
NOT_SPECIFIED = 'Not specified'
|
32
18
|
|
33
|
-
TEMPORAL_RESOLUTION_FACET_VALUES = %w
|
19
|
+
TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
|
34
20
|
SUBHOURLY_INDEX = 0
|
35
21
|
HOURLY_INDEX = 1
|
36
22
|
SUBDAILY_INDEX = 2
|
@@ -42,7 +28,7 @@ module SearchSolrTools
|
|
42
28
|
YEARLY_INDEX = 8
|
43
29
|
MULTIYEARLY_INDEX = 9
|
44
30
|
|
45
|
-
SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
|
31
|
+
SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
|
46
32
|
SPATIAL_0_500_INDEX = 0
|
47
33
|
SPATIAL_501_1_INDEX = 1
|
48
34
|
SPATIAL_2_5_INDEX = 2
|
@@ -59,7 +45,7 @@ module SearchSolrTools
|
|
59
45
|
end
|
60
46
|
|
61
47
|
def self.temporal_display_str(date_range)
|
62
|
-
temporal_str =
|
48
|
+
temporal_str = (date_range[:start]).to_s
|
63
49
|
temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
|
64
50
|
temporal_str
|
65
51
|
end
|
@@ -82,6 +68,7 @@ module SearchSolrTools
|
|
82
68
|
|
83
69
|
def self.get_temporal_duration_facet(duration)
|
84
70
|
return NOT_SPECIFIED if duration.nil?
|
71
|
+
|
85
72
|
years = duration.to_i / 365
|
86
73
|
temporal_duration_range(years)
|
87
74
|
end
|
@@ -101,31 +88,28 @@ module SearchSolrTools
|
|
101
88
|
def self.facet_binning(type, format_string)
|
102
89
|
binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
|
103
90
|
if binned_facet.nil?
|
104
|
-
|
91
|
+
format_string
|
105
92
|
elsif binned_facet.eql?('exclude')
|
106
|
-
|
93
|
+
nil
|
107
94
|
else
|
108
|
-
|
95
|
+
binned_facet
|
109
96
|
end
|
110
|
-
|
111
|
-
nil
|
112
97
|
end
|
113
98
|
|
114
99
|
def self.parameter_binning(parameter_string)
|
115
100
|
binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
|
116
101
|
# use variable_level_1 if no mapping exists
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
return binned_parameter
|
122
|
-
end
|
102
|
+
return binned_parameter unless binned_parameter.nil?
|
103
|
+
|
104
|
+
parts = parameter_string.split '>'
|
105
|
+
return parts[3].strip if parts.length >= 4
|
123
106
|
|
124
107
|
nil
|
125
108
|
end
|
126
109
|
|
127
110
|
def self.resolution_value(resolution, find_index_method, resolution_values)
|
128
|
-
return NOT_SPECIFIED if
|
111
|
+
return NOT_SPECIFIED if resolution_not_specified? resolution
|
112
|
+
|
129
113
|
if resolution['type'] == 'single'
|
130
114
|
i = send(find_index_method, resolution['resolution'])
|
131
115
|
return resolution_values[i]
|
@@ -135,12 +119,12 @@ module SearchSolrTools
|
|
135
119
|
j = send(find_index_method, resolution['max_resolution'])
|
136
120
|
return resolution_values[i..j]
|
137
121
|
end
|
138
|
-
|
122
|
+
raise "Invalid resolution #{resolution['type']}"
|
139
123
|
end
|
140
124
|
|
141
125
|
def self.resolution_not_specified?(resolution)
|
142
126
|
return true if resolution.to_s.empty?
|
143
|
-
return true unless %w(single range).include? resolution['type']
|
127
|
+
return true unless %w[single range].include? resolution['type']
|
144
128
|
return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
|
145
129
|
return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
|
146
130
|
end
|
@@ -155,6 +139,7 @@ module SearchSolrTools
|
|
155
139
|
else
|
156
140
|
facet = 'Between 1 and 170 degrees of latitude change | Regional'
|
157
141
|
end
|
142
|
+
|
158
143
|
facet
|
159
144
|
end
|
160
145
|
|
@@ -167,8 +152,6 @@ module SearchSolrTools
|
|
167
152
|
"#{d.iso8601[0..-7]}Z" unless d.nil?
|
168
153
|
end
|
169
154
|
|
170
|
-
private
|
171
|
-
|
172
155
|
MIN_DATE = '00010101'
|
173
156
|
MAX_DATE = Time.now.strftime('%Y%m%d')
|
174
157
|
|
@@ -181,7 +164,6 @@ module SearchSolrTools
|
|
181
164
|
nil
|
182
165
|
end
|
183
166
|
|
184
|
-
# rubocop:disable CyclomaticComplexity
|
185
167
|
def self.find_index_for_single_temporal_resolution_value(string_duration)
|
186
168
|
iso8601_duration = ISO8601::Duration.new(string_duration)
|
187
169
|
|
@@ -201,10 +183,9 @@ module SearchSolrTools
|
|
201
183
|
MULTIYEARLY_INDEX
|
202
184
|
end
|
203
185
|
end
|
204
|
-
# rubocop:enable CyclomaticComplexity
|
205
186
|
|
206
187
|
def self.find_index_for_single_spatial_resolution_value(string_duration)
|
207
|
-
value, units = string_duration.split
|
188
|
+
value, units = string_duration.split
|
208
189
|
|
209
190
|
if units == 'deg'
|
210
191
|
spatial_resolution_index_degrees(value)
|
@@ -249,11 +230,10 @@ module SearchSolrTools
|
|
249
230
|
end
|
250
231
|
|
251
232
|
def self.date?(date)
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
valid_date
|
233
|
+
return false unless date.is_a? String
|
234
|
+
|
235
|
+
d = DateTime.parse(date.strip) rescue false
|
236
|
+
DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
|
257
237
|
end
|
258
238
|
|
259
239
|
def self.format_date_for_index(date_str, default)
|
@@ -1,7 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rgeo/geo_json'
|
2
4
|
|
3
5
|
require_relative 'bounding_box_util'
|
4
|
-
require_relative 'iso_to_solr_format'
|
5
6
|
|
6
7
|
module SearchSolrTools
|
7
8
|
module Helpers
|
@@ -43,7 +44,7 @@ module SearchSolrTools
|
|
43
44
|
|
44
45
|
def self.geojson_to_spatial_area(spatial_coverage_geom)
|
45
46
|
spatial_areas = spatial_coverage_geom.map do |geo_json|
|
46
|
-
if %w
|
47
|
+
if %w[point].include?(geo_json.geometry_type.to_s.downcase)
|
47
48
|
0.0
|
48
49
|
else
|
49
50
|
bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
|
@@ -51,11 +52,13 @@ module SearchSolrTools
|
|
51
52
|
end
|
52
53
|
end
|
53
54
|
return nil if spatial_areas.empty?
|
54
|
-
|
55
|
+
|
56
|
+
spatial_areas.max
|
55
57
|
end
|
56
58
|
|
57
59
|
def self.geojson_to_global_facet(spatial_coverage_geom)
|
58
60
|
return nil if spatial_coverage_geom.nil?
|
61
|
+
|
59
62
|
spatial_coverage_geom.each do |geo_json|
|
60
63
|
bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
|
61
64
|
return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
|
@@ -64,13 +67,13 @@ module SearchSolrTools
|
|
64
67
|
end
|
65
68
|
|
66
69
|
def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
end
|
70
|
+
return if spatial_coverage_geom.nil?
|
71
|
+
|
72
|
+
spatial_coverage_geom.map do |geo_json|
|
73
|
+
bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
|
74
|
+
scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
|
75
|
+
scope unless scope.nil?
|
76
|
+
end.uniq
|
74
77
|
end
|
75
78
|
end
|
76
79
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
# rubocop:disable Metrics/ClassLength
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'rgeo/geo_json'
|
3
4
|
|
4
5
|
require 'search_solr_tools'
|
@@ -10,50 +11,50 @@ module SearchSolrTools
|
|
10
11
|
module Translators
|
11
12
|
# Translates NSIDC JSON format to Solr JSON add format
|
12
13
|
class NsidcJsonToSolr
|
13
|
-
PARAMETER_PARTS = %w
|
14
|
+
PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze
|
14
15
|
|
15
16
|
# rubocop:disable Metrics/MethodLength
|
16
17
|
# rubocop:disable Metrics/AbcSize
|
17
18
|
def translate(json_doc)
|
18
|
-
copy_keys = %w
|
19
|
+
copy_keys = %w[title summary keywords brokered]
|
19
20
|
temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
|
20
21
|
spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
|
21
22
|
|
22
23
|
solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
|
23
24
|
solr_add_hash.merge!(
|
24
|
-
'authoritative_id'
|
25
|
-
'dataset_version'
|
26
|
-
'data_centers'
|
27
|
-
'facet_data_center'
|
28
|
-
'authors'
|
29
|
-
'topics'
|
30
|
-
'parameters'
|
31
|
-
'full_parameters'
|
32
|
-
'facet_parameter'
|
33
|
-
'platforms'
|
34
|
-
'sensors'
|
35
|
-
'facet_sensor'
|
36
|
-
'published_date'
|
37
|
-
'spatial_coverages'
|
38
|
-
'spatial'
|
39
|
-
'spatial_area'
|
40
|
-
'facet_spatial_coverage'
|
41
|
-
'facet_spatial_scope'
|
42
|
-
'temporal_coverages'
|
43
|
-
'temporal_duration'
|
44
|
-
'temporal'
|
45
|
-
'facet_temporal_duration'
|
46
|
-
'last_revision_date'
|
47
|
-
'dataset_url'
|
48
|
-
'distribution_formats'
|
49
|
-
'facet_format'
|
50
|
-
'source'
|
51
|
-
'popularity'
|
52
|
-
'data_access_urls'
|
53
|
-
'facet_sponsored_program'
|
25
|
+
'authoritative_id' => json_doc['authoritativeId'],
|
26
|
+
'dataset_version' => json_doc['majorVersion']['version'],
|
27
|
+
'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
|
28
|
+
'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
|
29
|
+
'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
|
30
|
+
'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
|
31
|
+
'parameters' => translate_parameters(json_doc['parameters']),
|
32
|
+
'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
|
33
|
+
'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
|
34
|
+
'platforms' => translate_json_string(json_doc['platforms']),
|
35
|
+
'sensors' => translate_json_string(json_doc['instruments']),
|
36
|
+
'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
|
37
|
+
'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
|
38
|
+
'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
|
39
|
+
'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
|
40
|
+
'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
|
41
|
+
'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
|
42
|
+
'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
|
43
|
+
'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
|
44
|
+
'temporal_duration' => temporal_coverage_values['temporal_duration'],
|
45
|
+
'temporal' => temporal_coverage_values['temporal'],
|
46
|
+
'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
|
47
|
+
'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
|
48
|
+
'dataset_url' => json_doc['datasetUrl'],
|
49
|
+
'distribution_formats' => json_doc['distributionFormats'],
|
50
|
+
'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
|
51
|
+
'source' => %w[NSIDC ADE],
|
52
|
+
'popularity' => json_doc['popularity'],
|
53
|
+
'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
|
54
|
+
'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
|
54
55
|
'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
|
55
|
-
'facet_spatial_resolution'
|
56
|
-
'sponsored_programs'
|
56
|
+
'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
|
57
|
+
'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
|
57
58
|
)
|
58
59
|
end
|
59
60
|
# rubocop:enable Metrics/MethodLength
|
@@ -70,13 +71,14 @@ module SearchSolrTools
|
|
70
71
|
def translate_sensor_to_facet_sensor(json)
|
71
72
|
facet_values = []
|
72
73
|
return facet_values if json.nil?
|
74
|
+
|
73
75
|
json.each do |json_entry|
|
74
76
|
sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
|
75
|
-
if sensor_bin.eql? json_entry['shortName']
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
77
|
+
facet_values << if sensor_bin.eql? json_entry['shortName']
|
78
|
+
"#{json_entry['longName']} | #{json_entry['shortName']}"
|
79
|
+
else
|
80
|
+
" | #{sensor_bin}"
|
81
|
+
end
|
80
82
|
end
|
81
83
|
facet_values
|
82
84
|
end
|
@@ -100,12 +102,13 @@ module SearchSolrTools
|
|
100
102
|
end
|
101
103
|
|
102
104
|
def translate_iso_topic_categories(iso_topic_categories_json)
|
103
|
-
iso_topic_categories_json
|
105
|
+
iso_topic_categories_json&.map { |t| t['name'] }
|
104
106
|
end
|
105
107
|
|
106
108
|
def translate_data_access_urls(json)
|
107
109
|
values = []
|
108
110
|
return values if json.nil?
|
111
|
+
|
109
112
|
json.each do |json_entry|
|
110
113
|
link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
|
111
114
|
link_type = json_entry['type'].nil? ? '' : json_entry['type']
|
@@ -120,6 +123,7 @@ module SearchSolrTools
|
|
120
123
|
def translate_internal_datacenters(json)
|
121
124
|
values = []
|
122
125
|
return values if json.nil?
|
126
|
+
|
123
127
|
json.each do |json_entry|
|
124
128
|
short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
|
125
129
|
values << short_name
|
@@ -130,6 +134,7 @@ module SearchSolrTools
|
|
130
134
|
def translate_short_long_names_to_facet_value(json)
|
131
135
|
facet_values = []
|
132
136
|
return facet_values if json.nil?
|
137
|
+
|
133
138
|
json.each do |json_entry|
|
134
139
|
long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
|
135
140
|
short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
|
@@ -169,6 +174,7 @@ module SearchSolrTools
|
|
169
174
|
def translate_parameters_to_facet_parameters(parameters_json)
|
170
175
|
parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
|
171
176
|
return [] if parameters_strings.nil?
|
177
|
+
|
172
178
|
facet_params = []
|
173
179
|
parameters_strings.each do |str|
|
174
180
|
facet_params << Helpers::SolrFormat.parameter_binning(str)
|
@@ -199,8 +205,7 @@ module SearchSolrTools
|
|
199
205
|
end
|
200
206
|
|
201
207
|
def generate_data_citation_creators(data_citation)
|
202
|
-
data_citation.nil? ?
|
203
|
-
creators
|
208
|
+
data_citation.nil? ? [] : data_citation['creators']
|
204
209
|
end
|
205
210
|
|
206
211
|
def generate_part_array(json, limit_values = nil)
|
@@ -214,6 +219,5 @@ module SearchSolrTools
|
|
214
219
|
parts
|
215
220
|
end
|
216
221
|
end
|
217
|
-
# rubocop:enable Metrics/ClassLength
|
218
222
|
end
|
219
223
|
end
|
data/lib/search_solr_tools.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'search_solr_tools/config/environments'
|
2
4
|
require_relative 'search_solr_tools/version'
|
3
5
|
|
4
|
-
require_relative 'search_solr_tools/helpers/selectors'
|
5
6
|
require_relative 'search_solr_tools/helpers/harvest_status'
|
6
7
|
require_relative 'search_solr_tools/errors/harvest_error'
|
7
8
|
|
8
|
-
%w
|
9
|
+
%w[harvesters translators].each do |subdir|
|
9
10
|
Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
|
10
11
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.1.0
|
4
|
+
version: 6.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -14,7 +14,7 @@ authors:
|
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date: 2023-07-
|
17
|
+
date: 2023-07-24 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: ffi-geos
|
@@ -321,59 +321,17 @@ files:
|
|
321
321
|
- lib/search_solr_tools/config/environments.rb
|
322
322
|
- lib/search_solr_tools/config/environments.yaml
|
323
323
|
- lib/search_solr_tools/errors/harvest_error.rb
|
324
|
-
- lib/search_solr_tools/harvesters/adc.rb
|
325
|
-
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
326
324
|
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
327
325
|
- lib/search_solr_tools/harvesters/base.rb
|
328
|
-
- lib/search_solr_tools/harvesters/bcodmo.rb
|
329
|
-
- lib/search_solr_tools/harvesters/data_one.rb
|
330
|
-
- lib/search_solr_tools/harvesters/echo.rb
|
331
|
-
- lib/search_solr_tools/harvesters/eol.rb
|
332
|
-
- lib/search_solr_tools/harvesters/gtnp.rb
|
333
|
-
- lib/search_solr_tools/harvesters/ices.rb
|
334
|
-
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
335
|
-
- lib/search_solr_tools/harvesters/nmi.rb
|
336
|
-
- lib/search_solr_tools/harvesters/nodc.rb
|
337
326
|
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
338
327
|
- lib/search_solr_tools/harvesters/nsidc_json.rb
|
339
|
-
- lib/search_solr_tools/harvesters/oai.rb
|
340
|
-
- lib/search_solr_tools/harvesters/pdc.rb
|
341
|
-
- lib/search_solr_tools/harvesters/r2r.rb
|
342
|
-
- lib/search_solr_tools/harvesters/rda.rb
|
343
|
-
- lib/search_solr_tools/harvesters/tdar.rb
|
344
|
-
- lib/search_solr_tools/harvesters/usgs.rb
|
345
328
|
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
346
|
-
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
347
|
-
- lib/search_solr_tools/helpers/data_one_format.rb
|
348
329
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
349
330
|
- lib/search_solr_tools/helpers/harvest_status.rb
|
350
331
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
351
|
-
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
352
|
-
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
353
|
-
- lib/search_solr_tools/helpers/ncdc_paleo_format.rb
|
354
|
-
- lib/search_solr_tools/helpers/query_builder.rb
|
355
|
-
- lib/search_solr_tools/helpers/r2r_format.rb
|
356
|
-
- lib/search_solr_tools/helpers/selectors.rb
|
357
332
|
- lib/search_solr_tools/helpers/solr_format.rb
|
358
|
-
- lib/search_solr_tools/helpers/tdar_format.rb
|
359
333
|
- lib/search_solr_tools/helpers/translate_spatial_coverage.rb
|
360
334
|
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
361
|
-
- lib/search_solr_tools/helpers/usgs_format.rb
|
362
|
-
- lib/search_solr_tools/selectors/adc.rb
|
363
|
-
- lib/search_solr_tools/selectors/data_one.rb
|
364
|
-
- lib/search_solr_tools/selectors/echo_iso.rb
|
365
|
-
- lib/search_solr_tools/selectors/ices_iso.rb
|
366
|
-
- lib/search_solr_tools/selectors/ncdc_paleo.rb
|
367
|
-
- lib/search_solr_tools/selectors/nmi.rb
|
368
|
-
- lib/search_solr_tools/selectors/nodc_iso.rb
|
369
|
-
- lib/search_solr_tools/selectors/pdc_iso.rb
|
370
|
-
- lib/search_solr_tools/selectors/r2r.rb
|
371
|
-
- lib/search_solr_tools/selectors/rda.rb
|
372
|
-
- lib/search_solr_tools/selectors/tdar_opensearch.rb
|
373
|
-
- lib/search_solr_tools/selectors/usgs_iso.rb
|
374
|
-
- lib/search_solr_tools/translators/bcodmo_json.rb
|
375
|
-
- lib/search_solr_tools/translators/eol_to_solr.rb
|
376
|
-
- lib/search_solr_tools/translators/gtnp_json.rb
|
377
335
|
- lib/search_solr_tools/translators/nsidc_json.rb
|
378
336
|
- lib/search_solr_tools/version.rb
|
379
337
|
- search_solr_tools.gemspec
|
@@ -396,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
396
354
|
- !ruby/object:Gem::Version
|
397
355
|
version: '0'
|
398
356
|
requirements: []
|
399
|
-
rubygems_version: 3.4.
|
357
|
+
rubygems_version: 3.4.17
|
400
358
|
signing_key:
|
401
359
|
specification_version: 4
|
402
360
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class Adc < Base
|
6
|
-
def initialize(env = 'development', die_on_failure = false)
|
7
|
-
super
|
8
|
-
@page_size = 250
|
9
|
-
@translator = Helpers::IsoToSolr.new :adc
|
10
|
-
end
|
11
|
-
|
12
|
-
def harvest_and_delete
|
13
|
-
puts "Running harvest of adc catalog from #{metadata_url}"
|
14
|
-
super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
|
15
|
-
end
|
16
|
-
|
17
|
-
def harvest_adc_into_solr
|
18
|
-
start = 0
|
19
|
-
while (entries = get_results_from_adc(start)) && (entries.length > 0)
|
20
|
-
begin
|
21
|
-
insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
|
22
|
-
rescue => e
|
23
|
-
puts "ERROR: #{e}\n\n"
|
24
|
-
raise e if @die_on_failure
|
25
|
-
end
|
26
|
-
start += @page_size
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def get_results_from_adc(start)
|
31
|
-
get_results(build_request(start, @page_size), './response/result/doc')
|
32
|
-
end
|
33
|
-
|
34
|
-
def metadata_url
|
35
|
-
SolrEnvironments[@environment][:adc_url]
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_docs_with_translated_entries_from_adc(entries)
|
39
|
-
entries.map do |e|
|
40
|
-
create_new_solr_add_doc_with_child(@translator.translate(e).root)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def build_request(start = 0, max_records = 100)
|
45
|
-
"#{metadata_url}&start=#{start}&rows=#{max_records}"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
require_relative 'auto_suggest'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class AdeAutoSuggest < AutoSuggest
|
6
|
-
def harvest_and_delete
|
7
|
-
puts 'Building auto-suggest indexes for ADE'
|
8
|
-
super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
|
9
|
-
end
|
10
|
-
|
11
|
-
def harvest
|
12
|
-
url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
|
13
|
-
super url, fields
|
14
|
-
end
|
15
|
-
|
16
|
-
def fields
|
17
|
-
{
|
18
|
-
'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
|
19
|
-
'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
|
20
|
-
}
|
21
|
-
end
|
22
|
-
|
23
|
-
def split_creator(value, count, field_weight, source, split_regex)
|
24
|
-
add_docs = []
|
25
|
-
value.downcase.split(split_regex).each do |v|
|
26
|
-
v = v.strip.chomp('/')
|
27
|
-
add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
|
28
|
-
end
|
29
|
-
add_docs
|
30
|
-
end
|
31
|
-
|
32
|
-
def keyword_creator(value, count, field_weight, source)
|
33
|
-
split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
|
34
|
-
end
|
35
|
-
|
36
|
-
def author_creator(value, count, field_weight, source)
|
37
|
-
split_creator value, count, field_weight, source, %r{/;/}
|
38
|
-
end
|
39
|
-
|
40
|
-
def ade_length_limit_creator(value, count, field_weight, source)
|
41
|
-
return [] if value.length > 80
|
42
|
-
standard_add_creator value, count, field_weight, source
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|