search_solr_tools 6.1.0 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -2
- data/bin/search_solr_tools +5 -17
- data/lib/search_solr_tools/config/environments.rb +3 -1
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
- data/lib/search_solr_tools/harvesters/base.rb +21 -20
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
- data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
- data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
- data/lib/search_solr_tools/version.rb +3 -1
- data/lib/search_solr_tools.rb +3 -2
- metadata +3 -45
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
data/lib/search_solr_tools/helpers/solr_format.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'iso8601'
|
3
5
|
|
@@ -7,30 +9,14 @@ require_relative 'facet_configuration'
|
|
7
9
|
module SearchSolrTools
|
8
10
|
module Helpers
|
9
11
|
# Methods for generating formatted values that can be indexed by SOLR
|
10
|
-
# rubocop:disable Metrics/ModuleLength
|
11
12
|
module SolrFormat
|
12
13
|
DATA_CENTER_NAMES = {
|
13
|
-
|
14
|
-
|
15
|
-
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
16
|
-
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
17
|
-
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
18
|
-
GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
|
19
|
-
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
20
|
-
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
21
|
-
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
22
|
-
NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
|
23
|
-
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
|
24
|
-
PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
|
25
|
-
R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
|
26
|
-
RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
|
27
|
-
TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
|
28
|
-
USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
|
29
|
-
}
|
14
|
+
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
|
15
|
+
}.freeze
|
30
16
|
|
31
17
|
NOT_SPECIFIED = 'Not specified'
|
32
18
|
|
33
|
-
TEMPORAL_RESOLUTION_FACET_VALUES = %w
|
19
|
+
TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
|
34
20
|
SUBHOURLY_INDEX = 0
|
35
21
|
HOURLY_INDEX = 1
|
36
22
|
SUBDAILY_INDEX = 2
|
@@ -42,7 +28,7 @@ module SearchSolrTools
|
|
42
28
|
YEARLY_INDEX = 8
|
43
29
|
MULTIYEARLY_INDEX = 9
|
44
30
|
|
45
|
-
SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
|
31
|
+
SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
|
46
32
|
SPATIAL_0_500_INDEX = 0
|
47
33
|
SPATIAL_501_1_INDEX = 1
|
48
34
|
SPATIAL_2_5_INDEX = 2
|
@@ -59,7 +45,7 @@ module SearchSolrTools
|
|
59
45
|
end
|
60
46
|
|
61
47
|
def self.temporal_display_str(date_range)
|
62
|
-
temporal_str =
|
48
|
+
temporal_str = (date_range[:start]).to_s
|
63
49
|
temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
|
64
50
|
temporal_str
|
65
51
|
end
|
@@ -82,6 +68,7 @@ module SearchSolrTools
|
|
82
68
|
|
83
69
|
def self.get_temporal_duration_facet(duration)
|
84
70
|
return NOT_SPECIFIED if duration.nil?
|
71
|
+
|
85
72
|
years = duration.to_i / 365
|
86
73
|
temporal_duration_range(years)
|
87
74
|
end
|
@@ -101,31 +88,28 @@ module SearchSolrTools
|
|
101
88
|
def self.facet_binning(type, format_string)
|
102
89
|
binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
|
103
90
|
if binned_facet.nil?
|
104
|
-
|
91
|
+
format_string
|
105
92
|
elsif binned_facet.eql?('exclude')
|
106
|
-
|
93
|
+
nil
|
107
94
|
else
|
108
|
-
|
95
|
+
binned_facet
|
109
96
|
end
|
110
|
-
|
111
|
-
nil
|
112
97
|
end
|
113
98
|
|
114
99
|
def self.parameter_binning(parameter_string)
|
115
100
|
binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
|
116
101
|
# use variable_level_1 if no mapping exists
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
return binned_parameter
|
122
|
-
end
|
102
|
+
return binned_parameter unless binned_parameter.nil?
|
103
|
+
|
104
|
+
parts = parameter_string.split '>'
|
105
|
+
return parts[3].strip if parts.length >= 4
|
123
106
|
|
124
107
|
nil
|
125
108
|
end
|
126
109
|
|
127
110
|
def self.resolution_value(resolution, find_index_method, resolution_values)
|
128
|
-
return NOT_SPECIFIED if
|
111
|
+
return NOT_SPECIFIED if resolution_not_specified? resolution
|
112
|
+
|
129
113
|
if resolution['type'] == 'single'
|
130
114
|
i = send(find_index_method, resolution['resolution'])
|
131
115
|
return resolution_values[i]
|
@@ -135,12 +119,12 @@ module SearchSolrTools
|
|
135
119
|
j = send(find_index_method, resolution['max_resolution'])
|
136
120
|
return resolution_values[i..j]
|
137
121
|
end
|
138
|
-
|
122
|
+
raise "Invalid resolution #{resolution['type']}"
|
139
123
|
end
|
140
124
|
|
141
125
|
def self.resolution_not_specified?(resolution)
|
142
126
|
return true if resolution.to_s.empty?
|
143
|
-
return true unless %w
|
127
|
+
return true unless %w[single range].include? resolution['type']
|
144
128
|
return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
|
145
129
|
return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
|
146
130
|
end
|
@@ -155,6 +139,7 @@ module SearchSolrTools
|
|
155
139
|
else
|
156
140
|
facet = 'Between 1 and 170 degrees of latitude change | Regional'
|
157
141
|
end
|
142
|
+
|
158
143
|
facet
|
159
144
|
end
|
160
145
|
|
@@ -167,8 +152,6 @@ module SearchSolrTools
|
|
167
152
|
"#{d.iso8601[0..-7]}Z" unless d.nil?
|
168
153
|
end
|
169
154
|
|
170
|
-
private
|
171
|
-
|
172
155
|
MIN_DATE = '00010101'
|
173
156
|
MAX_DATE = Time.now.strftime('%Y%m%d')
|
174
157
|
|
@@ -181,7 +164,6 @@ module SearchSolrTools
|
|
181
164
|
nil
|
182
165
|
end
|
183
166
|
|
184
|
-
# rubocop:disable CyclomaticComplexity
|
185
167
|
def self.find_index_for_single_temporal_resolution_value(string_duration)
|
186
168
|
iso8601_duration = ISO8601::Duration.new(string_duration)
|
187
169
|
|
@@ -201,10 +183,9 @@ module SearchSolrTools
|
|
201
183
|
MULTIYEARLY_INDEX
|
202
184
|
end
|
203
185
|
end
|
204
|
-
# rubocop:enable CyclomaticComplexity
|
205
186
|
|
206
187
|
def self.find_index_for_single_spatial_resolution_value(string_duration)
|
207
|
-
value, units = string_duration.split
|
188
|
+
value, units = string_duration.split
|
208
189
|
|
209
190
|
if units == 'deg'
|
210
191
|
spatial_resolution_index_degrees(value)
|
@@ -249,11 +230,10 @@ module SearchSolrTools
|
|
249
230
|
end
|
250
231
|
|
251
232
|
def self.date?(date)
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
valid_date
|
233
|
+
return false unless date.is_a? String
|
234
|
+
|
235
|
+
d = DateTime.parse(date.strip) rescue false
|
236
|
+
DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
|
257
237
|
end
|
258
238
|
|
259
239
|
def self.format_date_for_index(date_str, default)
|
data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rgeo/geo_json'
|
2
4
|
|
3
5
|
require_relative 'bounding_box_util'
|
4
|
-
require_relative 'iso_to_solr_format'
|
5
6
|
|
6
7
|
module SearchSolrTools
|
7
8
|
module Helpers
|
@@ -43,7 +44,7 @@ module SearchSolrTools
|
|
43
44
|
|
44
45
|
def self.geojson_to_spatial_area(spatial_coverage_geom)
|
45
46
|
spatial_areas = spatial_coverage_geom.map do |geo_json|
|
46
|
-
if %w
|
47
|
+
if %w[point].include?(geo_json.geometry_type.to_s.downcase)
|
47
48
|
0.0
|
48
49
|
else
|
49
50
|
bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
|
@@ -51,11 +52,13 @@ module SearchSolrTools
|
|
51
52
|
end
|
52
53
|
end
|
53
54
|
return nil if spatial_areas.empty?
|
54
|
-
|
55
|
+
|
56
|
+
spatial_areas.max
|
55
57
|
end
|
56
58
|
|
57
59
|
def self.geojson_to_global_facet(spatial_coverage_geom)
|
58
60
|
return nil if spatial_coverage_geom.nil?
|
61
|
+
|
59
62
|
spatial_coverage_geom.each do |geo_json|
|
60
63
|
bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
|
61
64
|
return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
|
@@ -64,13 +67,13 @@ module SearchSolrTools
|
|
64
67
|
end
|
65
68
|
|
66
69
|
def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
end
|
70
|
+
return if spatial_coverage_geom.nil?
|
71
|
+
|
72
|
+
spatial_coverage_geom.map do |geo_json|
|
73
|
+
bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
|
74
|
+
scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
|
75
|
+
scope unless scope.nil?
|
76
|
+
end.uniq
|
74
77
|
end
|
75
78
|
end
|
76
79
|
end
|
data/lib/search_solr_tools/translators/nsidc_json.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'rgeo/geo_json'
|
3
4
|
|
4
5
|
require 'search_solr_tools'
|
@@ -10,50 +11,50 @@ module SearchSolrTools
|
|
10
11
|
module Translators
|
11
12
|
# Translates NSIDC JSON format to Solr JSON add format
|
12
13
|
class NsidcJsonToSolr
|
13
|
-
PARAMETER_PARTS = %w
|
14
|
+
PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze
|
14
15
|
|
15
16
|
# rubocop:disable Metrics/MethodLength
|
16
17
|
# rubocop:disable Metrics/AbcSize
|
17
18
|
def translate(json_doc)
|
18
|
-
copy_keys = %w
|
19
|
+
copy_keys = %w[title summary keywords brokered]
|
19
20
|
temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
|
20
21
|
spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
|
21
22
|
|
22
23
|
solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
|
23
24
|
solr_add_hash.merge!(
|
24
|
-
'authoritative_id'
|
25
|
-
'dataset_version'
|
26
|
-
'data_centers'
|
27
|
-
'facet_data_center'
|
28
|
-
'authors'
|
29
|
-
'topics'
|
30
|
-
'parameters'
|
31
|
-
'full_parameters'
|
32
|
-
'facet_parameter'
|
33
|
-
'platforms'
|
34
|
-
'sensors'
|
35
|
-
'facet_sensor'
|
36
|
-
'published_date'
|
37
|
-
'spatial_coverages'
|
38
|
-
'spatial'
|
39
|
-
'spatial_area'
|
40
|
-
'facet_spatial_coverage'
|
41
|
-
'facet_spatial_scope'
|
42
|
-
'temporal_coverages'
|
43
|
-
'temporal_duration'
|
44
|
-
'temporal'
|
45
|
-
'facet_temporal_duration'
|
46
|
-
'last_revision_date'
|
47
|
-
'dataset_url'
|
48
|
-
'distribution_formats'
|
49
|
-
'facet_format'
|
50
|
-
'source'
|
51
|
-
'popularity'
|
52
|
-
'data_access_urls'
|
53
|
-
'facet_sponsored_program'
|
25
|
+
'authoritative_id' => json_doc['authoritativeId'],
|
26
|
+
'dataset_version' => json_doc['majorVersion']['version'],
|
27
|
+
'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
|
28
|
+
'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
|
29
|
+
'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
|
30
|
+
'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
|
31
|
+
'parameters' => translate_parameters(json_doc['parameters']),
|
32
|
+
'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
|
33
|
+
'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
|
34
|
+
'platforms' => translate_json_string(json_doc['platforms']),
|
35
|
+
'sensors' => translate_json_string(json_doc['instruments']),
|
36
|
+
'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
|
37
|
+
'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
|
38
|
+
'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
|
39
|
+
'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
|
40
|
+
'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
|
41
|
+
'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
|
42
|
+
'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
|
43
|
+
'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
|
44
|
+
'temporal_duration' => temporal_coverage_values['temporal_duration'],
|
45
|
+
'temporal' => temporal_coverage_values['temporal'],
|
46
|
+
'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
|
47
|
+
'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
|
48
|
+
'dataset_url' => json_doc['datasetUrl'],
|
49
|
+
'distribution_formats' => json_doc['distributionFormats'],
|
50
|
+
'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
|
51
|
+
'source' => %w[NSIDC ADE],
|
52
|
+
'popularity' => json_doc['popularity'],
|
53
|
+
'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
|
54
|
+
'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
|
54
55
|
'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
|
55
|
-
'facet_spatial_resolution'
|
56
|
-
'sponsored_programs'
|
56
|
+
'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
|
57
|
+
'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
|
57
58
|
)
|
58
59
|
end
|
59
60
|
# rubocop:enable Metrics/MethodLength
|
@@ -70,13 +71,14 @@ module SearchSolrTools
|
|
70
71
|
def translate_sensor_to_facet_sensor(json)
|
71
72
|
facet_values = []
|
72
73
|
return facet_values if json.nil?
|
74
|
+
|
73
75
|
json.each do |json_entry|
|
74
76
|
sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
|
75
|
-
if sensor_bin.eql? json_entry['shortName']
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
77
|
+
facet_values << if sensor_bin.eql? json_entry['shortName']
|
78
|
+
"#{json_entry['longName']} | #{json_entry['shortName']}"
|
79
|
+
else
|
80
|
+
" | #{sensor_bin}"
|
81
|
+
end
|
80
82
|
end
|
81
83
|
facet_values
|
82
84
|
end
|
@@ -100,12 +102,13 @@ module SearchSolrTools
|
|
100
102
|
end
|
101
103
|
|
102
104
|
def translate_iso_topic_categories(iso_topic_categories_json)
|
103
|
-
iso_topic_categories_json
|
105
|
+
iso_topic_categories_json&.map { |t| t['name'] }
|
104
106
|
end
|
105
107
|
|
106
108
|
def translate_data_access_urls(json)
|
107
109
|
values = []
|
108
110
|
return values if json.nil?
|
111
|
+
|
109
112
|
json.each do |json_entry|
|
110
113
|
link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
|
111
114
|
link_type = json_entry['type'].nil? ? '' : json_entry['type']
|
@@ -120,6 +123,7 @@ module SearchSolrTools
|
|
120
123
|
def translate_internal_datacenters(json)
|
121
124
|
values = []
|
122
125
|
return values if json.nil?
|
126
|
+
|
123
127
|
json.each do |json_entry|
|
124
128
|
short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
|
125
129
|
values << short_name
|
@@ -130,6 +134,7 @@ module SearchSolrTools
|
|
130
134
|
def translate_short_long_names_to_facet_value(json)
|
131
135
|
facet_values = []
|
132
136
|
return facet_values if json.nil?
|
137
|
+
|
133
138
|
json.each do |json_entry|
|
134
139
|
long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
|
135
140
|
short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
|
@@ -169,6 +174,7 @@ module SearchSolrTools
|
|
169
174
|
def translate_parameters_to_facet_parameters(parameters_json)
|
170
175
|
parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
|
171
176
|
return [] if parameters_strings.nil?
|
177
|
+
|
172
178
|
facet_params = []
|
173
179
|
parameters_strings.each do |str|
|
174
180
|
facet_params << Helpers::SolrFormat.parameter_binning(str)
|
@@ -199,8 +205,7 @@ module SearchSolrTools
|
|
199
205
|
end
|
200
206
|
|
201
207
|
def generate_data_citation_creators(data_citation)
|
202
|
-
data_citation.nil? ?
|
203
|
-
creators
|
208
|
+
data_citation.nil? ? [] : data_citation['creators']
|
204
209
|
end
|
205
210
|
|
206
211
|
def generate_part_array(json, limit_values = nil)
|
@@ -214,6 +219,5 @@ module SearchSolrTools
|
|
214
219
|
parts
|
215
220
|
end
|
216
221
|
end
|
217
|
-
# rubocop:enable Metrics/ClassLength
|
218
222
|
end
|
219
223
|
end
|
data/lib/search_solr_tools.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'search_solr_tools/config/environments'
|
2
4
|
require_relative 'search_solr_tools/version'
|
3
5
|
|
4
|
-
require_relative 'search_solr_tools/helpers/selectors'
|
5
6
|
require_relative 'search_solr_tools/helpers/harvest_status'
|
6
7
|
require_relative 'search_solr_tools/errors/harvest_error'
|
7
8
|
|
8
|
-
%w
|
9
|
+
%w[harvesters translators].each do |subdir|
|
9
10
|
Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
|
10
11
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.1.0
|
4
|
+
version: 6.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -14,7 +14,7 @@ authors:
|
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date: 2023-07-
|
17
|
+
date: 2023-07-24 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: ffi-geos
|
@@ -321,59 +321,17 @@ files:
|
|
321
321
|
- lib/search_solr_tools/config/environments.rb
|
322
322
|
- lib/search_solr_tools/config/environments.yaml
|
323
323
|
- lib/search_solr_tools/errors/harvest_error.rb
|
324
|
-
- lib/search_solr_tools/harvesters/adc.rb
|
325
|
-
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
326
324
|
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
327
325
|
- lib/search_solr_tools/harvesters/base.rb
|
328
|
-
- lib/search_solr_tools/harvesters/bcodmo.rb
|
329
|
-
- lib/search_solr_tools/harvesters/data_one.rb
|
330
|
-
- lib/search_solr_tools/harvesters/echo.rb
|
331
|
-
- lib/search_solr_tools/harvesters/eol.rb
|
332
|
-
- lib/search_solr_tools/harvesters/gtnp.rb
|
333
|
-
- lib/search_solr_tools/harvesters/ices.rb
|
334
|
-
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
335
|
-
- lib/search_solr_tools/harvesters/nmi.rb
|
336
|
-
- lib/search_solr_tools/harvesters/nodc.rb
|
337
326
|
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
338
327
|
- lib/search_solr_tools/harvesters/nsidc_json.rb
|
339
|
-
- lib/search_solr_tools/harvesters/oai.rb
|
340
|
-
- lib/search_solr_tools/harvesters/pdc.rb
|
341
|
-
- lib/search_solr_tools/harvesters/r2r.rb
|
342
|
-
- lib/search_solr_tools/harvesters/rda.rb
|
343
|
-
- lib/search_solr_tools/harvesters/tdar.rb
|
344
|
-
- lib/search_solr_tools/harvesters/usgs.rb
|
345
328
|
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
346
|
-
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
347
|
-
- lib/search_solr_tools/helpers/data_one_format.rb
|
348
329
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
349
330
|
- lib/search_solr_tools/helpers/harvest_status.rb
|
350
331
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
351
|
-
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
352
|
-
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
353
|
-
- lib/search_solr_tools/helpers/ncdc_paleo_format.rb
|
354
|
-
- lib/search_solr_tools/helpers/query_builder.rb
|
355
|
-
- lib/search_solr_tools/helpers/r2r_format.rb
|
356
|
-
- lib/search_solr_tools/helpers/selectors.rb
|
357
332
|
- lib/search_solr_tools/helpers/solr_format.rb
|
358
|
-
- lib/search_solr_tools/helpers/tdar_format.rb
|
359
333
|
- lib/search_solr_tools/helpers/translate_spatial_coverage.rb
|
360
334
|
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
361
|
-
- lib/search_solr_tools/helpers/usgs_format.rb
|
362
|
-
- lib/search_solr_tools/selectors/adc.rb
|
363
|
-
- lib/search_solr_tools/selectors/data_one.rb
|
364
|
-
- lib/search_solr_tools/selectors/echo_iso.rb
|
365
|
-
- lib/search_solr_tools/selectors/ices_iso.rb
|
366
|
-
- lib/search_solr_tools/selectors/ncdc_paleo.rb
|
367
|
-
- lib/search_solr_tools/selectors/nmi.rb
|
368
|
-
- lib/search_solr_tools/selectors/nodc_iso.rb
|
369
|
-
- lib/search_solr_tools/selectors/pdc_iso.rb
|
370
|
-
- lib/search_solr_tools/selectors/r2r.rb
|
371
|
-
- lib/search_solr_tools/selectors/rda.rb
|
372
|
-
- lib/search_solr_tools/selectors/tdar_opensearch.rb
|
373
|
-
- lib/search_solr_tools/selectors/usgs_iso.rb
|
374
|
-
- lib/search_solr_tools/translators/bcodmo_json.rb
|
375
|
-
- lib/search_solr_tools/translators/eol_to_solr.rb
|
376
|
-
- lib/search_solr_tools/translators/gtnp_json.rb
|
377
335
|
- lib/search_solr_tools/translators/nsidc_json.rb
|
378
336
|
- lib/search_solr_tools/version.rb
|
379
337
|
- search_solr_tools.gemspec
|
@@ -396,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
396
354
|
- !ruby/object:Gem::Version
|
397
355
|
version: '0'
|
398
356
|
requirements: []
|
399
|
-
rubygems_version: 3.4.
|
357
|
+
rubygems_version: 3.4.17
|
400
358
|
signing_key:
|
401
359
|
specification_version: 4
|
402
360
|
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class Adc < Base
|
6
|
-
def initialize(env = 'development', die_on_failure = false)
|
7
|
-
super
|
8
|
-
@page_size = 250
|
9
|
-
@translator = Helpers::IsoToSolr.new :adc
|
10
|
-
end
|
11
|
-
|
12
|
-
def harvest_and_delete
|
13
|
-
puts "Running harvest of adc catalog from #{metadata_url}"
|
14
|
-
super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
|
15
|
-
end
|
16
|
-
|
17
|
-
def harvest_adc_into_solr
|
18
|
-
start = 0
|
19
|
-
while (entries = get_results_from_adc(start)) && (entries.length > 0)
|
20
|
-
begin
|
21
|
-
insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
|
22
|
-
rescue => e
|
23
|
-
puts "ERROR: #{e}\n\n"
|
24
|
-
raise e if @die_on_failure
|
25
|
-
end
|
26
|
-
start += @page_size
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def get_results_from_adc(start)
|
31
|
-
get_results(build_request(start, @page_size), './response/result/doc')
|
32
|
-
end
|
33
|
-
|
34
|
-
def metadata_url
|
35
|
-
SolrEnvironments[@environment][:adc_url]
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_docs_with_translated_entries_from_adc(entries)
|
39
|
-
entries.map do |e|
|
40
|
-
create_new_solr_add_doc_with_child(@translator.translate(e).root)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def build_request(start = 0, max_records = 100)
|
45
|
-
"#{metadata_url}&start=#{start}&rows=#{max_records}"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
require_relative 'auto_suggest'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class AdeAutoSuggest < AutoSuggest
|
6
|
-
def harvest_and_delete
|
7
|
-
puts 'Building auto-suggest indexes for ADE'
|
8
|
-
super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
|
9
|
-
end
|
10
|
-
|
11
|
-
def harvest
|
12
|
-
url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
|
13
|
-
super url, fields
|
14
|
-
end
|
15
|
-
|
16
|
-
def fields
|
17
|
-
{
|
18
|
-
'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
|
19
|
-
'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
|
20
|
-
}
|
21
|
-
end
|
22
|
-
|
23
|
-
def split_creator(value, count, field_weight, source, split_regex)
|
24
|
-
add_docs = []
|
25
|
-
value.downcase.split(split_regex).each do |v|
|
26
|
-
v = v.strip.chomp('/')
|
27
|
-
add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
|
28
|
-
end
|
29
|
-
add_docs
|
30
|
-
end
|
31
|
-
|
32
|
-
def keyword_creator(value, count, field_weight, source)
|
33
|
-
split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
|
34
|
-
end
|
35
|
-
|
36
|
-
def author_creator(value, count, field_weight, source)
|
37
|
-
split_creator value, count, field_weight, source, %r{/;/}
|
38
|
-
end
|
39
|
-
|
40
|
-
def ade_length_limit_creator(value, count, field_weight, source)
|
41
|
-
return [] if value.length > 80
|
42
|
-
standard_add_creator value, count, field_weight, source
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|