search_solr_tools 6.1.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
  require 'iso8601'
3
5
 
@@ -7,30 +9,14 @@ require_relative 'facet_configuration'
7
9
  module SearchSolrTools
8
10
  module Helpers
9
11
  # Methods for generating formatted values that can be indexed by SOLR
10
- # rubocop:disable Metrics/ModuleLength
11
12
  module SolrFormat
12
13
  DATA_CENTER_NAMES = {
13
- BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
14
- ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
15
- DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
16
- ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
17
- EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
18
- GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
19
- ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
20
- NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
21
- NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
22
- NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
23
- NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
24
- PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
25
- R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
26
- RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
27
- TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
28
- USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
29
- }
14
+ NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
15
+ }.freeze
30
16
 
31
17
  NOT_SPECIFIED = 'Not specified'
32
18
 
33
- TEMPORAL_RESOLUTION_FACET_VALUES = %w(Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly)
19
+ TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
34
20
  SUBHOURLY_INDEX = 0
35
21
  HOURLY_INDEX = 1
36
22
  SUBDAILY_INDEX = 2
@@ -42,7 +28,7 @@ module SearchSolrTools
42
28
  YEARLY_INDEX = 8
43
29
  MULTIYEARLY_INDEX = 9
44
30
 
45
- SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
31
+ SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
46
32
  SPATIAL_0_500_INDEX = 0
47
33
  SPATIAL_501_1_INDEX = 1
48
34
  SPATIAL_2_5_INDEX = 2
@@ -59,7 +45,7 @@ module SearchSolrTools
59
45
  end
60
46
 
61
47
  def self.temporal_display_str(date_range)
62
- temporal_str = "#{date_range[:start]}"
48
+ temporal_str = (date_range[:start]).to_s
63
49
  temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
64
50
  temporal_str
65
51
  end
@@ -82,6 +68,7 @@ module SearchSolrTools
82
68
 
83
69
  def self.get_temporal_duration_facet(duration)
84
70
  return NOT_SPECIFIED if duration.nil?
71
+
85
72
  years = duration.to_i / 365
86
73
  temporal_duration_range(years)
87
74
  end
@@ -101,31 +88,28 @@ module SearchSolrTools
101
88
  def self.facet_binning(type, format_string)
102
89
  binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
103
90
  if binned_facet.nil?
104
- return format_string
91
+ format_string
105
92
  elsif binned_facet.eql?('exclude')
106
- return nil
93
+ nil
107
94
  else
108
- return binned_facet
95
+ binned_facet
109
96
  end
110
-
111
- nil
112
97
  end
113
98
 
114
99
  def self.parameter_binning(parameter_string)
115
100
  binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
116
101
  # use variable_level_1 if no mapping exists
117
- if binned_parameter.nil?
118
- parts = parameter_string.split '>'
119
- return parts[3].strip if parts.length >= 4
120
- else
121
- return binned_parameter
122
- end
102
+ return binned_parameter unless binned_parameter.nil?
103
+
104
+ parts = parameter_string.split '>'
105
+ return parts[3].strip if parts.length >= 4
123
106
 
124
107
  nil
125
108
  end
126
109
 
127
110
  def self.resolution_value(resolution, find_index_method, resolution_values)
128
- return NOT_SPECIFIED if self.resolution_not_specified? resolution
111
+ return NOT_SPECIFIED if resolution_not_specified? resolution
112
+
129
113
  if resolution['type'] == 'single'
130
114
  i = send(find_index_method, resolution['resolution'])
131
115
  return resolution_values[i]
@@ -135,12 +119,12 @@ module SearchSolrTools
135
119
  j = send(find_index_method, resolution['max_resolution'])
136
120
  return resolution_values[i..j]
137
121
  end
138
- fail "Invalid resolution #{resolution['type']}"
122
+ raise "Invalid resolution #{resolution['type']}"
139
123
  end
140
124
 
141
125
  def self.resolution_not_specified?(resolution)
142
126
  return true if resolution.to_s.empty?
143
- return true unless %w(single range).include? resolution['type']
127
+ return true unless %w[single range].include? resolution['type']
144
128
  return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
145
129
  return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
146
130
  end
@@ -155,6 +139,7 @@ module SearchSolrTools
155
139
  else
156
140
  facet = 'Between 1 and 170 degrees of latitude change | Regional'
157
141
  end
142
+
158
143
  facet
159
144
  end
160
145
 
@@ -167,8 +152,6 @@ module SearchSolrTools
167
152
  "#{d.iso8601[0..-7]}Z" unless d.nil?
168
153
  end
169
154
 
170
- private
171
-
172
155
  MIN_DATE = '00010101'
173
156
  MAX_DATE = Time.now.strftime('%Y%m%d')
174
157
 
@@ -181,7 +164,6 @@ module SearchSolrTools
181
164
  nil
182
165
  end
183
166
 
184
- # rubocop:disable CyclomaticComplexity
185
167
  def self.find_index_for_single_temporal_resolution_value(string_duration)
186
168
  iso8601_duration = ISO8601::Duration.new(string_duration)
187
169
 
@@ -201,10 +183,9 @@ module SearchSolrTools
201
183
  MULTIYEARLY_INDEX
202
184
  end
203
185
  end
204
- # rubocop:enable CyclomaticComplexity
205
186
 
206
187
  def self.find_index_for_single_spatial_resolution_value(string_duration)
207
- value, units = string_duration.split(' ')
188
+ value, units = string_duration.split
208
189
 
209
190
  if units == 'deg'
210
191
  spatial_resolution_index_degrees(value)
@@ -249,11 +230,10 @@ module SearchSolrTools
249
230
  end
250
231
 
251
232
  def self.date?(date)
252
- valid_date = if date.is_a? String
253
- d = DateTime.parse(date.strip) rescue false
254
- DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
255
- end
256
- valid_date
233
+ return false unless date.is_a? String
234
+
235
+ d = DateTime.parse(date.strip) rescue false
236
+ DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
257
237
  end
258
238
 
259
239
  def self.format_date_for_index(date_str, default)
@@ -1,7 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rgeo/geo_json'
2
4
 
3
5
  require_relative 'bounding_box_util'
4
- require_relative 'iso_to_solr_format'
5
6
 
6
7
  module SearchSolrTools
7
8
  module Helpers
@@ -43,7 +44,7 @@ module SearchSolrTools
43
44
 
44
45
  def self.geojson_to_spatial_area(spatial_coverage_geom)
45
46
  spatial_areas = spatial_coverage_geom.map do |geo_json|
46
- if %w(point).include?(geo_json.geometry_type.to_s.downcase)
47
+ if %w[point].include?(geo_json.geometry_type.to_s.downcase)
47
48
  0.0
48
49
  else
49
50
  bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
@@ -51,11 +52,13 @@ module SearchSolrTools
51
52
  end
52
53
  end
53
54
  return nil if spatial_areas.empty?
54
- spatial_areas.sort.last
55
+
56
+ spatial_areas.max
55
57
  end
56
58
 
57
59
  def self.geojson_to_global_facet(spatial_coverage_geom)
58
60
  return nil if spatial_coverage_geom.nil?
61
+
59
62
  spatial_coverage_geom.each do |geo_json|
60
63
  bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
61
64
  return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
@@ -64,13 +67,13 @@ module SearchSolrTools
64
67
  end
65
68
 
66
69
  def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
67
- unless spatial_coverage_geom.nil?
68
- spatial_coverage_geom.map do |geo_json|
69
- bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
70
- scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
71
- scope unless scope.nil?
72
- end.uniq
73
- end
70
+ return if spatial_coverage_geom.nil?
71
+
72
+ spatial_coverage_geom.map do |geo_json|
73
+ bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
74
+ scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
75
+ scope unless scope.nil?
76
+ end.uniq
74
77
  end
75
78
  end
76
79
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rgeo/geo_json'
2
4
 
3
5
  require_relative 'solr_format'
@@ -1,4 +1,5 @@
1
- # rubocop:disable Metrics/ClassLength
1
+ # frozen_string_literal: true
2
+
2
3
  require 'rgeo/geo_json'
3
4
 
4
5
  require 'search_solr_tools'
@@ -10,50 +11,50 @@ module SearchSolrTools
10
11
  module Translators
11
12
  # Translates NSIDC JSON format to Solr JSON add format
12
13
  class NsidcJsonToSolr
13
- PARAMETER_PARTS = %w(category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable)
14
+ PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze
14
15
 
15
16
  # rubocop:disable Metrics/MethodLength
16
17
  # rubocop:disable Metrics/AbcSize
17
18
  def translate(json_doc)
18
- copy_keys = %w(title summary keywords brokered)
19
+ copy_keys = %w[title summary keywords brokered]
19
20
  temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
20
21
  spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
21
22
 
22
23
  solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
23
24
  solr_add_hash.merge!(
24
- 'authoritative_id' => json_doc['authoritativeId'],
25
- 'dataset_version' => json_doc['majorVersion']['version'],
26
- 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
27
- 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
28
- 'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
29
- 'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
30
- 'parameters' => translate_parameters(json_doc['parameters']),
31
- 'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
32
- 'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
33
- 'platforms' => translate_json_string(json_doc['platforms']),
34
- 'sensors' => translate_json_string(json_doc['instruments']),
35
- 'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
36
- 'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
37
- 'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
38
- 'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
39
- 'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
40
- 'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
41
- 'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
42
- 'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
43
- 'temporal_duration' => temporal_coverage_values['temporal_duration'],
44
- 'temporal' => temporal_coverage_values['temporal'],
45
- 'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
46
- 'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
47
- 'dataset_url' => json_doc['datasetUrl'],
48
- 'distribution_formats' => json_doc['distributionFormats'],
49
- 'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
50
- 'source' => %w(NSIDC ADE),
51
- 'popularity' => json_doc['popularity'],
52
- 'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
53
- 'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
25
+ 'authoritative_id' => json_doc['authoritativeId'],
26
+ 'dataset_version' => json_doc['majorVersion']['version'],
27
+ 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
28
+ 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
29
+ 'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
30
+ 'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
31
+ 'parameters' => translate_parameters(json_doc['parameters']),
32
+ 'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
33
+ 'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
34
+ 'platforms' => translate_json_string(json_doc['platforms']),
35
+ 'sensors' => translate_json_string(json_doc['instruments']),
36
+ 'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
37
+ 'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
38
+ 'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
39
+ 'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
40
+ 'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
41
+ 'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
42
+ 'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
43
+ 'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
44
+ 'temporal_duration' => temporal_coverage_values['temporal_duration'],
45
+ 'temporal' => temporal_coverage_values['temporal'],
46
+ 'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
47
+ 'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
48
+ 'dataset_url' => json_doc['datasetUrl'],
49
+ 'distribution_formats' => json_doc['distributionFormats'],
50
+ 'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
51
+ 'source' => %w[NSIDC ADE],
52
+ 'popularity' => json_doc['popularity'],
53
+ 'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
54
+ 'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
54
55
  'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
55
- 'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
56
- 'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
56
+ 'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
57
+ 'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
57
58
  )
58
59
  end
59
60
  # rubocop:enable Metrics/MethodLength
@@ -70,13 +71,14 @@ module SearchSolrTools
70
71
  def translate_sensor_to_facet_sensor(json)
71
72
  facet_values = []
72
73
  return facet_values if json.nil?
74
+
73
75
  json.each do |json_entry|
74
76
  sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
75
- if sensor_bin.eql? json_entry['shortName']
76
- facet_values << "#{json_entry['longName']} | #{json_entry['shortName']}"
77
- else
78
- facet_values << " | #{sensor_bin}"
79
- end
77
+ facet_values << if sensor_bin.eql? json_entry['shortName']
78
+ "#{json_entry['longName']} | #{json_entry['shortName']}"
79
+ else
80
+ " | #{sensor_bin}"
81
+ end
80
82
  end
81
83
  facet_values
82
84
  end
@@ -100,12 +102,13 @@ module SearchSolrTools
100
102
  end
101
103
 
102
104
  def translate_iso_topic_categories(iso_topic_categories_json)
103
- iso_topic_categories_json.map { |t| t['name'] } unless iso_topic_categories_json.nil?
105
+ iso_topic_categories_json&.map { |t| t['name'] }
104
106
  end
105
107
 
106
108
  def translate_data_access_urls(json)
107
109
  values = []
108
110
  return values if json.nil?
111
+
109
112
  json.each do |json_entry|
110
113
  link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
111
114
  link_type = json_entry['type'].nil? ? '' : json_entry['type']
@@ -120,6 +123,7 @@ module SearchSolrTools
120
123
  def translate_internal_datacenters(json)
121
124
  values = []
122
125
  return values if json.nil?
126
+
123
127
  json.each do |json_entry|
124
128
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
125
129
  values << short_name
@@ -130,6 +134,7 @@ module SearchSolrTools
130
134
  def translate_short_long_names_to_facet_value(json)
131
135
  facet_values = []
132
136
  return facet_values if json.nil?
137
+
133
138
  json.each do |json_entry|
134
139
  long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
135
140
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
@@ -169,6 +174,7 @@ module SearchSolrTools
169
174
  def translate_parameters_to_facet_parameters(parameters_json)
170
175
  parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
171
176
  return [] if parameters_strings.nil?
177
+
172
178
  facet_params = []
173
179
  parameters_strings.each do |str|
174
180
  facet_params << Helpers::SolrFormat.parameter_binning(str)
@@ -199,8 +205,7 @@ module SearchSolrTools
199
205
  end
200
206
 
201
207
  def generate_data_citation_creators(data_citation)
202
- data_citation.nil? ? creators = [] : creators = data_citation['creators']
203
- creators
208
+ data_citation.nil? ? [] : data_citation['creators']
204
209
  end
205
210
 
206
211
  def generate_part_array(json, limit_values = nil)
@@ -214,6 +219,5 @@ module SearchSolrTools
214
219
  parts
215
220
  end
216
221
  end
217
- # rubocop:enable Metrics/ClassLength
218
222
  end
219
223
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
- VERSION = '6.1.0'
4
+ VERSION = '6.3.0'
3
5
  end
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'search_solr_tools/config/environments'
2
4
  require_relative 'search_solr_tools/version'
3
5
 
4
- require_relative 'search_solr_tools/helpers/selectors'
5
6
  require_relative 'search_solr_tools/helpers/harvest_status'
6
7
  require_relative 'search_solr_tools/errors/harvest_error'
7
8
 
8
- %w( selectors harvesters translators ).each do |subdir|
9
+ %w[harvesters translators].each do |subdir|
9
10
  Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
10
11
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.1.0
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -14,7 +14,7 @@ authors:
14
14
  autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2023-07-17 00:00:00.000000000 Z
17
+ date: 2023-07-24 00:00:00.000000000 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: ffi-geos
@@ -321,59 +321,17 @@ files:
321
321
  - lib/search_solr_tools/config/environments.rb
322
322
  - lib/search_solr_tools/config/environments.yaml
323
323
  - lib/search_solr_tools/errors/harvest_error.rb
324
- - lib/search_solr_tools/harvesters/adc.rb
325
- - lib/search_solr_tools/harvesters/ade_auto_suggest.rb
326
324
  - lib/search_solr_tools/harvesters/auto_suggest.rb
327
325
  - lib/search_solr_tools/harvesters/base.rb
328
- - lib/search_solr_tools/harvesters/bcodmo.rb
329
- - lib/search_solr_tools/harvesters/data_one.rb
330
- - lib/search_solr_tools/harvesters/echo.rb
331
- - lib/search_solr_tools/harvesters/eol.rb
332
- - lib/search_solr_tools/harvesters/gtnp.rb
333
- - lib/search_solr_tools/harvesters/ices.rb
334
- - lib/search_solr_tools/harvesters/ncdc_paleo.rb
335
- - lib/search_solr_tools/harvesters/nmi.rb
336
- - lib/search_solr_tools/harvesters/nodc.rb
337
326
  - lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
338
327
  - lib/search_solr_tools/harvesters/nsidc_json.rb
339
- - lib/search_solr_tools/harvesters/oai.rb
340
- - lib/search_solr_tools/harvesters/pdc.rb
341
- - lib/search_solr_tools/harvesters/r2r.rb
342
- - lib/search_solr_tools/harvesters/rda.rb
343
- - lib/search_solr_tools/harvesters/tdar.rb
344
- - lib/search_solr_tools/harvesters/usgs.rb
345
328
  - lib/search_solr_tools/helpers/bounding_box_util.rb
346
- - lib/search_solr_tools/helpers/csw_iso_query_builder.rb
347
- - lib/search_solr_tools/helpers/data_one_format.rb
348
329
  - lib/search_solr_tools/helpers/facet_configuration.rb
349
330
  - lib/search_solr_tools/helpers/harvest_status.rb
350
331
  - lib/search_solr_tools/helpers/iso_namespaces.rb
351
- - lib/search_solr_tools/helpers/iso_to_solr.rb
352
- - lib/search_solr_tools/helpers/iso_to_solr_format.rb
353
- - lib/search_solr_tools/helpers/ncdc_paleo_format.rb
354
- - lib/search_solr_tools/helpers/query_builder.rb
355
- - lib/search_solr_tools/helpers/r2r_format.rb
356
- - lib/search_solr_tools/helpers/selectors.rb
357
332
  - lib/search_solr_tools/helpers/solr_format.rb
358
- - lib/search_solr_tools/helpers/tdar_format.rb
359
333
  - lib/search_solr_tools/helpers/translate_spatial_coverage.rb
360
334
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
361
- - lib/search_solr_tools/helpers/usgs_format.rb
362
- - lib/search_solr_tools/selectors/adc.rb
363
- - lib/search_solr_tools/selectors/data_one.rb
364
- - lib/search_solr_tools/selectors/echo_iso.rb
365
- - lib/search_solr_tools/selectors/ices_iso.rb
366
- - lib/search_solr_tools/selectors/ncdc_paleo.rb
367
- - lib/search_solr_tools/selectors/nmi.rb
368
- - lib/search_solr_tools/selectors/nodc_iso.rb
369
- - lib/search_solr_tools/selectors/pdc_iso.rb
370
- - lib/search_solr_tools/selectors/r2r.rb
371
- - lib/search_solr_tools/selectors/rda.rb
372
- - lib/search_solr_tools/selectors/tdar_opensearch.rb
373
- - lib/search_solr_tools/selectors/usgs_iso.rb
374
- - lib/search_solr_tools/translators/bcodmo_json.rb
375
- - lib/search_solr_tools/translators/eol_to_solr.rb
376
- - lib/search_solr_tools/translators/gtnp_json.rb
377
335
  - lib/search_solr_tools/translators/nsidc_json.rb
378
336
  - lib/search_solr_tools/version.rb
379
337
  - search_solr_tools.gemspec
@@ -396,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
396
354
  - !ruby/object:Gem::Version
397
355
  version: '0'
398
356
  requirements: []
399
- rubygems_version: 3.4.10
357
+ rubygems_version: 3.4.17
400
358
  signing_key:
401
359
  specification_version: 4
402
360
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
@@ -1,49 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class Adc < Base
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @page_size = 250
9
- @translator = Helpers::IsoToSolr.new :adc
10
- end
11
-
12
- def harvest_and_delete
13
- puts "Running harvest of adc catalog from #{metadata_url}"
14
- super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
15
- end
16
-
17
- def harvest_adc_into_solr
18
- start = 0
19
- while (entries = get_results_from_adc(start)) && (entries.length > 0)
20
- begin
21
- insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
22
- rescue => e
23
- puts "ERROR: #{e}\n\n"
24
- raise e if @die_on_failure
25
- end
26
- start += @page_size
27
- end
28
- end
29
-
30
- def get_results_from_adc(start)
31
- get_results(build_request(start, @page_size), './response/result/doc')
32
- end
33
-
34
- def metadata_url
35
- SolrEnvironments[@environment][:adc_url]
36
- end
37
-
38
- def get_docs_with_translated_entries_from_adc(entries)
39
- entries.map do |e|
40
- create_new_solr_add_doc_with_child(@translator.translate(e).root)
41
- end
42
- end
43
-
44
- def build_request(start = 0, max_records = 100)
45
- "#{metadata_url}&start=#{start}&rows=#{max_records}"
46
- end
47
- end
48
- end
49
- end
@@ -1,46 +0,0 @@
1
- require_relative 'auto_suggest'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class AdeAutoSuggest < AutoSuggest
6
- def harvest_and_delete
7
- puts 'Building auto-suggest indexes for ADE'
8
- super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
9
- end
10
-
11
- def harvest
12
- url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
13
- super url, fields
14
- end
15
-
16
- def fields
17
- {
18
- 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
19
- 'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
20
- }
21
- end
22
-
23
- def split_creator(value, count, field_weight, source, split_regex)
24
- add_docs = []
25
- value.downcase.split(split_regex).each do |v|
26
- v = v.strip.chomp('/')
27
- add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
28
- end
29
- add_docs
30
- end
31
-
32
- def keyword_creator(value, count, field_weight, source)
33
- split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
34
- end
35
-
36
- def author_creator(value, count, field_weight, source)
37
- split_creator value, count, field_weight, source, %r{/;/}
38
- end
39
-
40
- def ade_length_limit_creator(value, count, field_weight, source)
41
- return [] if value.length > 80
42
- standard_add_creator value, count, field_weight, source
43
- end
44
- end
45
- end
46
- end