search_solr_tools 6.1.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
  require 'iso8601'
3
5
 
@@ -7,30 +9,14 @@ require_relative 'facet_configuration'
7
9
  module SearchSolrTools
8
10
  module Helpers
9
11
  # Methods for generating formatted values that can be indexed by SOLR
10
- # rubocop:disable Metrics/ModuleLength
11
12
  module SolrFormat
12
13
  DATA_CENTER_NAMES = {
13
- BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
14
- ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
15
- DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
16
- ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
17
- EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
18
- GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
19
- ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
20
- NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
21
- NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
22
- NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
23
- NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
24
- PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
25
- R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
26
- RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
27
- TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
28
- USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
29
- }
14
+ NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' }
15
+ }.freeze
30
16
 
31
17
  NOT_SPECIFIED = 'Not specified'
32
18
 
33
- TEMPORAL_RESOLUTION_FACET_VALUES = %w(Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly)
19
+ TEMPORAL_RESOLUTION_FACET_VALUES = %w[Subhourly Hourly Subdaily Daily Weekly Submonthly Monthly Subyearly Yearly Multiyearly].freeze
34
20
  SUBHOURLY_INDEX = 0
35
21
  HOURLY_INDEX = 1
36
22
  SUBDAILY_INDEX = 2
@@ -42,7 +28,7 @@ module SearchSolrTools
42
28
  YEARLY_INDEX = 8
43
29
  MULTIYEARLY_INDEX = 9
44
30
 
45
- SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km']
31
+ SPATIAL_RESOLUTION_FACET_VALUES = ['0 - 500 m', '501 m - 1 km', '2 - 5 km', '6 - 15 km', '16 - 30 km', '>30 km'].freeze
46
32
  SPATIAL_0_500_INDEX = 0
47
33
  SPATIAL_501_1_INDEX = 1
48
34
  SPATIAL_2_5_INDEX = 2
@@ -59,7 +45,7 @@ module SearchSolrTools
59
45
  end
60
46
 
61
47
  def self.temporal_display_str(date_range)
62
- temporal_str = "#{date_range[:start]}"
48
+ temporal_str = (date_range[:start]).to_s
63
49
  temporal_str += ",#{date_range[:end]}" unless date_range[:end].nil?
64
50
  temporal_str
65
51
  end
@@ -82,6 +68,7 @@ module SearchSolrTools
82
68
 
83
69
  def self.get_temporal_duration_facet(duration)
84
70
  return NOT_SPECIFIED if duration.nil?
71
+
85
72
  years = duration.to_i / 365
86
73
  temporal_duration_range(years)
87
74
  end
@@ -101,31 +88,28 @@ module SearchSolrTools
101
88
  def self.facet_binning(type, format_string)
102
89
  binned_facet = bin(FacetConfiguration.get_facet_bin(type), format_string)
103
90
  if binned_facet.nil?
104
- return format_string
91
+ format_string
105
92
  elsif binned_facet.eql?('exclude')
106
- return nil
93
+ nil
107
94
  else
108
- return binned_facet
95
+ binned_facet
109
96
  end
110
-
111
- nil
112
97
  end
113
98
 
114
99
  def self.parameter_binning(parameter_string)
115
100
  binned_parameter = bin(FacetConfiguration.get_facet_bin('parameter'), parameter_string)
116
101
  # use variable_level_1 if no mapping exists
117
- if binned_parameter.nil?
118
- parts = parameter_string.split '>'
119
- return parts[3].strip if parts.length >= 4
120
- else
121
- return binned_parameter
122
- end
102
+ return binned_parameter unless binned_parameter.nil?
103
+
104
+ parts = parameter_string.split '>'
105
+ return parts[3].strip if parts.length >= 4
123
106
 
124
107
  nil
125
108
  end
126
109
 
127
110
  def self.resolution_value(resolution, find_index_method, resolution_values)
128
- return NOT_SPECIFIED if self.resolution_not_specified? resolution
111
+ return NOT_SPECIFIED if resolution_not_specified? resolution
112
+
129
113
  if resolution['type'] == 'single'
130
114
  i = send(find_index_method, resolution['resolution'])
131
115
  return resolution_values[i]
@@ -135,12 +119,12 @@ module SearchSolrTools
135
119
  j = send(find_index_method, resolution['max_resolution'])
136
120
  return resolution_values[i..j]
137
121
  end
138
- fail "Invalid resolution #{resolution['type']}"
122
+ raise "Invalid resolution #{resolution['type']}"
139
123
  end
140
124
 
141
125
  def self.resolution_not_specified?(resolution)
142
126
  return true if resolution.to_s.empty?
143
- return true unless %w(single range).include? resolution['type']
127
+ return true unless %w[single range].include? resolution['type']
144
128
  return true if resolution['type'] == 'single' && resolution['resolution'].to_s.empty?
145
129
  return true if resolution['type'] == 'range' && resolution['min_resolution'].to_s.empty?
146
130
  end
@@ -155,6 +139,7 @@ module SearchSolrTools
155
139
  else
156
140
  facet = 'Between 1 and 170 degrees of latitude change | Regional'
157
141
  end
142
+
158
143
  facet
159
144
  end
160
145
 
@@ -167,8 +152,6 @@ module SearchSolrTools
167
152
  "#{d.iso8601[0..-7]}Z" unless d.nil?
168
153
  end
169
154
 
170
- private
171
-
172
155
  MIN_DATE = '00010101'
173
156
  MAX_DATE = Time.now.strftime('%Y%m%d')
174
157
 
@@ -181,7 +164,6 @@ module SearchSolrTools
181
164
  nil
182
165
  end
183
166
 
184
- # rubocop:disable CyclomaticComplexity
185
167
  def self.find_index_for_single_temporal_resolution_value(string_duration)
186
168
  iso8601_duration = ISO8601::Duration.new(string_duration)
187
169
 
@@ -201,10 +183,9 @@ module SearchSolrTools
201
183
  MULTIYEARLY_INDEX
202
184
  end
203
185
  end
204
- # rubocop:enable CyclomaticComplexity
205
186
 
206
187
  def self.find_index_for_single_spatial_resolution_value(string_duration)
207
- value, units = string_duration.split(' ')
188
+ value, units = string_duration.split
208
189
 
209
190
  if units == 'deg'
210
191
  spatial_resolution_index_degrees(value)
@@ -249,11 +230,10 @@ module SearchSolrTools
249
230
  end
250
231
 
251
232
  def self.date?(date)
252
- valid_date = if date.is_a? String
253
- d = DateTime.parse(date.strip) rescue false
254
- DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
255
- end
256
- valid_date
233
+ return false unless date.is_a? String
234
+
235
+ d = DateTime.parse(date.strip) rescue false
236
+ DateTime.valid_date?(d.year, d.mon, d.day) unless d.eql?(false)
257
237
  end
258
238
 
259
239
  def self.format_date_for_index(date_str, default)
@@ -1,7 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rgeo/geo_json'
2
4
 
3
5
  require_relative 'bounding_box_util'
4
- require_relative 'iso_to_solr_format'
5
6
 
6
7
  module SearchSolrTools
7
8
  module Helpers
@@ -43,7 +44,7 @@ module SearchSolrTools
43
44
 
44
45
  def self.geojson_to_spatial_area(spatial_coverage_geom)
45
46
  spatial_areas = spatial_coverage_geom.map do |geo_json|
46
- if %w(point).include?(geo_json.geometry_type.to_s.downcase)
47
+ if %w[point].include?(geo_json.geometry_type.to_s.downcase)
47
48
  0.0
48
49
  else
49
50
  bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geo_json)
@@ -51,11 +52,13 @@ module SearchSolrTools
51
52
  end
52
53
  end
53
54
  return nil if spatial_areas.empty?
54
- spatial_areas.sort.last
55
+
56
+ spatial_areas.max
55
57
  end
56
58
 
57
59
  def self.geojson_to_global_facet(spatial_coverage_geom)
58
60
  return nil if spatial_coverage_geom.nil?
61
+
59
62
  spatial_coverage_geom.each do |geo_json|
60
63
  bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
61
64
  return 'Show Global Only' if BoundingBoxUtil.box_global?(bbox_hash)
@@ -64,13 +67,13 @@ module SearchSolrTools
64
67
  end
65
68
 
66
69
  def self.geojson_to_spatial_scope_facet(spatial_coverage_geom)
67
- unless spatial_coverage_geom.nil?
68
- spatial_coverage_geom.map do |geo_json|
69
- bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
70
- scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
71
- scope unless scope.nil?
72
- end.uniq
73
- end
70
+ return if spatial_coverage_geom.nil?
71
+
72
+ spatial_coverage_geom.map do |geo_json|
73
+ bbox_hash = BoundingBoxUtil.bounding_box_hash_from_geo_json(geo_json)
74
+ scope = SolrFormat.get_spatial_scope_facet_with_bounding_box(bbox_hash)
75
+ scope unless scope.nil?
76
+ end.uniq
74
77
  end
75
78
  end
76
79
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rgeo/geo_json'
2
4
 
3
5
  require_relative 'solr_format'
@@ -1,4 +1,5 @@
1
- # rubocop:disable Metrics/ClassLength
1
+ # frozen_string_literal: true
2
+
2
3
  require 'rgeo/geo_json'
3
4
 
4
5
  require 'search_solr_tools'
@@ -10,50 +11,50 @@ module SearchSolrTools
10
11
  module Translators
11
12
  # Translates NSIDC JSON format to Solr JSON add format
12
13
  class NsidcJsonToSolr
13
- PARAMETER_PARTS = %w(category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable)
14
+ PARAMETER_PARTS = %w[category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable].freeze
14
15
 
15
16
  # rubocop:disable Metrics/MethodLength
16
17
  # rubocop:disable Metrics/AbcSize
17
18
  def translate(json_doc)
18
- copy_keys = %w(title summary keywords brokered)
19
+ copy_keys = %w[title summary keywords brokered]
19
20
  temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
20
21
  spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
21
22
 
22
23
  solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
23
24
  solr_add_hash.merge!(
24
- 'authoritative_id' => json_doc['authoritativeId'],
25
- 'dataset_version' => json_doc['majorVersion']['version'],
26
- 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
27
- 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
28
- 'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
29
- 'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
30
- 'parameters' => translate_parameters(json_doc['parameters']),
31
- 'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
32
- 'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
33
- 'platforms' => translate_json_string(json_doc['platforms']),
34
- 'sensors' => translate_json_string(json_doc['instruments']),
35
- 'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
36
- 'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
37
- 'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
38
- 'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
39
- 'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
40
- 'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
41
- 'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
42
- 'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
43
- 'temporal_duration' => temporal_coverage_values['temporal_duration'],
44
- 'temporal' => temporal_coverage_values['temporal'],
45
- 'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
46
- 'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
47
- 'dataset_url' => json_doc['datasetUrl'],
48
- 'distribution_formats' => json_doc['distributionFormats'],
49
- 'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
50
- 'source' => %w(NSIDC ADE),
51
- 'popularity' => json_doc['popularity'],
52
- 'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
53
- 'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
25
+ 'authoritative_id' => json_doc['authoritativeId'],
26
+ 'dataset_version' => json_doc['majorVersion']['version'],
27
+ 'data_centers' => Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name],
28
+ 'facet_data_center' => "#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:short_name]}",
29
+ 'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
30
+ 'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
31
+ 'parameters' => translate_parameters(json_doc['parameters']),
32
+ 'full_parameters' => translate_json_string(json_doc['parameters'], PARAMETER_PARTS),
33
+ 'facet_parameter' => translate_parameters_to_facet_parameters(json_doc['parameters']),
34
+ 'platforms' => translate_json_string(json_doc['platforms']),
35
+ 'sensors' => translate_json_string(json_doc['instruments']),
36
+ 'facet_sensor' => translate_sensor_to_facet_sensor(json_doc['instruments']),
37
+ 'published_date' => (Helpers::SolrFormat.date_str json_doc['releaseDate']),
38
+ 'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
39
+ 'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
40
+ 'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
41
+ 'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
42
+ 'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
43
+ 'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
44
+ 'temporal_duration' => temporal_coverage_values['temporal_duration'],
45
+ 'temporal' => temporal_coverage_values['temporal'],
46
+ 'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
47
+ 'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
48
+ 'dataset_url' => json_doc['datasetUrl'],
49
+ 'distribution_formats' => json_doc['distributionFormats'],
50
+ 'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
51
+ 'source' => %w[NSIDC ADE],
52
+ 'popularity' => json_doc['popularity'],
53
+ 'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
54
+ 'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
54
55
  'facet_temporal_resolution' => translate_temporal_resolution_facet_values(json_doc['parameters']),
55
- 'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
56
- 'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
56
+ 'facet_spatial_resolution' => translate_spatial_resolution_facet_values(json_doc['parameters']),
57
+ 'sponsored_programs' => translate_internal_datacenters(json_doc['internalDataCenters'])
57
58
  )
58
59
  end
59
60
  # rubocop:enable Metrics/MethodLength
@@ -70,13 +71,14 @@ module SearchSolrTools
70
71
  def translate_sensor_to_facet_sensor(json)
71
72
  facet_values = []
72
73
  return facet_values if json.nil?
74
+
73
75
  json.each do |json_entry|
74
76
  sensor_bin = Helpers::SolrFormat.facet_binning('sensor', json_entry['shortName'].to_s)
75
- if sensor_bin.eql? json_entry['shortName']
76
- facet_values << "#{json_entry['longName']} | #{json_entry['shortName']}"
77
- else
78
- facet_values << " | #{sensor_bin}"
79
- end
77
+ facet_values << if sensor_bin.eql? json_entry['shortName']
78
+ "#{json_entry['longName']} | #{json_entry['shortName']}"
79
+ else
80
+ " | #{sensor_bin}"
81
+ end
80
82
  end
81
83
  facet_values
82
84
  end
@@ -100,12 +102,13 @@ module SearchSolrTools
100
102
  end
101
103
 
102
104
  def translate_iso_topic_categories(iso_topic_categories_json)
103
- iso_topic_categories_json.map { |t| t['name'] } unless iso_topic_categories_json.nil?
105
+ iso_topic_categories_json&.map { |t| t['name'] }
104
106
  end
105
107
 
106
108
  def translate_data_access_urls(json)
107
109
  values = []
108
110
  return values if json.nil?
111
+
109
112
  json.each do |json_entry|
110
113
  link_display = json_entry['displayText'].nil? ? '' : json_entry['displayText']
111
114
  link_type = json_entry['type'].nil? ? '' : json_entry['type']
@@ -120,6 +123,7 @@ module SearchSolrTools
120
123
  def translate_internal_datacenters(json)
121
124
  values = []
122
125
  return values if json.nil?
126
+
123
127
  json.each do |json_entry|
124
128
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
125
129
  values << short_name
@@ -130,6 +134,7 @@ module SearchSolrTools
130
134
  def translate_short_long_names_to_facet_value(json)
131
135
  facet_values = []
132
136
  return facet_values if json.nil?
137
+
133
138
  json.each do |json_entry|
134
139
  long_name = json_entry['longName'].nil? ? '' : json_entry['longName']
135
140
  short_name = json_entry['shortName'].nil? ? '' : json_entry['shortName']
@@ -169,6 +174,7 @@ module SearchSolrTools
169
174
  def translate_parameters_to_facet_parameters(parameters_json)
170
175
  parameters_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
171
176
  return [] if parameters_strings.nil?
177
+
172
178
  facet_params = []
173
179
  parameters_strings.each do |str|
174
180
  facet_params << Helpers::SolrFormat.parameter_binning(str)
@@ -199,8 +205,7 @@ module SearchSolrTools
199
205
  end
200
206
 
201
207
  def generate_data_citation_creators(data_citation)
202
- data_citation.nil? ? creators = [] : creators = data_citation['creators']
203
- creators
208
+ data_citation.nil? ? [] : data_citation['creators']
204
209
  end
205
210
 
206
211
  def generate_part_array(json, limit_values = nil)
@@ -214,6 +219,5 @@ module SearchSolrTools
214
219
  parts
215
220
  end
216
221
  end
217
- # rubocop:enable Metrics/ClassLength
218
222
  end
219
223
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
- VERSION = '6.1.0'
4
+ VERSION = '6.3.0'
3
5
  end
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'search_solr_tools/config/environments'
2
4
  require_relative 'search_solr_tools/version'
3
5
 
4
- require_relative 'search_solr_tools/helpers/selectors'
5
6
  require_relative 'search_solr_tools/helpers/harvest_status'
6
7
  require_relative 'search_solr_tools/errors/harvest_error'
7
8
 
8
- %w( selectors harvesters translators ).each do |subdir|
9
+ %w[harvesters translators].each do |subdir|
9
10
  Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
10
11
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.1.0
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -14,7 +14,7 @@ authors:
14
14
  autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2023-07-17 00:00:00.000000000 Z
17
+ date: 2023-07-24 00:00:00.000000000 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: ffi-geos
@@ -321,59 +321,17 @@ files:
321
321
  - lib/search_solr_tools/config/environments.rb
322
322
  - lib/search_solr_tools/config/environments.yaml
323
323
  - lib/search_solr_tools/errors/harvest_error.rb
324
- - lib/search_solr_tools/harvesters/adc.rb
325
- - lib/search_solr_tools/harvesters/ade_auto_suggest.rb
326
324
  - lib/search_solr_tools/harvesters/auto_suggest.rb
327
325
  - lib/search_solr_tools/harvesters/base.rb
328
- - lib/search_solr_tools/harvesters/bcodmo.rb
329
- - lib/search_solr_tools/harvesters/data_one.rb
330
- - lib/search_solr_tools/harvesters/echo.rb
331
- - lib/search_solr_tools/harvesters/eol.rb
332
- - lib/search_solr_tools/harvesters/gtnp.rb
333
- - lib/search_solr_tools/harvesters/ices.rb
334
- - lib/search_solr_tools/harvesters/ncdc_paleo.rb
335
- - lib/search_solr_tools/harvesters/nmi.rb
336
- - lib/search_solr_tools/harvesters/nodc.rb
337
326
  - lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
338
327
  - lib/search_solr_tools/harvesters/nsidc_json.rb
339
- - lib/search_solr_tools/harvesters/oai.rb
340
- - lib/search_solr_tools/harvesters/pdc.rb
341
- - lib/search_solr_tools/harvesters/r2r.rb
342
- - lib/search_solr_tools/harvesters/rda.rb
343
- - lib/search_solr_tools/harvesters/tdar.rb
344
- - lib/search_solr_tools/harvesters/usgs.rb
345
328
  - lib/search_solr_tools/helpers/bounding_box_util.rb
346
- - lib/search_solr_tools/helpers/csw_iso_query_builder.rb
347
- - lib/search_solr_tools/helpers/data_one_format.rb
348
329
  - lib/search_solr_tools/helpers/facet_configuration.rb
349
330
  - lib/search_solr_tools/helpers/harvest_status.rb
350
331
  - lib/search_solr_tools/helpers/iso_namespaces.rb
351
- - lib/search_solr_tools/helpers/iso_to_solr.rb
352
- - lib/search_solr_tools/helpers/iso_to_solr_format.rb
353
- - lib/search_solr_tools/helpers/ncdc_paleo_format.rb
354
- - lib/search_solr_tools/helpers/query_builder.rb
355
- - lib/search_solr_tools/helpers/r2r_format.rb
356
- - lib/search_solr_tools/helpers/selectors.rb
357
332
  - lib/search_solr_tools/helpers/solr_format.rb
358
- - lib/search_solr_tools/helpers/tdar_format.rb
359
333
  - lib/search_solr_tools/helpers/translate_spatial_coverage.rb
360
334
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
361
- - lib/search_solr_tools/helpers/usgs_format.rb
362
- - lib/search_solr_tools/selectors/adc.rb
363
- - lib/search_solr_tools/selectors/data_one.rb
364
- - lib/search_solr_tools/selectors/echo_iso.rb
365
- - lib/search_solr_tools/selectors/ices_iso.rb
366
- - lib/search_solr_tools/selectors/ncdc_paleo.rb
367
- - lib/search_solr_tools/selectors/nmi.rb
368
- - lib/search_solr_tools/selectors/nodc_iso.rb
369
- - lib/search_solr_tools/selectors/pdc_iso.rb
370
- - lib/search_solr_tools/selectors/r2r.rb
371
- - lib/search_solr_tools/selectors/rda.rb
372
- - lib/search_solr_tools/selectors/tdar_opensearch.rb
373
- - lib/search_solr_tools/selectors/usgs_iso.rb
374
- - lib/search_solr_tools/translators/bcodmo_json.rb
375
- - lib/search_solr_tools/translators/eol_to_solr.rb
376
- - lib/search_solr_tools/translators/gtnp_json.rb
377
335
  - lib/search_solr_tools/translators/nsidc_json.rb
378
336
  - lib/search_solr_tools/version.rb
379
337
  - search_solr_tools.gemspec
@@ -396,7 +354,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
396
354
  - !ruby/object:Gem::Version
397
355
  version: '0'
398
356
  requirements: []
399
- rubygems_version: 3.4.10
357
+ rubygems_version: 3.4.17
400
358
  signing_key:
401
359
  specification_version: 4
402
360
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
@@ -1,49 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class Adc < Base
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @page_size = 250
9
- @translator = Helpers::IsoToSolr.new :adc
10
- end
11
-
12
- def harvest_and_delete
13
- puts "Running harvest of adc catalog from #{metadata_url}"
14
- super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
15
- end
16
-
17
- def harvest_adc_into_solr
18
- start = 0
19
- while (entries = get_results_from_adc(start)) && (entries.length > 0)
20
- begin
21
- insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
22
- rescue => e
23
- puts "ERROR: #{e}\n\n"
24
- raise e if @die_on_failure
25
- end
26
- start += @page_size
27
- end
28
- end
29
-
30
- def get_results_from_adc(start)
31
- get_results(build_request(start, @page_size), './response/result/doc')
32
- end
33
-
34
- def metadata_url
35
- SolrEnvironments[@environment][:adc_url]
36
- end
37
-
38
- def get_docs_with_translated_entries_from_adc(entries)
39
- entries.map do |e|
40
- create_new_solr_add_doc_with_child(@translator.translate(e).root)
41
- end
42
- end
43
-
44
- def build_request(start = 0, max_records = 100)
45
- "#{metadata_url}&start=#{start}&rows=#{max_records}"
46
- end
47
- end
48
- end
49
- end
@@ -1,46 +0,0 @@
1
- require_relative 'auto_suggest'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class AdeAutoSuggest < AutoSuggest
6
- def harvest_and_delete
7
- puts 'Building auto-suggest indexes for ADE'
8
- super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
9
- end
10
-
11
- def harvest
12
- url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
13
- super url, fields
14
- end
15
-
16
- def fields
17
- {
18
- 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
19
- 'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
20
- }
21
- end
22
-
23
- def split_creator(value, count, field_weight, source, split_regex)
24
- add_docs = []
25
- value.downcase.split(split_regex).each do |v|
26
- v = v.strip.chomp('/')
27
- add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
28
- end
29
- add_docs
30
- end
31
-
32
- def keyword_creator(value, count, field_weight, source)
33
- split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
34
- end
35
-
36
- def author_creator(value, count, field_weight, source)
37
- split_creator value, count, field_weight, source, %r{/;/}
38
- end
39
-
40
- def ade_length_limit_creator(value, count, field_weight, source)
41
- return [] if value.length > 80
42
- standard_add_creator value, count, field_weight, source
43
- end
44
- end
45
- end
46
- end