search_solr_tools 6.1.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cded5837c68e69c4dd22f9aea6b3efab1d4cc21570daba1b91b9561d7cde1216
4
- data.tar.gz: 73a840e47c0cbc41a3fb13e62ca85735e20d87dc9f4b0acfd3d522ef1874ad34
3
+ metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
4
+ data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
5
5
  SHA512:
6
- metadata.gz: 5056c01da21c54b3b4c86ddef264d2e0f9dbe83db472677ac1076a31a5e42e97a6ad0cd0a037b0431374a2d243bfc398dfcfa631863d3d4f3b3b334751b6e2c6
7
- data.tar.gz: 39418c870ebada693b8ef2b75460d04e8ed0d1f9764e534d2195fa59fb5daaec4ae002ae4f016d05449a1a8a593b7e931261965b5756c664c6edfc7c94e6e3b1
6
+ metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
7
+ data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
data/CHANGELOG.md CHANGED
@@ -1,8 +1,17 @@
1
- ## v6.1.0 (2022-07-14)
1
+ ## v6.3.0 (2023-07-24)
2
+
3
+ - Update Rubocop configuration to actually run against files, and make
4
+ necessary corrections to comply with Rubocop styling.
5
+
6
+ ## v6.2.0 (2023-07-18)
7
+
8
+ - Remove deprecated harvesters and associated tests, helpers, etc.
9
+
10
+ ## v6.1.0 (2023-07-14)
2
11
 
3
12
  - Updated a few other dependencies that weren't at the newest versions.
4
13
 
5
- ## v6.0.0 (2022-07-14)
14
+ ## v6.0.0 (2023-07-14)
6
15
 
7
16
  - Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
8
17
 
data/bin/search_solr_tools CHANGED
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
47
47
  end
48
48
 
49
49
  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
50
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
50
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
51
51
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
52
52
  )
53
53
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
64
64
  puts "Target: #{target}"
65
65
  begin
66
66
  harvest_class = get_harvester_class(target)
67
- harvester = harvest_class.new(options[:environment], die_on_failure)
67
+ harvester = harvest_class.new(options[:environment], die_on_failure:)
68
68
  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
69
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
69
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
70
70
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
71
71
  )
72
72
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
120
120
  no_tasks do
121
121
  def harvester_map
122
122
  {
123
- 'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
124
- 'adc' => SearchSolrTools::Harvesters::Adc,
125
- 'echo' => SearchSolrTools::Harvesters::Echo,
126
- 'ices' => SearchSolrTools::Harvesters::Ices,
127
- 'nmi' => SearchSolrTools::Harvesters::Nmi,
128
- 'nodc' => SearchSolrTools::Harvesters::Nodc,
129
- 'r2r' => SearchSolrTools::Harvesters::R2R,
130
- 'rda' => SearchSolrTools::Harvesters::Rda,
131
- 'usgs' => SearchSolrTools::Harvesters::Usgs,
132
- 'tdar' => SearchSolrTools::Harvesters::Tdar,
133
- 'pdc' => SearchSolrTools::Harvesters::Pdc,
134
- 'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
135
- 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
136
- 'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
123
+ 'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
124
+ 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
137
125
  }
138
126
  end
139
127
 
data/lib/search_solr_tools/config/environments.rb CHANGED
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
 
3
5
  module SearchSolrTools
4
6
  # configuration to work with solr locally, or on integration/qa/staging/prod
5
7
  module SolrEnvironments
6
- YAML_ENVS = YAML.load_file(File.expand_path('../environments.yaml', __FILE__))
8
+ YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))
7
9
 
8
10
  def self.[](env = :development)
9
11
  YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
data/lib/search_solr_tools/config/environments.yaml CHANGED
@@ -3,38 +3,6 @@
3
3
  :collection_name: nsidc_oai
4
4
  :collection_path: solr
5
5
  :port: 8983
6
- :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
- :adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
8
- :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
- :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
10
- :gtnp:
11
- - http://www.gtnpdatabase.org/rest/boreholes/json
12
- - http://www.gtnpdatabase.org/rest/activelayers/json
13
- :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
14
- :ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
15
- :nmi_url: http://arcticdata.met.no/metamod/oai
16
- :nodc_url: https://data.nodc.noaa.gov/geoportal/csw
17
- :pdc_url: http://www.polardata.ca/oai/provider
18
- :rda_url: https://rda.ucar.edu/cgi-bin/oai
19
- :tdar_url: http://core.tdar.org/search/rss
20
- :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
21
- :eol:
22
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
23
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
24
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
25
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
26
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
27
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
28
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
29
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
30
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
31
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
32
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
33
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
34
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
35
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
36
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
37
- :r2r_url: http://get.rvdata.us/services/cruise/
38
6
 
39
7
  # Not using DCS API v2 here because not all retired datasets have their "retired"
40
8
  # flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
data/lib/search_solr_tools/errors/harvest_error.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Errors
3
5
  class HarvestError < StandardError
@@ -10,34 +12,47 @@ module SearchSolrTools
10
12
  ERRCODE_OTHER = 128
11
13
 
12
14
  ERRCODE_DESC = {
13
- ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
14
- ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
15
- ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
16
- ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
17
- ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
18
- ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
19
- ERRCODE_OTHER => 'General error code for non-harvest related issues'
15
+ ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
16
+ ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
17
+ ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
18
+ ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
19
+ ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
20
+ ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
21
+ ERRCODE_OTHER => 'General error code for non-harvest related issues'
20
22
  }.freeze
21
23
 
22
24
  PING_ERRCODE_MAP = {
23
- 'ping_solr' => ERRCODE_SOLR_PING,
24
- 'ping_source' => ERRCODE_SOURCE_PING,
25
- }
25
+ 'ping_solr' => ERRCODE_SOLR_PING,
26
+ 'ping_source' => ERRCODE_SOURCE_PING
27
+ }.freeze
26
28
 
27
29
  STATUS_ERRCODE_MAP = {
28
- Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
29
- Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
30
- Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
31
- Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
32
- Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
30
+ Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
31
+ Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
32
+ Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
33
+ Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
34
+ Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
33
35
  }.freeze
34
36
 
35
37
  # If code is -1, it means display all error codes
36
38
  def self.describe_exit_code(code = -1)
39
+ code_list = code_to_list(code)
40
+
41
+ codes = {}
42
+ code_list.each do |k|
43
+ next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
44
+
45
+ codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
46
+ end
47
+
48
+ codes
49
+ end
50
+
51
+ # Loop through all bit-flag values to produce a list of integers
52
+ def self.code_to_list(code)
37
53
  code = code.to_i
38
54
  code_list = []
39
55
 
40
- # Loop through all bit-flag values
41
56
  [128, 64, 32, 16, 8, 4, 2, 1].each do |k|
42
57
  if code >= k || code == -1
43
58
  code_list.prepend k
@@ -45,20 +60,17 @@ module SearchSolrTools
45
60
  end
46
61
  end
47
62
 
48
- codes = {}
49
- code_list.each do |k|
50
- next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
51
- codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
52
- end
53
-
54
- codes
63
+ code_list
55
64
  end
56
65
 
57
- def initialize(status, message=nil)
66
+ def initialize(status, message = nil)
58
67
  @status_data = status
59
68
  @other_message = message
69
+
70
+ super message
60
71
  end
61
72
 
73
+ # rubocop:disable Metrics/AbcSize
62
74
  def exit_code
63
75
  if @status_data.nil?
64
76
  puts "OTHER ERROR REPORTED: #{@other_message}"
@@ -70,19 +82,20 @@ module SearchSolrTools
70
82
  code = 0
71
83
  code += ERRCODE_SOLR_PING unless @status_data.ping_solr
72
84
  code += ERRCODE_SOURCE_PING unless @status_data.ping_source
73
- code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS] > 0
74
- code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE] > 0
75
- code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC] > 0
76
- code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR] > 0
85
+ code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
86
+ code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
87
+ code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
88
+ code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?
77
89
 
78
- code = ERRCODE_OTHER if code == 0
90
+ code = ERRCODE_OTHER if code.zero?
79
91
 
80
92
  code
81
93
  end
94
+ # rubocop:enable Metrics/AbcSize
82
95
 
83
96
  def message
84
- self.class.describe_exit_code(exit_code).map{|c,v| v}.join("\n")
97
+ self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
85
98
  end
86
99
  end
87
100
  end
88
- end
101
+ end
data/lib/search_solr_tools/harvesters/auto_suggest.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require 'json'
3
5
  require 'rest-client'
@@ -6,8 +8,8 @@ module SearchSolrTools
6
8
  module Harvesters
7
9
  # Use the nsidc_oai core to populate the auto_suggest core
8
10
  class AutoSuggest < Base
9
- def initialize(env = 'development', die_on_failure = false)
10
- super env, die_on_failure
11
+ def initialize(env = 'development', die_on_failure: false)
12
+ super
11
13
  @env_settings = SolrEnvironments[@environment] # super sets @environment.
12
14
  end
13
15
 
@@ -50,7 +52,7 @@ module SearchSolrTools
50
52
 
51
53
  if status == Helpers::HarvestStatus::INGEST_OK
52
54
  puts "Added #{add_docs.size} auto suggest documents in one commit"
53
- return Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
55
+ Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
54
56
  else
55
57
  puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
56
58
  new_add_docs = []
data/lib/search_solr_tools/harvesters/base.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'multi_json'
2
4
  require 'nokogiri'
3
5
  require 'open-uri'
@@ -8,8 +10,6 @@ require 'time'
8
10
  require 'search_solr_tools'
9
11
  require_relative '../helpers/iso_namespaces'
10
12
  require_relative '../helpers/solr_format'
11
- require_relative '../helpers/iso_to_solr'
12
-
13
13
 
14
14
  module SearchSolrTools
15
15
  module Harvesters
@@ -21,7 +21,7 @@ module SearchSolrTools
21
21
  XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
22
22
  JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
23
23
 
24
- def initialize(env = 'development', die_on_failure = false)
24
+ def initialize(env = 'development', die_on_failure: false)
25
25
  @environment = env
26
26
  @die_on_failure = die_on_failure
27
27
  end
@@ -52,7 +52,7 @@ module SearchSolrTools
52
52
  success = response.code == 200
53
53
  puts "Error in ping request: #{response.body}" unless success
54
54
  end
55
- rescue => e
55
+ rescue StandardError => e
56
56
  puts "Rest exception while pinging Solr: #{e}"
57
57
  end
58
58
  success
@@ -62,7 +62,7 @@ module SearchSolrTools
62
62
  # to "ping" the data center. Returns true if the ping is successful (or, as
63
63
  # in this default, no ping method was defined)
64
64
  def ping_source
65
- puts "Harvester does not have ping method defined, assuming true"
65
+ puts 'Harvester does not have ping method defined, assuming true'
66
66
  true
67
67
  end
68
68
 
@@ -75,12 +75,12 @@ module SearchSolrTools
75
75
  harvest_status
76
76
  end
77
77
 
78
- def delete_old_documents(timestamp, constraints, solr_core, force = false)
78
+ def delete_old_documents(timestamp, constraints, solr_core, force: false)
79
79
  constraints = sanitize_data_centers_constraints(constraints)
80
80
  delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
81
81
  solr = RSolr.connect url: solr_url + "/#{solr_core}"
82
82
  unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
83
- if unchanged_count == 0
83
+ if unchanged_count.zero?
84
84
  puts "All documents were updated after #{timestamp}, nothing to delete"
85
85
  else
86
86
  puts "Begin removing documents older than #{timestamp}"
@@ -91,8 +91,8 @@ module SearchSolrTools
91
91
  def sanitize_data_centers_constraints(query_string)
92
92
  # Remove lucene special characters, preserve the query parameter and compress whitespace
93
93
  query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
94
- query_string.gsub!(/data_centers /, 'data_centers:')
95
- query_string.gsub!(/source /, 'source:')
94
+ query_string.gsub!('data_centers ', 'data_centers:')
95
+ query_string.gsub!('source ', 'source:')
96
96
  query_string.squeeze(' ').strip
97
97
  end
98
98
 
@@ -127,7 +127,7 @@ module SearchSolrTools
127
127
  status
128
128
  end
129
129
 
130
- # TODO Need to return a specific type of failure:
130
+ # TODO: Need to return a specific type of failure:
131
131
  # - Bad record content identified and no ingest attempted
132
132
  # - Solr tries to ingest document and fails (bad content not detected prior to ingest)
133
133
  # - Solr cannot insert document for reasons other than the document structure and content.
@@ -143,15 +143,15 @@ module SearchSolrTools
143
143
 
144
144
  # Some docs will cause solr to time out during the POST
145
145
  begin
146
- RestClient.post(url, doc_serialized, content_type: content_type) do |response, _request, _result|
146
+ RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
147
147
  success = response.code == 200
148
148
  unless success
149
149
  puts "Error for #{doc_serialized}\n\n response: #{response.body}"
150
150
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
151
151
  end
152
152
  end
153
- rescue => e
154
- # TODO Need to provide more detail re: this failure so we know whether to
153
+ rescue StandardError => e
154
+ # TODO: Need to provide more detail re: this failure so we know whether to
155
155
  # exit the job with a status != 0
156
156
  puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
157
157
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
@@ -161,11 +161,11 @@ module SearchSolrTools
161
161
 
162
162
  def get_serialized_doc(doc, content_type)
163
163
  if content_type.eql?(XML_CONTENT_TYPE)
164
- return doc.respond_to?(:to_xml) ? doc.to_xml : doc
164
+ doc.respond_to?(:to_xml) ? doc.to_xml : doc
165
165
  elsif content_type.eql?(JSON_CONTENT_TYPE)
166
- return MultiJson.dump(doc)
166
+ MultiJson.dump(doc)
167
167
  else
168
- return doc
168
+ doc
169
169
  end
170
170
  end
171
171
 
@@ -178,17 +178,18 @@ module SearchSolrTools
178
178
 
179
179
  begin
180
180
  puts "Request: #{request_url}"
181
- response = URI.open(request_url, read_timeout: timeout, 'Content-Type' => content_type)
181
+ response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
182
182
  rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
183
183
  retries_left -= 1
184
184
  puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
185
185
 
186
- retry if retries_left > 0
186
+ retry if retries_left.positive?
187
187
 
188
- # TODO - Do we really need this "die_on_failure" anymore? The empty return
188
+ # TODO: Do we really need this "die_on_failure" anymore? The empty return
189
189
  # will cause the "No Documents" error to be thrown in the harvester class
190
190
  # now, so it will pretty much always "die on failure"
191
191
  raise e if @die_on_failure
192
+
192
193
  return
193
194
  end
194
195
  doc = Nokogiri.XML(response)
@@ -216,7 +217,7 @@ module SearchSolrTools
216
217
  spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
217
218
  return true if spatial_coverages.nil?
218
219
 
219
- spatial_coverages = spatial_coverages.text.split(' ')
220
+ spatial_coverages = spatial_coverages.text.split
220
221
 
221
222
  # We've only seen the failure with 4 spatial coverage values
222
223
  return true if spatial_coverages.size < 4
data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'auto_suggest'
2
4
 
3
5
  module SearchSolrTools
@@ -16,11 +18,11 @@ module SearchSolrTools
16
18
  def fields
17
19
  {
18
20
  'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
19
- 'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
20
- 'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
21
- 'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
22
- 'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
23
- 'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
21
+ 'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
22
+ 'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
23
+ 'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
24
+ 'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
25
+ 'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
24
26
  }
25
27
  end
26
28
 
data/lib/search_solr_tools/harvesters/nsidc_json.rb CHANGED
@@ -1,15 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
  require 'rest-client'
3
5
 
4
6
  require 'search_solr_tools'
5
7
 
6
-
7
8
  module SearchSolrTools
8
9
  module Harvesters
9
10
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
10
11
  class NsidcJson < Base
11
- def initialize(env = 'development', die_on_failure = false)
12
- super env, die_on_failure
12
+ def initialize(env = 'development', die_on_failure: false)
13
+ super
13
14
  @translator = Translators::NsidcJsonToSolr.new
14
15
  Helpers::FacetConfiguration.import_bin_configuration(env)
15
16
  end
@@ -19,7 +20,7 @@ module SearchSolrTools
19
20
  RestClient.options(nsidc_json_url) do |response, _request, _result|
20
21
  return response.code == 200
21
22
  end
22
- rescue => e
23
+ rescue StandardError
23
24
  puts "Error trying to get options for #{nsidc_json_url} (ping)"
24
25
  end
25
26
  false
@@ -37,7 +38,7 @@ module SearchSolrTools
37
38
 
38
39
  status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
39
40
 
40
- status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
41
+ status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?
41
42
 
42
43
  # Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
43
44
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
@@ -66,7 +67,7 @@ module SearchSolrTools
66
67
  # @param id [String] NSIDC authoritative ID for the dataset
67
68
  # @return [Hash] Parsed version of the JSON response
68
69
  def fetch_json_from_nsidc(id)
69
- json_response = RestClient.get(nsidc_json_url + id + '.json')
70
+ json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
70
71
  JSON.parse(json_response)
71
72
  end
72
73
 
@@ -81,13 +82,13 @@ module SearchSolrTools
81
82
  id = r.text.split('/').last
82
83
  begin
83
84
  docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
84
- rescue => e
85
+ rescue StandardError => e
85
86
  puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
86
87
  failure_ids << id
87
88
  end
88
89
  end
89
90
 
90
- { num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
91
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: }
91
92
  end
92
93
  end
93
94
  end
data/lib/search_solr_tools/helpers/bounding_box_util.rb CHANGED
@@ -1,4 +1,6 @@
1
- require_relative './iso_namespaces'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'iso_namespaces'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Helpers
@@ -8,12 +10,10 @@ module SearchSolrTools
8
10
  NORTHERN_GLOBAL_BOUNDARY = 85.0
9
11
 
10
12
  def self.bounding_box_hash_from_geo_json(geometry)
11
- if geometry_is_point?(geometry)
12
- return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s }
13
- else
14
- bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
15
- return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
16
- end
13
+ return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
14
+
15
+ bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
16
+ { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
17
17
  end
18
18
 
19
19
  def self.geometry_is_point?(geometry)
@@ -30,7 +30,7 @@ module SearchSolrTools
30
30
  end
31
31
 
32
32
  def self.box_invalid?(box)
33
- [:north, :south, :east, :west].any? { |d| box[d].to_s.empty? }
33
+ %i[north south east west].any? { |d| box[d].to_s.empty? }
34
34
  end
35
35
  end
36
36
  end
data/lib/search_solr_tools/helpers/facet_configuration.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
  require 'rest_client'
3
5
  require 'singleton'
@@ -8,7 +10,7 @@ module SearchSolrTools
8
10
  class FacetConfiguration
9
11
  include Singleton
10
12
  def self.import_bin_configuration(env)
11
- @bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url] + 'binConfiguration')) if @bin_configuration.nil?
13
+ @bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
12
14
  end
13
15
 
14
16
  def self.get_facet_bin(facet_name)
data/lib/search_solr_tools/helpers/harvest_status.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Helpers
3
5
  class HarvestStatus
@@ -7,17 +9,17 @@ module SearchSolrTools
7
9
  INGEST_ERR_INVALID_DOC = :invalid
8
10
  INGEST_ERR_SOLR_ERROR = :solr_error
9
11
  OTHER_ERROR = :other
10
- PING_SOLR = :ping_solr # used for initialize only
11
- PING_SOURCE = :ping_source # used for initialize only
12
+ PING_SOLR = :ping_solr # used for initialize only
13
+ PING_SOURCE = :ping_source # used for initialize only
12
14
 
13
- ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
15
+ ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze
14
16
 
15
- attr_reader :status, :ping_solr, :ping_source
16
- attr_writer :ping_solr, :ping_source
17
+ attr_accessor :ping_solr, :ping_source
18
+ attr_reader :status
17
19
 
18
20
  # init_info is an optional hash that contains the various status keys and the documents to
19
21
  # associate with them
20
- def initialize(init_info={})
22
+ def initialize(init_info = {})
21
23
  @status = { INGEST_OK => 0 }
22
24
  @ping_solr = true
23
25
  @ping_source = true
@@ -36,9 +38,9 @@ module SearchSolrTools
36
38
  end
37
39
 
38
40
  def ok?
39
- ERROR_STATUS.each { |s| return false unless @status[s] == 0 }
41
+ ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
40
42
  @ping_solr && @ping_source
41
43
  end
42
44
  end
43
45
  end
44
- end
46
+ end
data/lib/search_solr_tools/helpers/iso_namespaces.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Helpers
3
5
  # Helper class to provide default namespaces for XML document parsing.
@@ -25,7 +27,7 @@ module SearchSolrTools
25
27
  'srv' => 'http://www.isotc211.org/2005/srv',
26
28
  'xlink' => 'http://www.w3.org/1999/xlink',
27
29
  'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
28
- }
30
+ }.freeze
29
31
  end
30
32
  end
31
33
  end