search_solr_tools 6.1.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cded5837c68e69c4dd22f9aea6b3efab1d4cc21570daba1b91b9561d7cde1216
4
- data.tar.gz: 73a840e47c0cbc41a3fb13e62ca85735e20d87dc9f4b0acfd3d522ef1874ad34
3
+ metadata.gz: f9ced4643b8adbda2b5ef09192f036af86878e07243fe959448213762e0e5cc1
4
+ data.tar.gz: 0a5f27a7bc1d8c9c0c07a20b6fbf122d5a3b6163a5654db635d5c478ac4a21bc
5
5
  SHA512:
6
- metadata.gz: 5056c01da21c54b3b4c86ddef264d2e0f9dbe83db472677ac1076a31a5e42e97a6ad0cd0a037b0431374a2d243bfc398dfcfa631863d3d4f3b3b334751b6e2c6
7
- data.tar.gz: 39418c870ebada693b8ef2b75460d04e8ed0d1f9764e534d2195fa59fb5daaec4ae002ae4f016d05449a1a8a593b7e931261965b5756c664c6edfc7c94e6e3b1
6
+ metadata.gz: cc66f8b40c62e2640fd72ce05aa2ac01aa76c58c730b6c445976fc7cf6e43b88cff29ec088f73e5dff913c879f7a1a31016cb634d851cfb3adb7b8bb735614c8
7
+ data.tar.gz: f896f7b473f977f0e349d422f6568774342e6bdb66e1a7dad1cf4477d0ffa7e9b05b81184898ab2d4d69c44f8ca98a3aa6791234926190484820cca4b439dc7d
data/CHANGELOG.md CHANGED
@@ -1,8 +1,17 @@
1
- ## v6.1.0 (2022-07-14)
1
+ ## v6.3.0 (2023-07-24)
2
+
3
+ - Update Rubocop configuration to actually run against files, and make
4
+ necessary corrections to comply with Rubocop styling.
5
+
6
+ ## v6.2.0 (2023-07-18)
7
+
8
+ - Remove deprecated harvesters and associated tests, helpers, etc.
9
+
10
+ ## v6.1.0 (2023-07-14)
2
11
 
3
12
  - Updated a few other dependencies that weren't at the newest versions.
4
13
 
5
- ## v6.0.0 (2022-07-14)
14
+ ## v6.0.0 (2023-07-14)
6
15
 
7
16
  - Updated Ruby to 3.2.2, updated gem dependencies to more recent versions.
8
17
 
@@ -47,7 +47,7 @@ class SolrHarvestCLI < Thor
47
47
  end
48
48
 
49
49
  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
50
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
50
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => solr_success,
51
51
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => source_success
52
52
  )
53
53
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -64,9 +64,9 @@ class SolrHarvestCLI < Thor
64
64
  puts "Target: #{target}"
65
65
  begin
66
66
  harvest_class = get_harvester_class(target)
67
- harvester = harvest_class.new(options[:environment], die_on_failure)
67
+ harvester = harvest_class.new(options[:environment], die_on_failure:)
68
68
  ping_status = SearchSolrTools::Helpers::HarvestStatus.new(
69
- SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
69
+ SearchSolrTools::Helpers::HarvestStatus::PING_SOLR => harvester.ping_solr,
70
70
  SearchSolrTools::Helpers::HarvestStatus::PING_SOURCE => harvester.ping_source
71
71
  )
72
72
  raise SearchSolrTools::Errors::HarvestError, ping_status unless ping_status.ok?
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
120
120
  no_tasks do
121
121
  def harvester_map
122
122
  {
123
- 'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
124
- 'adc' => SearchSolrTools::Harvesters::Adc,
125
- 'echo' => SearchSolrTools::Harvesters::Echo,
126
- 'ices' => SearchSolrTools::Harvesters::Ices,
127
- 'nmi' => SearchSolrTools::Harvesters::Nmi,
128
- 'nodc' => SearchSolrTools::Harvesters::Nodc,
129
- 'r2r' => SearchSolrTools::Harvesters::R2R,
130
- 'rda' => SearchSolrTools::Harvesters::Rda,
131
- 'usgs' => SearchSolrTools::Harvesters::Usgs,
132
- 'tdar' => SearchSolrTools::Harvesters::Tdar,
133
- 'pdc' => SearchSolrTools::Harvesters::Pdc,
134
- 'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
135
- 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
136
- 'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
123
+ 'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
124
+ 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
137
125
  }
138
126
  end
139
127
 
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
 
3
5
  module SearchSolrTools
4
6
  # configuration to work with solr locally, or on integration/qa/staging/prod
5
7
  module SolrEnvironments
6
- YAML_ENVS = YAML.load_file(File.expand_path('../environments.yaml', __FILE__))
8
+ YAML_ENVS = YAML.load_file(File.expand_path('environments.yaml', __dir__))
7
9
 
8
10
  def self.[](env = :development)
9
11
  YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
@@ -3,38 +3,6 @@
3
3
  :collection_name: nsidc_oai
4
4
  :collection_path: solr
5
5
  :port: 8983
6
- :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
- :adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
8
- :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
- :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
10
- :gtnp:
11
- - http://www.gtnpdatabase.org/rest/boreholes/json
12
- - http://www.gtnpdatabase.org/rest/activelayers/json
13
- :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
14
- :ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
15
- :nmi_url: http://arcticdata.met.no/metamod/oai
16
- :nodc_url: https://data.nodc.noaa.gov/geoportal/csw
17
- :pdc_url: http://www.polardata.ca/oai/provider
18
- :rda_url: https://rda.ucar.edu/cgi-bin/oai
19
- :tdar_url: http://core.tdar.org/search/rss
20
- :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
21
- :eol:
22
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
23
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
24
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
25
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
26
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
27
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
28
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
29
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
30
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
31
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
32
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
33
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
34
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
35
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
36
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
37
- :r2r_url: http://get.rvdata.us/services/cruise/
38
6
 
39
7
  # Not using DCS API v2 here because not all retired datasets have their "retired"
40
8
  # flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Errors
3
5
  class HarvestError < StandardError
@@ -10,34 +12,47 @@ module SearchSolrTools
10
12
  ERRCODE_OTHER = 128
11
13
 
12
14
  ERRCODE_DESC = {
13
- ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
14
- ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
15
- ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
16
- ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
17
- ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
18
- ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
19
- ERRCODE_OTHER => 'General error code for non-harvest related issues'
15
+ ERRCODE_SOLR_PING => 'Solr instance did not return a successful ping',
16
+ ERRCODE_SOURCE_PING => 'Source to be harvested did not return a successful ping',
17
+ ERRCODE_SOURCE_NO_RESULTS => 'Source to be harvested returned no documents matching query',
18
+ ERRCODE_SOURCE_HARVEST_ERROR => 'One or more source documents returned an error when trying to retrieve or translate',
19
+ ERRCODE_DOCUMENT_INVALID => 'One or more documents to be harvested was invalid (malformed)',
20
+ ERRCODE_INGEST_ERROR => 'Solr returned an error trying to ingest one or more harvested documents',
21
+ ERRCODE_OTHER => 'General error code for non-harvest related issues'
20
22
  }.freeze
21
23
 
22
24
  PING_ERRCODE_MAP = {
23
- 'ping_solr' => ERRCODE_SOLR_PING,
24
- 'ping_source' => ERRCODE_SOURCE_PING,
25
- }
25
+ 'ping_solr' => ERRCODE_SOLR_PING,
26
+ 'ping_source' => ERRCODE_SOURCE_PING
27
+ }.freeze
26
28
 
27
29
  STATUS_ERRCODE_MAP = {
28
- Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
29
- Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
30
- Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
31
- Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
32
- Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
30
+ Helpers::HarvestStatus::HARVEST_NO_DOCS => ERRCODE_SOURCE_NO_RESULTS,
31
+ Helpers::HarvestStatus::HARVEST_FAILURE => ERRCODE_SOURCE_HARVEST_ERROR,
32
+ Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC => ERRCODE_DOCUMENT_INVALID,
33
+ Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR => ERRCODE_INGEST_ERROR,
34
+ Helpers::HarvestStatus::OTHER_ERROR => ERRCODE_OTHER
33
35
  }.freeze
34
36
 
35
37
  # If code is -1, it means display all error codes
36
38
  def self.describe_exit_code(code = -1)
39
+ code_list = code_to_list(code)
40
+
41
+ codes = {}
42
+ code_list.each do |k|
43
+ next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
44
+
45
+ codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
46
+ end
47
+
48
+ codes
49
+ end
50
+
51
+ # Loop through all bit-flag values to produce a list of integers
52
+ def self.code_to_list(code)
37
53
  code = code.to_i
38
54
  code_list = []
39
55
 
40
- # Loop through all bit-flag values
41
56
  [128, 64, 32, 16, 8, 4, 2, 1].each do |k|
42
57
  if code >= k || code == -1
43
58
  code_list.prepend k
@@ -45,20 +60,17 @@ module SearchSolrTools
45
60
  end
46
61
  end
47
62
 
48
- codes = {}
49
- code_list.each do |k|
50
- next if code == -1 && !ERRCODE_DESC.keys.include?(k) # skip INVALID CODE if showing all codes
51
- codes[k] = ERRCODE_DESC.keys.include?(k) ? ERRCODE_DESC[k] : 'INVALID CODE NUMBER'
52
- end
53
-
54
- codes
63
+ code_list
55
64
  end
56
65
 
57
- def initialize(status, message=nil)
66
+ def initialize(status, message = nil)
58
67
  @status_data = status
59
68
  @other_message = message
69
+
70
+ super message
60
71
  end
61
72
 
73
+ # rubocop:disable Metrics/AbcSize
62
74
  def exit_code
63
75
  if @status_data.nil?
64
76
  puts "OTHER ERROR REPORTED: #{@other_message}"
@@ -70,19 +82,20 @@ module SearchSolrTools
70
82
  code = 0
71
83
  code += ERRCODE_SOLR_PING unless @status_data.ping_solr
72
84
  code += ERRCODE_SOURCE_PING unless @status_data.ping_source
73
- code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS] > 0
74
- code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE] > 0
75
- code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC] > 0
76
- code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR] > 0
85
+ code += ERRCODE_SOURCE_NO_RESULTS if @status_data.status[Helpers::HarvestStatus::HARVEST_NO_DOCS].positive?
86
+ code += ERRCODE_SOURCE_HARVEST_ERROR if @status_data.status[Helpers::HarvestStatus::HARVEST_FAILURE].positive?
87
+ code += ERRCODE_DOCUMENT_INVALID if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_INVALID_DOC].positive?
88
+ code += ERRCODE_INGEST_ERROR if @status_data.status[Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR].positive?
77
89
 
78
- code = ERRCODE_OTHER if code == 0
90
+ code = ERRCODE_OTHER if code.zero?
79
91
 
80
92
  code
81
93
  end
94
+ # rubocop:enable Metrics/AbcSize
82
95
 
83
96
  def message
84
- self.class.describe_exit_code(exit_code).map{|c,v| v}.join("\n")
97
+ self.class.describe_exit_code(exit_code).map { |_c, v| v }.join("\n")
85
98
  end
86
99
  end
87
100
  end
88
- end
101
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require 'json'
3
5
  require 'rest-client'
@@ -6,8 +8,8 @@ module SearchSolrTools
6
8
  module Harvesters
7
9
  # Use the nsidc_oai core to populate the auto_suggest core
8
10
  class AutoSuggest < Base
9
- def initialize(env = 'development', die_on_failure = false)
10
- super env, die_on_failure
11
+ def initialize(env = 'development', die_on_failure: false)
12
+ super
11
13
  @env_settings = SolrEnvironments[@environment] # super sets @environment.
12
14
  end
13
15
 
@@ -50,7 +52,7 @@ module SearchSolrTools
50
52
 
51
53
  if status == Helpers::HarvestStatus::INGEST_OK
52
54
  puts "Added #{add_docs.size} auto suggest documents in one commit"
53
- return Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
55
+ Helpers::HarvestStatus.new(Helpers::HarvestStatus::INGEST_OK => add_docs)
54
56
  else
55
57
  puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
56
58
  new_add_docs = []
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'multi_json'
2
4
  require 'nokogiri'
3
5
  require 'open-uri'
@@ -8,8 +10,6 @@ require 'time'
8
10
  require 'search_solr_tools'
9
11
  require_relative '../helpers/iso_namespaces'
10
12
  require_relative '../helpers/solr_format'
11
- require_relative '../helpers/iso_to_solr'
12
-
13
13
 
14
14
  module SearchSolrTools
15
15
  module Harvesters
@@ -21,7 +21,7 @@ module SearchSolrTools
21
21
  XML_CONTENT_TYPE = 'text/xml; charset=utf-8'
22
22
  JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
23
23
 
24
- def initialize(env = 'development', die_on_failure = false)
24
+ def initialize(env = 'development', die_on_failure: false)
25
25
  @environment = env
26
26
  @die_on_failure = die_on_failure
27
27
  end
@@ -52,7 +52,7 @@ module SearchSolrTools
52
52
  success = response.code == 200
53
53
  puts "Error in ping request: #{response.body}" unless success
54
54
  end
55
- rescue => e
55
+ rescue StandardError => e
56
56
  puts "Rest exception while pinging Solr: #{e}"
57
57
  end
58
58
  success
@@ -62,7 +62,7 @@ module SearchSolrTools
62
62
  # to "ping" the data center. Returns true if the ping is successful (or, as
63
63
  # in this default, no ping method was defined)
64
64
  def ping_source
65
- puts "Harvester does not have ping method defined, assuming true"
65
+ puts 'Harvester does not have ping method defined, assuming true'
66
66
  true
67
67
  end
68
68
 
@@ -75,12 +75,12 @@ module SearchSolrTools
75
75
  harvest_status
76
76
  end
77
77
 
78
- def delete_old_documents(timestamp, constraints, solr_core, force = false)
78
+ def delete_old_documents(timestamp, constraints, solr_core, force: false)
79
79
  constraints = sanitize_data_centers_constraints(constraints)
80
80
  delete_query = "last_update:[* TO #{timestamp}] AND #{constraints}"
81
81
  solr = RSolr.connect url: solr_url + "/#{solr_core}"
82
82
  unchanged_count = (solr.get 'select', params: { wt: :ruby, q: delete_query, rows: 0 })['response']['numFound'].to_i
83
- if unchanged_count == 0
83
+ if unchanged_count.zero?
84
84
  puts "All documents were updated after #{timestamp}, nothing to delete"
85
85
  else
86
86
  puts "Begin removing documents older than #{timestamp}"
@@ -91,8 +91,8 @@ module SearchSolrTools
91
91
  def sanitize_data_centers_constraints(query_string)
92
92
  # Remove lucene special characters, preserve the query parameter and compress whitespace
93
93
  query_string.gsub!(/[:&|!~\-\(\)\{\}\[\]\^\*\?\+]+/, ' ')
94
- query_string.gsub!(/data_centers /, 'data_centers:')
95
- query_string.gsub!(/source /, 'source:')
94
+ query_string.gsub!('data_centers ', 'data_centers:')
95
+ query_string.gsub!('source ', 'source:')
96
96
  query_string.squeeze(' ').strip
97
97
  end
98
98
 
@@ -127,7 +127,7 @@ module SearchSolrTools
127
127
  status
128
128
  end
129
129
 
130
- # TODO Need to return a specific type of failure:
130
+ # TODO: Need to return a specific type of failure:
131
131
  # - Bad record content identified and no ingest attempted
132
132
  # - Solr tries to ingest document and fails (bad content not detected prior to ingest)
133
133
  # - Solr cannot insert document for reasons other than the document structure and content.
@@ -143,15 +143,15 @@ module SearchSolrTools
143
143
 
144
144
  # Some docs will cause solr to time out during the POST
145
145
  begin
146
- RestClient.post(url, doc_serialized, content_type: content_type) do |response, _request, _result|
146
+ RestClient.post(url, doc_serialized, content_type:) do |response, _request, _result|
147
147
  success = response.code == 200
148
148
  unless success
149
149
  puts "Error for #{doc_serialized}\n\n response: #{response.body}"
150
150
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
151
151
  end
152
152
  end
153
- rescue => e
154
- # TODO Need to provide more detail re: this failure so we know whether to
153
+ rescue StandardError => e
154
+ # TODO: Need to provide more detail re: this failure so we know whether to
155
155
  # exit the job with a status != 0
156
156
  puts "Rest exception while POSTing to Solr: #{e}, for doc: #{doc_serialized}"
157
157
  status = Helpers::HarvestStatus::INGEST_ERR_SOLR_ERROR
@@ -161,11 +161,11 @@ module SearchSolrTools
161
161
 
162
162
  def get_serialized_doc(doc, content_type)
163
163
  if content_type.eql?(XML_CONTENT_TYPE)
164
- return doc.respond_to?(:to_xml) ? doc.to_xml : doc
164
+ doc.respond_to?(:to_xml) ? doc.to_xml : doc
165
165
  elsif content_type.eql?(JSON_CONTENT_TYPE)
166
- return MultiJson.dump(doc)
166
+ MultiJson.dump(doc)
167
167
  else
168
- return doc
168
+ doc
169
169
  end
170
170
  end
171
171
 
@@ -178,17 +178,18 @@ module SearchSolrTools
178
178
 
179
179
  begin
180
180
  puts "Request: #{request_url}"
181
- response = URI.open(request_url, read_timeout: timeout, 'Content-Type' => content_type)
181
+ response = URI.parse(request_url).open(read_timeout: timeout, 'Content-Type' => content_type)
182
182
  rescue OpenURI::HTTPError, Timeout::Error, Errno::ETIMEDOUT => e
183
183
  retries_left -= 1
184
184
  puts "## REQUEST FAILED ## #{e.class} ## Retrying #{retries_left} more times..."
185
185
 
186
- retry if retries_left > 0
186
+ retry if retries_left.positive?
187
187
 
188
- # TODO - Do we really need this "die_on_failure" anymore? The empty return
188
+ # TODO: Do we really need this "die_on_failure" anymore? The empty return
189
189
  # will cause the "No Documents" error to be thrown in the harvester class
190
190
  # now, so it will pretty much always "die on failure"
191
191
  raise e if @die_on_failure
192
+
192
193
  return
193
194
  end
194
195
  doc = Nokogiri.XML(response)
@@ -216,7 +217,7 @@ module SearchSolrTools
216
217
  spatial_coverages = doc.xpath(".//field[@name='spatial_coverages']").first
217
218
  return true if spatial_coverages.nil?
218
219
 
219
- spatial_coverages = spatial_coverages.text.split(' ')
220
+ spatial_coverages = spatial_coverages.text.split
220
221
 
221
222
  # We've only seen the failure with 4 spatial coverage values
222
223
  return true if spatial_coverages.size < 4
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'auto_suggest'
2
4
 
3
5
  module SearchSolrTools
@@ -16,11 +18,11 @@ module SearchSolrTools
16
18
  def fields
17
19
  {
18
20
  'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
19
- 'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
20
- 'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
21
- 'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
22
- 'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
23
- 'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
21
+ 'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
22
+ 'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
23
+ 'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
24
+ 'full_sensors' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
25
+ 'full_authors' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) }
24
26
  }
25
27
  end
26
28
 
@@ -1,15 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
  require 'rest-client'
3
5
 
4
6
  require 'search_solr_tools'
5
7
 
6
-
7
8
  module SearchSolrTools
8
9
  module Harvesters
9
10
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
10
11
  class NsidcJson < Base
11
- def initialize(env = 'development', die_on_failure = false)
12
- super env, die_on_failure
12
+ def initialize(env = 'development', die_on_failure: false)
13
+ super
13
14
  @translator = Translators::NsidcJsonToSolr.new
14
15
  Helpers::FacetConfiguration.import_bin_configuration(env)
15
16
  end
@@ -19,7 +20,7 @@ module SearchSolrTools
19
20
  RestClient.options(nsidc_json_url) do |response, _request, _result|
20
21
  return response.code == 200
21
22
  end
22
- rescue => e
23
+ rescue StandardError
23
24
  puts "Error trying to get options for #{nsidc_json_url} (ping)"
24
25
  end
25
26
  false
@@ -37,7 +38,7 @@ module SearchSolrTools
37
38
 
38
39
  status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
39
40
 
40
- status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
41
+ status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if (result[:num_docs]).zero?
41
42
 
42
43
  # Record the number of harvest failures; note that if this is 0, thats OK, the status will stay at 0
43
44
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
@@ -66,7 +67,7 @@ module SearchSolrTools
66
67
  # @param id [String] NSIDC authoritative ID for the dataset
67
68
  # @return [Hash] Parsed version of the JSON response
68
69
  def fetch_json_from_nsidc(id)
69
- json_response = RestClient.get(nsidc_json_url + id + '.json')
70
+ json_response = RestClient.get("#{nsidc_json_url}#{id}.json")
70
71
  JSON.parse(json_response)
71
72
  end
72
73
 
@@ -81,13 +82,13 @@ module SearchSolrTools
81
82
  id = r.text.split('/').last
82
83
  begin
83
84
  docs << { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(id)) } }
84
- rescue => e
85
+ rescue StandardError => e
85
86
  puts "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
86
87
  failure_ids << id
87
88
  end
88
89
  end
89
90
 
90
- { num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
91
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: }
91
92
  end
92
93
  end
93
94
  end
@@ -1,4 +1,6 @@
1
- require_relative './iso_namespaces'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'iso_namespaces'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Helpers
@@ -8,12 +10,10 @@ module SearchSolrTools
8
10
  NORTHERN_GLOBAL_BOUNDARY = 85.0
9
11
 
10
12
  def self.bounding_box_hash_from_geo_json(geometry)
11
- if geometry_is_point?(geometry)
12
- return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s }
13
- else
14
- bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
15
- return { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
16
- end
13
+ return { west: geometry.x.to_s, south: geometry.y.to_s, east: geometry.x.to_s, north: geometry.y.to_s } if geometry_is_point?(geometry)
14
+
15
+ bbox = RGeo::Cartesian::BoundingBox.create_from_geometry(geometry)
16
+ { west: bbox.min_x.to_s, south: bbox.min_y.to_s, east: bbox.max_x.to_s, north: bbox.max_y.to_s }
17
17
  end
18
18
 
19
19
  def self.geometry_is_point?(geometry)
@@ -30,7 +30,7 @@ module SearchSolrTools
30
30
  end
31
31
 
32
32
  def self.box_invalid?(box)
33
- [:north, :south, :east, :west].any? { |d| box[d].to_s.empty? }
33
+ %i[north south east west].any? { |d| box[d].to_s.empty? }
34
34
  end
35
35
  end
36
36
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
  require 'rest_client'
3
5
  require 'singleton'
@@ -8,7 +10,7 @@ module SearchSolrTools
8
10
  class FacetConfiguration
9
11
  include Singleton
10
12
  def self.import_bin_configuration(env)
11
- @bin_configuration = JSON.parse(RestClient.get(SolrEnvironments[env][:nsidc_dataset_metadata_url] + 'binConfiguration')) if @bin_configuration.nil?
13
+ @bin_configuration = JSON.parse(RestClient.get("#{SolrEnvironments[env][:nsidc_dataset_metadata_url]}binConfiguration")) if @bin_configuration.nil?
12
14
  end
13
15
 
14
16
  def self.get_facet_bin(facet_name)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Helpers
3
5
  class HarvestStatus
@@ -7,17 +9,17 @@ module SearchSolrTools
7
9
  INGEST_ERR_INVALID_DOC = :invalid
8
10
  INGEST_ERR_SOLR_ERROR = :solr_error
9
11
  OTHER_ERROR = :other
10
- PING_SOLR = :ping_solr # used for initialize only
11
- PING_SOURCE = :ping_source # used for initialize only
12
+ PING_SOLR = :ping_solr # used for initialize only
13
+ PING_SOURCE = :ping_source # used for initialize only
12
14
 
13
- ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
15
+ ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR].freeze
14
16
 
15
- attr_reader :status, :ping_solr, :ping_source
16
- attr_writer :ping_solr, :ping_source
17
+ attr_accessor :ping_solr, :ping_source
18
+ attr_reader :status
17
19
 
18
20
  # init_info is an optional hash that contains the various status keys and the documents to
19
21
  # associate with them
20
- def initialize(init_info={})
22
+ def initialize(init_info = {})
21
23
  @status = { INGEST_OK => 0 }
22
24
  @ping_solr = true
23
25
  @ping_source = true
@@ -36,9 +38,9 @@ module SearchSolrTools
36
38
  end
37
39
 
38
40
  def ok?
39
- ERROR_STATUS.each { |s| return false unless @status[s] == 0 }
41
+ ERROR_STATUS.each { |s| return false unless (@status[s]).zero? }
40
42
  @ping_solr && @ping_source
41
43
  end
42
44
  end
43
45
  end
44
- end
46
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SearchSolrTools
2
4
  module Helpers
3
5
  # Helper class to provide default namespaces for XML document parsing.
@@ -25,7 +27,7 @@ module SearchSolrTools
25
27
  'srv' => 'http://www.isotc211.org/2005/srv',
26
28
  'xlink' => 'http://www.w3.org/1999/xlink',
27
29
  'xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
28
- }
30
+ }.freeze
29
31
  end
30
32
  end
31
33
  end