search_solr_tools 4.2.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -1
  3. data/README.md +6 -6
  4. data/bin/search_solr_tools +58 -2
  5. data/lib/search_solr_tools.rb +9 -7
  6. data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
  7. data/lib/search_solr_tools/harvesters/adc.rb +2 -0
  8. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +2 -0
  9. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -1
  10. data/lib/search_solr_tools/harvesters/base.rb +65 -11
  11. data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -0
  12. data/lib/search_solr_tools/harvesters/data_one.rb +2 -0
  13. data/lib/search_solr_tools/harvesters/echo.rb +2 -0
  14. data/lib/search_solr_tools/harvesters/gtnp.rb +1 -0
  15. data/lib/search_solr_tools/harvesters/ices.rb +3 -0
  16. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +3 -0
  17. data/lib/search_solr_tools/harvesters/nmi.rb +2 -0
  18. data/lib/search_solr_tools/harvesters/nodc.rb +3 -0
  19. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +2 -0
  20. data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
  21. data/lib/search_solr_tools/harvesters/oai.rb +3 -0
  22. data/lib/search_solr_tools/harvesters/pdc.rb +2 -0
  23. data/lib/search_solr_tools/harvesters/r2r.rb +2 -2
  24. data/lib/search_solr_tools/harvesters/rda.rb +2 -0
  25. data/lib/search_solr_tools/harvesters/tdar.rb +2 -0
  26. data/lib/search_solr_tools/harvesters/usgs.rb +3 -0
  27. data/lib/search_solr_tools/helpers/data_one_format.rb +3 -3
  28. data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
  29. data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -0
  30. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -2
  31. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +3 -3
  32. data/lib/search_solr_tools/helpers/r2r_format.rb +3 -3
  33. data/lib/search_solr_tools/helpers/selectors.rb +1 -2
  34. data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
  35. data/lib/search_solr_tools/helpers/tdar_format.rb +3 -3
  36. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +2 -2
  37. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +1 -1
  38. data/lib/search_solr_tools/helpers/usgs_format.rb +2 -2
  39. data/lib/search_solr_tools/selectors/adc.rb +2 -1
  40. data/lib/search_solr_tools/selectors/data_one.rb +2 -1
  41. data/lib/search_solr_tools/selectors/echo_iso.rb +2 -1
  42. data/lib/search_solr_tools/selectors/ices_iso.rb +2 -1
  43. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +2 -1
  44. data/lib/search_solr_tools/selectors/nmi.rb +2 -1
  45. data/lib/search_solr_tools/selectors/nodc_iso.rb +2 -1
  46. data/lib/search_solr_tools/selectors/pdc_iso.rb +2 -1
  47. data/lib/search_solr_tools/selectors/r2r.rb +3 -1
  48. data/lib/search_solr_tools/selectors/rda.rb +2 -1
  49. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +3 -1
  50. data/lib/search_solr_tools/selectors/usgs_iso.rb +3 -1
  51. data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
  52. data/lib/search_solr_tools/translators/eol_to_solr.rb +6 -0
  53. data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
  54. data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
  55. data/lib/search_solr_tools/version.rb +1 -1
  56. data/search_solr_tools.gemspec +22 -23
  57. metadata +47 -55
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class DataOne < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from ECHO and inserts it into Solr after it has been translated
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from ICES and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC PALEO and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class Nmi < Oai
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'auto_suggest'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class NsidcAutoSuggest < AutoSuggest
@@ -3,6 +3,7 @@ require 'rest-client'
3
3
 
4
4
  require 'search_solr_tools'
5
5
 
6
+
6
7
  module SearchSolrTools
7
8
  module Harvesters
8
9
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
@@ -13,6 +14,17 @@ module SearchSolrTools
13
14
  Helpers::FacetConfiguration.import_bin_configuration(env)
14
15
  end
15
16
 
17
+ def ping_source
18
+ begin
19
+ RestClient.options(nsidc_json_url) do |response, _request, _result|
20
+ return response.code == 200
21
+ end
22
+ rescue => e
23
+ puts "Error trying to get options for #{nsidc_json_url} (ping)"
24
+ end
25
+ false
26
+ end
27
+
16
28
  def harvest_and_delete
17
29
  puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
18
30
  super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
@@ -22,8 +34,22 @@ module SearchSolrTools
22
34
  # this is the main entry point for the class
23
35
  def harvest_nsidc_json_into_solr
24
36
  result = docs_with_translated_entries_from_nsidc
25
- insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
26
- fail 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
37
+
38
+ status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
39
+
40
+ status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
41
+
42
+ # Record the number of harvest failures; note that if this is 0, that's OK, the status will stay at 0
43
+ status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
44
+
45
+ raise Errors::HarvestError, status unless status.ok?
46
+ rescue Errors::HarvestError => e
47
+ raise e
48
+ rescue StandardError => e
49
+ puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
50
+ puts e.backtrace
51
+ status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
52
+ raise Errors::HarvestError, status
27
53
  end
28
54
 
29
55
  def nsidc_json_url
@@ -33,7 +59,7 @@ module SearchSolrTools
33
59
  def result_ids_from_nsidc
34
60
  url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
35
61
  SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
36
- get_results url, '//xmlns:identifier'
62
+ get_results(url, '//xmlns:identifier') || []
37
63
  end
38
64
 
39
65
  # Fetch a JSON representation of a dataset's metadata
@@ -48,7 +74,8 @@ module SearchSolrTools
48
74
  docs = []
49
75
  failure_ids = []
50
76
 
51
- result_ids_from_nsidc.each do |r|
77
+ all_docs = result_ids_from_nsidc
78
+ all_docs.each do |r|
52
79
  # Each result looks like:
53
80
  # oai:nsidc.org/AE_L2A
54
81
  id = r.text.split('/').last
@@ -60,7 +87,7 @@ module SearchSolrTools
60
87
  end
61
88
  end
62
89
 
63
- { add_docs: docs, failure_ids: failure_ids }
90
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
64
91
  end
65
92
  end
66
93
  end
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/query_builder'
3
+
1
4
  require 'json'
2
5
 
3
6
  module SearchSolrTools
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from Polar data catalogue and inserts it into
@@ -1,8 +1,8 @@
1
+ require_relative 'base'
2
+
1
3
  require 'nokogiri'
2
4
  require 'rest-client'
3
5
 
4
- require_relative 'base'
5
-
6
6
  module SearchSolrTools
7
7
  module Harvesters
8
8
  class R2R < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests the RDA feed
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from TDAR and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from USGS and inserts it into Solr after it has been translated
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -0,0 +1,44 @@
1
+ module SearchSolrTools
2
+ module Helpers
3
+ class HarvestStatus
4
+ INGEST_OK = :ok
5
+ HARVEST_NO_DOCS = :harvest_none
6
+ HARVEST_FAILURE = :harvest_fail
7
+ INGEST_ERR_INVALID_DOC = :invalid
8
+ INGEST_ERR_SOLR_ERROR = :solr_error
9
+ OTHER_ERROR = :other
10
+ PING_SOLR = :ping_solr # used for initialize only
11
+ PING_SOURCE = :ping_source # used for initialize only
12
+
13
+ ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
14
+
15
+ attr_reader :status, :ping_solr, :ping_source
16
+ attr_writer :ping_solr, :ping_source
17
+
18
+ # init_info is an optional hash that contains the various status keys and the documents to
19
+ # associate with them
20
+ def initialize(init_info={})
21
+ @status = { INGEST_OK => 0 }
22
+ @ping_solr = true
23
+ @ping_source = true
24
+ ERROR_STATUS.each { |s| @status[s] = 0 }
25
+
26
+ init_info.each do |key, count|
27
+ @status[key] = count if @status.include? key
28
+ end
29
+
30
+ @ping_solr = init_info[PING_SOLR] if init_info.include? PING_SOLR
31
+ @ping_source = init_info[PING_SOURCE] if init_info.include? PING_SOURCE
32
+ end
33
+
34
+ def record_status(status, count = 1)
35
+ @status[status] += count
36
+ end
37
+
38
+ def ok?
39
+ ERROR_STATUS.each { |s| return false unless @status[s] == 0 }
40
+ @ping_solr && @ping_source
41
+ end
42
+ end
43
+ end
44
+ end
@@ -1,3 +1,4 @@
1
+ require_relative 'selectors'
1
2
  require 'nokogiri'
2
3
 
3
4
  module SearchSolrTools
@@ -1,7 +1,7 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,8 +1,8 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
5
- require_relative './iso_to_solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
+ require_relative 'iso_to_solr_format'
6
6
 
7
7
  module SearchSolrTools
8
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,4 @@
1
- require 'require_all'
2
- require_rel '../selectors'
1
+ Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
3
2
 
4
3
  module SearchSolrTools
5
4
  module Helpers
@@ -2,6 +2,7 @@ require 'date'
2
2
  require 'iso8601'
3
3
 
4
4
  require_relative 'bounding_box_util'
5
+ require_relative 'facet_configuration'
5
6
 
6
7
  module SearchSolrTools
7
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,7 +1,7 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/bounding_box_util'
4
- require 'search_solr_tools/helpers/iso_to_solr_format'
3
+ require_relative 'bounding_box_util'
4
+ require_relative 'iso_to_solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,6 +1,6 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,5 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
3
 
4
4
  module SearchSolrTools
5
5
  module Helpers
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/data_one_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/data_one_format'
2
+ require_relative '../helpers/solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/ncdc_paleo_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/r2r_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/tdar_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/usgs_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -4,6 +4,9 @@ require 'rgeo/geo_json'
4
4
  require 'rgeo/wkrep/wkt_parser'
5
5
 
6
6
  require 'search_solr_tools'
7
+ require_relative '../helpers/solr_format'
8
+ require_relative '../helpers/translate_temporal_coverage'
9
+ require_relative '../helpers/translate_spatial_coverage'
7
10
 
8
11
  module SearchSolrTools
9
12
  module Translators