search_solr_tools 4.2.0 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -1
  3. data/README.md +6 -6
  4. data/bin/search_solr_tools +58 -2
  5. data/lib/search_solr_tools.rb +9 -7
  6. data/lib/search_solr_tools/errors/harvest_error.rb +88 -0
  7. data/lib/search_solr_tools/harvesters/adc.rb +2 -0
  8. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +2 -0
  9. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -1
  10. data/lib/search_solr_tools/harvesters/base.rb +65 -11
  11. data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -0
  12. data/lib/search_solr_tools/harvesters/data_one.rb +2 -0
  13. data/lib/search_solr_tools/harvesters/echo.rb +2 -0
  14. data/lib/search_solr_tools/harvesters/gtnp.rb +1 -0
  15. data/lib/search_solr_tools/harvesters/ices.rb +3 -0
  16. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +3 -0
  17. data/lib/search_solr_tools/harvesters/nmi.rb +2 -0
  18. data/lib/search_solr_tools/harvesters/nodc.rb +3 -0
  19. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +2 -0
  20. data/lib/search_solr_tools/harvesters/nsidc_json.rb +32 -5
  21. data/lib/search_solr_tools/harvesters/oai.rb +3 -0
  22. data/lib/search_solr_tools/harvesters/pdc.rb +2 -0
  23. data/lib/search_solr_tools/harvesters/r2r.rb +2 -2
  24. data/lib/search_solr_tools/harvesters/rda.rb +2 -0
  25. data/lib/search_solr_tools/harvesters/tdar.rb +2 -0
  26. data/lib/search_solr_tools/harvesters/usgs.rb +3 -0
  27. data/lib/search_solr_tools/helpers/data_one_format.rb +3 -3
  28. data/lib/search_solr_tools/helpers/harvest_status.rb +44 -0
  29. data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -0
  30. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -2
  31. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +3 -3
  32. data/lib/search_solr_tools/helpers/r2r_format.rb +3 -3
  33. data/lib/search_solr_tools/helpers/selectors.rb +1 -2
  34. data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
  35. data/lib/search_solr_tools/helpers/tdar_format.rb +3 -3
  36. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +2 -2
  37. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +1 -1
  38. data/lib/search_solr_tools/helpers/usgs_format.rb +2 -2
  39. data/lib/search_solr_tools/selectors/adc.rb +2 -1
  40. data/lib/search_solr_tools/selectors/data_one.rb +2 -1
  41. data/lib/search_solr_tools/selectors/echo_iso.rb +2 -1
  42. data/lib/search_solr_tools/selectors/ices_iso.rb +2 -1
  43. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +2 -1
  44. data/lib/search_solr_tools/selectors/nmi.rb +2 -1
  45. data/lib/search_solr_tools/selectors/nodc_iso.rb +2 -1
  46. data/lib/search_solr_tools/selectors/pdc_iso.rb +2 -1
  47. data/lib/search_solr_tools/selectors/r2r.rb +3 -1
  48. data/lib/search_solr_tools/selectors/rda.rb +2 -1
  49. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +3 -1
  50. data/lib/search_solr_tools/selectors/usgs_iso.rb +3 -1
  51. data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
  52. data/lib/search_solr_tools/translators/eol_to_solr.rb +6 -0
  53. data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
  54. data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
  55. data/lib/search_solr_tools/version.rb +1 -1
  56. data/search_solr_tools.gemspec +22 -23
  57. metadata +47 -55
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class DataOne < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from ECHO and inserts it into Solr after it has been translated
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from ICES and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC PALEO and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class Nmi < Oai
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'auto_suggest'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class NsidcAutoSuggest < AutoSuggest
@@ -3,6 +3,7 @@ require 'rest-client'
3
3
 
4
4
  require 'search_solr_tools'
5
5
 
6
+
6
7
  module SearchSolrTools
7
8
  module Harvesters
8
9
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
@@ -13,6 +14,17 @@ module SearchSolrTools
13
14
  Helpers::FacetConfiguration.import_bin_configuration(env)
14
15
  end
15
16
 
# Checks whether the NSIDC JSON source responds to an HTTP OPTIONS request.
#
# Returns true only when the response code is exactly 200. Any other response
# code, or any StandardError raised while making the request, yields false.
# Failures are reported on stdout together with the underlying error so the
# cause of a failed ping can be diagnosed.
def ping_source
  RestClient.options(nsidc_json_url) do |response, _request, _result|
    # `return` inside the block exits ping_source itself with the result.
    return response.code == 200
  end
  # Reached only if the block above was never invoked.
  false
rescue StandardError => e
  puts "Error trying to get options for #{nsidc_json_url} (ping): #{e.class}: #{e.message}"
  false
end
27
+
16
28
  def harvest_and_delete
17
29
  puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
18
30
  super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
@@ -22,8 +34,22 @@ module SearchSolrTools
22
34
  # this is the main entry point for the class
23
35
  def harvest_nsidc_json_into_solr
24
36
  result = docs_with_translated_entries_from_nsidc
25
- insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
26
- fail 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
37
+
38
+ status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
39
+
40
+ status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
41
+
42
+ # Record the number of harvest failures; note that if this is 0, that's OK, the status will stay at 0
43
+ status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, result[:failure_ids].length)
44
+
45
+ raise Errors::HarvestError, status unless status.ok?
46
+ rescue Errors::HarvestError => e
47
+ raise e
48
+ rescue StandardError => e
49
+ puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
50
+ puts e.backtrace
51
+ status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
52
+ raise Errors::HarvestError, status
27
53
  end
28
54
 
29
55
  def nsidc_json_url
@@ -33,7 +59,7 @@ module SearchSolrTools
33
59
  def result_ids_from_nsidc
34
60
  url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
35
61
  SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
36
- get_results url, '//xmlns:identifier'
62
+ get_results(url, '//xmlns:identifier') || []
37
63
  end
38
64
 
39
65
  # Fetch a JSON representation of a dataset's metadata
@@ -48,7 +74,8 @@ module SearchSolrTools
48
74
  docs = []
49
75
  failure_ids = []
50
76
 
51
- result_ids_from_nsidc.each do |r|
77
+ all_docs = result_ids_from_nsidc
78
+ all_docs.each do |r|
52
79
  # Each result looks like:
53
80
  # oai:nsidc.org/AE_L2A
54
81
  id = r.text.split('/').last
@@ -60,7 +87,7 @@ module SearchSolrTools
60
87
  end
61
88
  end
62
89
 
63
- { add_docs: docs, failure_ids: failure_ids }
90
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
64
91
  end
65
92
  end
66
93
  end
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/query_builder'
3
+
1
4
  require 'json'
2
5
 
3
6
  module SearchSolrTools
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from Polar data catalogue and inserts it into
@@ -1,8 +1,8 @@
1
+ require_relative 'base'
2
+
1
3
  require 'nokogiri'
2
4
  require 'rest-client'
3
5
 
4
- require_relative 'base'
5
-
6
6
  module SearchSolrTools
7
7
  module Harvesters
8
8
  class R2R < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests the RDA feed
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from TDAR and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from USGS and inserts it into Solr after it has been translated
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -0,0 +1,44 @@
module SearchSolrTools
  module Helpers
    # Accumulates the outcome of a harvest/ingest run as a hash of status keys
    # to values (normally counts), plus two flags recording whether Solr and
    # the data source could be pinged.
    class HarvestStatus
      INGEST_OK = :ok
      HARVEST_NO_DOCS = :harvest_none
      HARVEST_FAILURE = :harvest_fail
      INGEST_ERR_INVALID_DOC = :invalid
      INGEST_ERR_SOLR_ERROR = :solr_error
      OTHER_ERROR = :other
      PING_SOLR = :ping_solr # used for initialize only
      PING_SOURCE = :ping_source # used for initialize only

      # Status keys that make the run count as failed when their value is non-zero.
      ERROR_STATUS = [
        HARVEST_NO_DOCS,
        HARVEST_FAILURE,
        INGEST_ERR_INVALID_DOC,
        INGEST_ERR_SOLR_ERROR,
        OTHER_ERROR
      ].freeze

      attr_reader :status
      attr_accessor :ping_solr, :ping_source

      # init_info is an optional hash that contains the various status keys and
      # the values to associate with them. Values are stored as given. Keys that
      # are not known status keys are ignored, except PING_SOLR and PING_SOURCE,
      # which override the corresponding ping flags (both default to true).
      def initialize(init_info = {})
        @status = { INGEST_OK => 0 }
        ERROR_STATUS.each { |s| @status[s] = 0 }
        @ping_solr = true
        @ping_source = true

        init_info.each do |key, value|
          @status[key] = value if @status.key?(key)
        end

        @ping_solr = init_info[PING_SOLR] if init_info.key?(PING_SOLR)
        @ping_source = init_info[PING_SOURCE] if init_info.key?(PING_SOURCE)
      end

      # Adds count (default 1) to the value stored for the given status key and
      # returns the new value.
      #
      # Raises ArgumentError when status is not one of the known status keys,
      # rather than failing with a NoMethodError on nil.
      def record_status(status, count = 1)
        raise ArgumentError, "Unknown harvest status: #{status.inspect}" unless @status.key?(status)

        @status[status] += count
      end

      # Truthy only when every ERROR_STATUS entry equals 0 and both ping flags
      # are truthy. The INGEST_OK count is not consulted.
      def ok?
        ERROR_STATUS.all? { |s| @status[s] == 0 } && @ping_solr && @ping_source
      end
    end
  end
end
@@ -1,3 +1,4 @@
1
+ require_relative 'selectors'
1
2
  require 'nokogiri'
2
3
 
3
4
  module SearchSolrTools
@@ -1,7 +1,7 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,8 +1,8 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
5
- require_relative './iso_to_solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
+ require_relative 'iso_to_solr_format'
6
6
 
7
7
  module SearchSolrTools
8
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,4 @@
1
- require 'require_all'
2
- require_rel '../selectors'
1
+ Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
3
2
 
4
3
  module SearchSolrTools
5
4
  module Helpers
@@ -2,6 +2,7 @@ require 'date'
2
2
  require 'iso8601'
3
3
 
4
4
  require_relative 'bounding_box_util'
5
+ require_relative 'facet_configuration'
5
6
 
6
7
  module SearchSolrTools
7
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,7 +1,7 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/bounding_box_util'
4
- require 'search_solr_tools/helpers/iso_to_solr_format'
3
+ require_relative 'bounding_box_util'
4
+ require_relative 'iso_to_solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,6 +1,6 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,5 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
3
 
4
4
  module SearchSolrTools
5
5
  module Helpers
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/data_one_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/data_one_format'
2
+ require_relative '../helpers/solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/ncdc_paleo_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/r2r_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/tdar_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/usgs_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -4,6 +4,9 @@ require 'rgeo/geo_json'
4
4
  require 'rgeo/wkrep/wkt_parser'
5
5
 
6
6
  require 'search_solr_tools'
7
+ require_relative '../helpers/solr_format'
8
+ require_relative '../helpers/translate_temporal_coverage'
9
+ require_relative '../helpers/translate_spatial_coverage'
7
10
 
8
11
  module SearchSolrTools
9
12
  module Translators