search_solr_tools 4.2.0 → 5.0.1.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of search_solr_tools might be problematic. Click here for more details.

Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -1
  3. data/README.md +6 -6
  4. data/bin/search_solr_tools +58 -2
  5. data/lib/search_solr_tools.rb +9 -7
  6. data/lib/search_solr_tools/errors/harvest_error.rb +73 -0
  7. data/lib/search_solr_tools/harvesters/adc.rb +2 -0
  8. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +2 -0
  9. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -1
  10. data/lib/search_solr_tools/harvesters/base.rb +65 -11
  11. data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -0
  12. data/lib/search_solr_tools/harvesters/data_one.rb +2 -0
  13. data/lib/search_solr_tools/harvesters/echo.rb +2 -0
  14. data/lib/search_solr_tools/harvesters/gtnp.rb +1 -0
  15. data/lib/search_solr_tools/harvesters/ices.rb +3 -0
  16. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +3 -0
  17. data/lib/search_solr_tools/harvesters/nmi.rb +2 -0
  18. data/lib/search_solr_tools/harvesters/nodc.rb +3 -0
  19. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +2 -0
  20. data/lib/search_solr_tools/harvesters/nsidc_json.rb +31 -5
  21. data/lib/search_solr_tools/harvesters/oai.rb +3 -0
  22. data/lib/search_solr_tools/harvesters/pdc.rb +2 -0
  23. data/lib/search_solr_tools/harvesters/r2r.rb +2 -2
  24. data/lib/search_solr_tools/harvesters/rda.rb +2 -0
  25. data/lib/search_solr_tools/harvesters/tdar.rb +2 -0
  26. data/lib/search_solr_tools/harvesters/usgs.rb +3 -0
  27. data/lib/search_solr_tools/helpers/data_one_format.rb +3 -3
  28. data/lib/search_solr_tools/helpers/harvest_status.rb +66 -0
  29. data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -0
  30. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +2 -2
  31. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +3 -3
  32. data/lib/search_solr_tools/helpers/r2r_format.rb +3 -3
  33. data/lib/search_solr_tools/helpers/selectors.rb +1 -2
  34. data/lib/search_solr_tools/helpers/solr_format.rb +1 -0
  35. data/lib/search_solr_tools/helpers/tdar_format.rb +3 -3
  36. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +2 -2
  37. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +1 -1
  38. data/lib/search_solr_tools/helpers/usgs_format.rb +2 -2
  39. data/lib/search_solr_tools/selectors/adc.rb +2 -1
  40. data/lib/search_solr_tools/selectors/data_one.rb +2 -1
  41. data/lib/search_solr_tools/selectors/echo_iso.rb +2 -1
  42. data/lib/search_solr_tools/selectors/ices_iso.rb +2 -1
  43. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +2 -1
  44. data/lib/search_solr_tools/selectors/nmi.rb +2 -1
  45. data/lib/search_solr_tools/selectors/nodc_iso.rb +2 -1
  46. data/lib/search_solr_tools/selectors/pdc_iso.rb +2 -1
  47. data/lib/search_solr_tools/selectors/r2r.rb +3 -1
  48. data/lib/search_solr_tools/selectors/rda.rb +2 -1
  49. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +3 -1
  50. data/lib/search_solr_tools/selectors/usgs_iso.rb +3 -1
  51. data/lib/search_solr_tools/translators/bcodmo_json.rb +3 -0
  52. data/lib/search_solr_tools/translators/eol_to_solr.rb +6 -0
  53. data/lib/search_solr_tools/translators/gtnp_json.rb +3 -0
  54. data/lib/search_solr_tools/translators/nsidc_json.rb +3 -0
  55. data/lib/search_solr_tools/version.rb +1 -1
  56. data/search_solr_tools.gemspec +22 -23
  57. metadata +49 -57
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class DataOne < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from ECHO and inserts it into Solr after it has been translated
@@ -1,3 +1,4 @@
1
+ require_relative 'base'
1
2
  require 'json'
2
3
  require 'rest-client'
3
4
 
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from ICES and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC PALEO and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class Nmi < Oai
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from NODC and inserts it into Solr after it has been translated
@@ -1,3 +1,5 @@
1
+ require_relative 'auto_suggest'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  class NsidcAutoSuggest < AutoSuggest
@@ -3,6 +3,7 @@ require 'rest-client'
3
3
 
4
4
  require 'search_solr_tools'
5
5
 
6
+
6
7
  module SearchSolrTools
7
8
  module Harvesters
8
9
  # Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
@@ -13,6 +14,17 @@ module SearchSolrTools
13
14
  Helpers::FacetConfiguration.import_bin_configuration(env)
14
15
  end
15
16
 
# Checks whether the NSIDC JSON source answers an HTTP OPTIONS request.
#
# The request goes to `nsidc_json_url`; the source counts as reachable only
# when the response code is exactly 200. Any StandardError raised while
# issuing the request is reported on stdout (including its message, so the
# cause of a failed ping is not lost) and treated as "not reachable".
#
# @return [Boolean] true when the response code is 200, false otherwise
def ping_source
  RestClient.options(nsidc_json_url) do |response, _request, _result|
    # Early return from the method with the outcome of the status check.
    return response.code == 200
  end
  # Reached only if the block above never ran.
  false
rescue StandardError => e
  puts "Error trying to get options for #{nsidc_json_url} (ping): #{e.message}"
  false
end
27
+
16
28
  def harvest_and_delete
17
29
  puts "Running harvest of NSIDC catalog from #{nsidc_json_url}"
18
30
  super(method(:harvest_nsidc_json_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]}\"")
@@ -22,8 +34,21 @@ module SearchSolrTools
22
34
  # this is the main entry point for the class
23
35
  def harvest_nsidc_json_into_solr
24
36
  result = docs_with_translated_entries_from_nsidc
25
- insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
26
- fail 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
37
+
38
+ status = insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
39
+
40
+ status.record_document_status('harvest', Helpers::HarvestStatus::HARVEST_NO_DOCS) if result[:num_docs] == 0
41
+ status.record_multiple_document_status(result[:failure_ids],
42
+ Helpers::HarvestStatus::HARVEST_FAILURE) if result[:failure_ids].length > 0
43
+
44
+ raise Errors::HarvestError, status unless status.ok?
45
+ rescue Errors::HarvestError => e
46
+ raise e
47
+ rescue StandardError => e
48
+ puts "An unexpected exception occurred while trying to harvest or insert: #{e}"
49
+ puts e.backtrace
50
+ status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
51
+ raise Errors::HarvestError, status
27
52
  end
28
53
 
29
54
  def nsidc_json_url
@@ -33,7 +58,7 @@ module SearchSolrTools
33
58
  def result_ids_from_nsidc
34
59
  url = SolrEnvironments[@environment][:nsidc_dataset_metadata_url] +
35
60
  SolrEnvironments[@environment][:nsidc_oai_identifiers_url]
36
- get_results url, '//xmlns:identifier'
61
+ get_results(url, '//xmlns:identifier') || []
37
62
  end
38
63
 
39
64
  # Fetch a JSON representation of a dataset's metadata
@@ -48,7 +73,8 @@ module SearchSolrTools
48
73
  docs = []
49
74
  failure_ids = []
50
75
 
51
- result_ids_from_nsidc.each do |r|
76
+ all_docs = result_ids_from_nsidc
77
+ all_docs.each do |r|
52
78
  # Each result looks like:
53
79
  # oai:nsidc.org/AE_L2A
54
80
  id = r.text.split('/').last
@@ -60,7 +86,7 @@ module SearchSolrTools
60
86
  end
61
87
  end
62
88
 
63
- { add_docs: docs, failure_ids: failure_ids }
89
+ { num_docs: all_docs.size, add_docs: docs, failure_ids: failure_ids }
64
90
  end
65
91
  end
66
92
  end
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/query_builder'
3
+
1
4
  require 'json'
2
5
 
3
6
  module SearchSolrTools
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from Polar data catalogue and inserts it into
@@ -1,8 +1,8 @@
1
+ require_relative 'base'
2
+
1
3
  require 'nokogiri'
2
4
  require 'rest-client'
3
5
 
4
- require_relative 'base'
5
-
6
6
  module SearchSolrTools
7
7
  module Harvesters
8
8
  class R2R < Base
@@ -1,3 +1,5 @@
1
+ require_relative 'oai'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests the RDA feed
@@ -1,3 +1,5 @@
1
+ require_relative 'base'
2
+
1
3
  module SearchSolrTools
2
4
  module Harvesters
3
5
  # Harvests data from TDAR and inserts it into Solr after it has been translated
@@ -1,3 +1,6 @@
1
+ require_relative 'base'
2
+ require_relative '../helpers/csw_iso_query_builder'
3
+
1
4
  module SearchSolrTools
2
5
  module Harvesters
3
6
  # Harvests data from USGS and inserts it into Solr after it has been translated
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -0,0 +1,66 @@
1
+ module SearchSolrTools
2
+ module Helpers
3
+ class HarvestStatus
4
+
5
+ INGEST_OK = :ok
6
+ HARVEST_NO_DOCS = :harvest_none
7
+ HARVEST_FAILURE = :harvest_fail
8
+ INGEST_ERR_INVALID_DOC = :invalid
9
+ INGEST_ERR_SOLR_ERROR = :solr_error
10
+ OTHER_ERROR = :other
11
+ PING_SOLR = :ping_solr # used for initialize only
12
+ PING_SOURCE = :ping_source # used for initialize only
13
+
14
+ ERROR_STATUS = [HARVEST_NO_DOCS, HARVEST_FAILURE, INGEST_ERR_INVALID_DOC, INGEST_ERR_SOLR_ERROR, OTHER_ERROR]
15
+
16
+ # init_info is an optional hash that contains the various status keys and the documents to
17
+ # associate with them
18
+ def initialize(init_info={})
19
+ @status = { INGEST_OK => [] }
20
+ @ping_solr = true
21
+ @ping_source = true
22
+ ERROR_STATUS.each { |s| @status[s] = [] }
23
+
24
+ init_info.each do |key, docs|
25
+ @status[key] = docs if @status.include? key
26
+ end
27
+
28
+ @ping_solr = init_info[PING_SOLR] if init_info.include? PING_SOLR
29
+ @ping_source = init_info[PING_SOURCE] if init_info.include? PING_SOURCE
30
+ end
31
+
32
+ def record_multiple_document_status(documents, doc_status)
33
+ documents.each { |d| record_document_status d, doc_status }
34
+ end
35
+
36
+ def record_document_status(document, doc_status)
37
+ @status[doc_status] << document
38
+ end
39
+
40
+ def ping_solr=(newval)
41
+ @ping_solr = newval
42
+ end
43
+
44
+ def ping_source=(newval)
45
+ @ping_source = newval
46
+ end
47
+
48
+ def ok?
49
+ ERROR_STATUS.each { |s| return false unless @status[s].empty? }
50
+ @ping_solr && @ping_source
51
+ end
52
+
53
+ def ping_solr
54
+ @ping_solr
55
+ end
56
+
57
+ def ping_source
58
+ @ping_source
59
+ end
60
+
61
+ def documents_with_status(doc_status)
62
+ @status[doc_status]
63
+ end
64
+ end
65
+ end
66
+ end
@@ -1,3 +1,4 @@
1
+ require_relative 'selectors'
1
2
  require 'nokogiri'
2
3
 
3
4
  module SearchSolrTools
@@ -1,7 +1,7 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,8 +1,8 @@
1
1
  require 'date'
2
2
 
3
- require_relative './iso_namespaces'
4
- require_relative './solr_format'
5
- require_relative './iso_to_solr_format'
3
+ require_relative 'iso_namespaces'
4
+ require_relative 'solr_format'
5
+ require_relative 'iso_to_solr_format'
6
6
 
7
7
  module SearchSolrTools
8
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,4 @@
1
- require 'require_all'
2
- require_rel '../selectors'
1
+ Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
3
2
 
4
3
  module SearchSolrTools
5
4
  module Helpers
@@ -2,6 +2,7 @@ require 'date'
2
2
  require 'iso8601'
3
3
 
4
4
  require_relative 'bounding_box_util'
5
+ require_relative 'facet_configuration'
5
6
 
6
7
  module SearchSolrTools
7
8
  module Helpers
@@ -1,6 +1,6 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
3
- require_relative './solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,7 +1,7 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/bounding_box_util'
4
- require 'search_solr_tools/helpers/iso_to_solr_format'
3
+ require_relative 'bounding_box_util'
4
+ require_relative 'iso_to_solr_format'
5
5
 
6
6
  module SearchSolrTools
7
7
  module Helpers
@@ -1,6 +1,6 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
- require 'search_solr_tools/helpers/solr_format'
3
+ require_relative 'solr_format'
4
4
 
5
5
  module SearchSolrTools
6
6
  module Helpers
@@ -1,5 +1,5 @@
1
- require_relative './iso_namespaces'
2
- require_relative './iso_to_solr_format'
1
+ require_relative 'iso_namespaces'
2
+ require_relative 'iso_to_solr_format'
3
3
 
4
4
  module SearchSolrTools
5
5
  module Helpers
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/data_one_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/data_one_format'
2
+ require_relative '../helpers/solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/ncdc_paleo_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/r2r_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors
@@ -1,4 +1,5 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
2
3
 
3
4
  module SearchSolrTools
4
5
  module Selectors
@@ -1,4 +1,6 @@
1
- require 'search_solr_tools'
1
+ require_relative '../helpers/solr_format'
2
+ require_relative '../helpers/iso_to_solr_format'
3
+ require_relative '../helpers/tdar_format'
2
4
 
3
5
  module SearchSolrTools
4
6
  module Selectors