search_solr_tools 6.1.0 → 6.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/bin/search_solr_tools +1 -13
  4. data/lib/search_solr_tools/config/environments.yaml +0 -32
  5. data/lib/search_solr_tools/harvesters/base.rb +0 -1
  6. data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
  7. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
  8. data/lib/search_solr_tools/version.rb +1 -1
  9. data/lib/search_solr_tools.rb +1 -2
  10. metadata +2 -44
  11. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  12. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  13. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  14. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  15. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  16. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  17. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  18. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  19. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  20. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  21. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  22. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  23. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  24. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  25. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  26. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  27. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  28. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  29. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  32. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  33. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  34. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  35. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  36. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  37. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  38. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  39. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  42. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  43. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  44. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  45. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  46. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  47. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  48. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  49. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  50. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  51. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  52. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cded5837c68e69c4dd22f9aea6b3efab1d4cc21570daba1b91b9561d7cde1216
4
- data.tar.gz: 73a840e47c0cbc41a3fb13e62ca85735e20d87dc9f4b0acfd3d522ef1874ad34
3
+ metadata.gz: afbece000e765162dcb622479c3b9da58b534e4609b83ccb194a719ae5bb9c03
4
+ data.tar.gz: 012cb04bdced985254701d6f50fc99340c909fcff8000263c184dd9519282bbf
5
5
  SHA512:
6
- metadata.gz: 5056c01da21c54b3b4c86ddef264d2e0f9dbe83db472677ac1076a31a5e42e97a6ad0cd0a037b0431374a2d243bfc398dfcfa631863d3d4f3b3b334751b6e2c6
7
- data.tar.gz: 39418c870ebada693b8ef2b75460d04e8ed0d1f9764e534d2195fa59fb5daaec4ae002ae4f016d05449a1a8a593b7e931261965b5756c664c6edfc7c94e6e3b1
6
+ metadata.gz: 9b0c53d6ff652e840a8014f1ee0e31db62ad5faba634cd7aebf66ac90cd9a6865953ed081169f4b52ca96bf3ac135954a26d8aa6e5e7aa2bc1cf3298f946f260
7
+ data.tar.gz: 3975437ccf9540013f5c74e71a8e6f676273aab0e4b8a9a78df853b0010120b660f06a7a6ae630d6b40c6c6b8e56fc01b7562a6e2127508218c716e5ff4b41fc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## v6.2.0 (2022-07-18)
2
+
3
+ - Remove deprecated harvesters and associated tests, helpers, etc.
4
+
1
5
  ## v6.1.0 (2022-07-14)
2
6
 
3
7
  - Updated a few other dependencies that weren't at the newest versions.
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
120
120
  no_tasks do
121
121
  def harvester_map
122
122
  {
123
- 'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
124
- 'adc' => SearchSolrTools::Harvesters::Adc,
125
- 'echo' => SearchSolrTools::Harvesters::Echo,
126
- 'ices' => SearchSolrTools::Harvesters::Ices,
127
- 'nmi' => SearchSolrTools::Harvesters::Nmi,
128
- 'nodc' => SearchSolrTools::Harvesters::Nodc,
129
- 'r2r' => SearchSolrTools::Harvesters::R2R,
130
- 'rda' => SearchSolrTools::Harvesters::Rda,
131
- 'usgs' => SearchSolrTools::Harvesters::Usgs,
132
- 'tdar' => SearchSolrTools::Harvesters::Tdar,
133
- 'pdc' => SearchSolrTools::Harvesters::Pdc,
134
123
  'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
135
- 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
136
- 'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
124
+ 'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
137
125
  }
138
126
  end
139
127
 
@@ -3,38 +3,6 @@
3
3
  :collection_name: nsidc_oai
4
4
  :collection_path: solr
5
5
  :port: 8983
6
- :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
- :adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
8
- :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
- :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
10
- :gtnp:
11
- - http://www.gtnpdatabase.org/rest/boreholes/json
12
- - http://www.gtnpdatabase.org/rest/activelayers/json
13
- :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
14
- :ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
15
- :nmi_url: http://arcticdata.met.no/metamod/oai
16
- :nodc_url: https://data.nodc.noaa.gov/geoportal/csw
17
- :pdc_url: http://www.polardata.ca/oai/provider
18
- :rda_url: https://rda.ucar.edu/cgi-bin/oai
19
- :tdar_url: http://core.tdar.org/search/rss
20
- :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
21
- :eol:
22
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
23
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
24
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
25
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
26
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
27
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
28
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
29
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
30
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
31
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
32
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
33
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
34
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
35
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
36
- - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
37
- :r2r_url: http://get.rvdata.us/services/cruise/
38
6
 
39
7
  # Not using DCS API v2 here because not all retired datasets have their "retired"
40
8
  # flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
@@ -8,7 +8,6 @@ require 'time'
8
8
  require 'search_solr_tools'
9
9
  require_relative '../helpers/iso_namespaces'
10
10
  require_relative '../helpers/solr_format'
11
- require_relative '../helpers/iso_to_solr'
12
11
 
13
12
 
14
13
  module SearchSolrTools
@@ -10,22 +10,7 @@ module SearchSolrTools
10
10
  # rubocop:disable Metrics/ModuleLength
11
11
  module SolrFormat
12
12
  DATA_CENTER_NAMES = {
13
- BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
14
- ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
15
- DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
16
- ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
17
- EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
18
- GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
19
- ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
20
- NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
21
- NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
22
- NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
23
13
  NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
24
- PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
25
- R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
26
- RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
27
- TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
28
- USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
29
14
  }
30
15
 
31
16
  NOT_SPECIFIED = 'Not specified'
@@ -1,7 +1,6 @@
1
1
  require 'rgeo/geo_json'
2
2
 
3
3
  require_relative 'bounding_box_util'
4
- require_relative 'iso_to_solr_format'
5
4
 
6
5
  module SearchSolrTools
7
6
  module Helpers
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '6.1.0'
2
+ VERSION = '6.2.0'
3
3
  end
@@ -1,10 +1,9 @@
1
1
  require_relative 'search_solr_tools/config/environments'
2
2
  require_relative 'search_solr_tools/version'
3
3
 
4
- require_relative 'search_solr_tools/helpers/selectors'
5
4
  require_relative 'search_solr_tools/helpers/harvest_status'
6
5
  require_relative 'search_solr_tools/errors/harvest_error'
7
6
 
8
- %w( selectors harvesters translators ).each do |subdir|
7
+ %w( harvesters translators ).each do |subdir|
9
8
  Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
10
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.1.0
4
+ version: 6.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -14,7 +14,7 @@ authors:
14
14
  autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2023-07-17 00:00:00.000000000 Z
17
+ date: 2023-07-18 00:00:00.000000000 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: ffi-geos
@@ -321,59 +321,17 @@ files:
321
321
  - lib/search_solr_tools/config/environments.rb
322
322
  - lib/search_solr_tools/config/environments.yaml
323
323
  - lib/search_solr_tools/errors/harvest_error.rb
324
- - lib/search_solr_tools/harvesters/adc.rb
325
- - lib/search_solr_tools/harvesters/ade_auto_suggest.rb
326
324
  - lib/search_solr_tools/harvesters/auto_suggest.rb
327
325
  - lib/search_solr_tools/harvesters/base.rb
328
- - lib/search_solr_tools/harvesters/bcodmo.rb
329
- - lib/search_solr_tools/harvesters/data_one.rb
330
- - lib/search_solr_tools/harvesters/echo.rb
331
- - lib/search_solr_tools/harvesters/eol.rb
332
- - lib/search_solr_tools/harvesters/gtnp.rb
333
- - lib/search_solr_tools/harvesters/ices.rb
334
- - lib/search_solr_tools/harvesters/ncdc_paleo.rb
335
- - lib/search_solr_tools/harvesters/nmi.rb
336
- - lib/search_solr_tools/harvesters/nodc.rb
337
326
  - lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
338
327
  - lib/search_solr_tools/harvesters/nsidc_json.rb
339
- - lib/search_solr_tools/harvesters/oai.rb
340
- - lib/search_solr_tools/harvesters/pdc.rb
341
- - lib/search_solr_tools/harvesters/r2r.rb
342
- - lib/search_solr_tools/harvesters/rda.rb
343
- - lib/search_solr_tools/harvesters/tdar.rb
344
- - lib/search_solr_tools/harvesters/usgs.rb
345
328
  - lib/search_solr_tools/helpers/bounding_box_util.rb
346
- - lib/search_solr_tools/helpers/csw_iso_query_builder.rb
347
- - lib/search_solr_tools/helpers/data_one_format.rb
348
329
  - lib/search_solr_tools/helpers/facet_configuration.rb
349
330
  - lib/search_solr_tools/helpers/harvest_status.rb
350
331
  - lib/search_solr_tools/helpers/iso_namespaces.rb
351
- - lib/search_solr_tools/helpers/iso_to_solr.rb
352
- - lib/search_solr_tools/helpers/iso_to_solr_format.rb
353
- - lib/search_solr_tools/helpers/ncdc_paleo_format.rb
354
- - lib/search_solr_tools/helpers/query_builder.rb
355
- - lib/search_solr_tools/helpers/r2r_format.rb
356
- - lib/search_solr_tools/helpers/selectors.rb
357
332
  - lib/search_solr_tools/helpers/solr_format.rb
358
- - lib/search_solr_tools/helpers/tdar_format.rb
359
333
  - lib/search_solr_tools/helpers/translate_spatial_coverage.rb
360
334
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
361
- - lib/search_solr_tools/helpers/usgs_format.rb
362
- - lib/search_solr_tools/selectors/adc.rb
363
- - lib/search_solr_tools/selectors/data_one.rb
364
- - lib/search_solr_tools/selectors/echo_iso.rb
365
- - lib/search_solr_tools/selectors/ices_iso.rb
366
- - lib/search_solr_tools/selectors/ncdc_paleo.rb
367
- - lib/search_solr_tools/selectors/nmi.rb
368
- - lib/search_solr_tools/selectors/nodc_iso.rb
369
- - lib/search_solr_tools/selectors/pdc_iso.rb
370
- - lib/search_solr_tools/selectors/r2r.rb
371
- - lib/search_solr_tools/selectors/rda.rb
372
- - lib/search_solr_tools/selectors/tdar_opensearch.rb
373
- - lib/search_solr_tools/selectors/usgs_iso.rb
374
- - lib/search_solr_tools/translators/bcodmo_json.rb
375
- - lib/search_solr_tools/translators/eol_to_solr.rb
376
- - lib/search_solr_tools/translators/gtnp_json.rb
377
335
  - lib/search_solr_tools/translators/nsidc_json.rb
378
336
  - lib/search_solr_tools/version.rb
379
337
  - search_solr_tools.gemspec
@@ -1,49 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class Adc < Base
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @page_size = 250
9
- @translator = Helpers::IsoToSolr.new :adc
10
- end
11
-
12
- def harvest_and_delete
13
- puts "Running harvest of adc catalog from #{metadata_url}"
14
- super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
15
- end
16
-
17
- def harvest_adc_into_solr
18
- start = 0
19
- while (entries = get_results_from_adc(start)) && (entries.length > 0)
20
- begin
21
- insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
22
- rescue => e
23
- puts "ERROR: #{e}\n\n"
24
- raise e if @die_on_failure
25
- end
26
- start += @page_size
27
- end
28
- end
29
-
30
- def get_results_from_adc(start)
31
- get_results(build_request(start, @page_size), './response/result/doc')
32
- end
33
-
34
- def metadata_url
35
- SolrEnvironments[@environment][:adc_url]
36
- end
37
-
38
- def get_docs_with_translated_entries_from_adc(entries)
39
- entries.map do |e|
40
- create_new_solr_add_doc_with_child(@translator.translate(e).root)
41
- end
42
- end
43
-
44
- def build_request(start = 0, max_records = 100)
45
- "#{metadata_url}&start=#{start}&rows=#{max_records}"
46
- end
47
- end
48
- end
49
- end
@@ -1,46 +0,0 @@
1
- require_relative 'auto_suggest'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class AdeAutoSuggest < AutoSuggest
6
- def harvest_and_delete
7
- puts 'Building auto-suggest indexes for ADE'
8
- super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
9
- end
10
-
11
- def harvest
12
- url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
13
- super url, fields
14
- end
15
-
16
- def fields
17
- {
18
- 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
19
- 'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
20
- }
21
- end
22
-
23
- def split_creator(value, count, field_weight, source, split_regex)
24
- add_docs = []
25
- value.downcase.split(split_regex).each do |v|
26
- v = v.strip.chomp('/')
27
- add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
28
- end
29
- add_docs
30
- end
31
-
32
- def keyword_creator(value, count, field_weight, source)
33
- split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
34
- end
35
-
36
- def author_creator(value, count, field_weight, source)
37
- split_creator value, count, field_weight, source, %r{/;/}
38
- end
39
-
40
- def ade_length_limit_creator(value, count, field_weight, source)
41
- return [] if value.length > 80
42
- standard_add_creator value, count, field_weight, source
43
- end
44
- end
45
- end
46
- end
@@ -1,64 +0,0 @@
1
- require_relative 'base'
2
- require 'json'
3
- require 'rest-client'
4
-
5
- module SearchSolrTools
6
- module Harvesters
7
- # Harvests data from BcoDmo endpoint, translates and adds it to solr
8
- class BcoDmo < Base
9
- def initialize(env = 'development', die_on_failure = false)
10
- super env, die_on_failure
11
- @translator = Translators::BcodmoJsonToSolr.new
12
- @wkt_parser = RGeo::WKRep::WKTParser.new(nil, {}) # (factory_generator_=nil,
13
- end
14
-
15
- def harvest_and_delete
16
- puts "Running harvest of BCO-DMO catalog from #{bcodmo_url}"
17
- super(method(:harvest_bcodmo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO][:long_name]}\"")
18
- end
19
-
20
- def bcodmo_url
21
- SolrEnvironments[@environment][:bcodmo_url]
22
- end
23
-
24
- def harvest_bcodmo_into_solr
25
- result = translate_bcodmo
26
- insert_solr_docs(result[:add_docs], Base::JSON_CONTENT_TYPE)
27
-
28
- errors_exist = result[:failure_ids].length > 0
29
- fail 'Failed to harvest some records from BCO-DMO' if errors_exist && @die_on_failure
30
- end
31
-
32
- def translate_bcodmo
33
- documents = []
34
- failure_ids = []
35
- request_json(SolrEnvironments[@environment][:bcodmo_url]).each do |record|
36
- geometry = request_json(record['geometryUrl'])
37
- results = parse_record(record, geometry)
38
- results[:documents].each { |d| documents << d }
39
- results[:failure_ids].each { |id| failure_ids << id }
40
- end
41
- { add_docs: documents, failure_ids: failure_ids }
42
- end
43
-
44
- def request_json(url)
45
- puts "Request: #{url}"
46
- JSON.parse(RestClient.get(url))
47
- end
48
-
49
- def parse_record(record, geometry)
50
- documents = []
51
- failure_ids = []
52
- begin
53
- JSON.parse(RestClient.get(record['datasets'])).each do |dataset|
54
- documents << { 'add' => { 'doc' => @translator.translate(dataset, record, geometry) } }
55
- end
56
- rescue => e
57
- puts "Failed to add record #{record['id']} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
58
- failure_ids << record['id']
59
- end
60
- { documents: documents, failure_ids: failure_ids }
61
- end
62
- end
63
- end
64
- end
@@ -1,49 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- class DataOne < Base
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @page_size = 250
9
- @translator = Helpers::IsoToSolr.new :data_one
10
- end
11
-
12
- def harvest_and_delete
13
- puts "Running harvest of dataONE catalog from #{metadata_url}"
14
- super(method(:harvest_data_one_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]}\"")
15
- end
16
-
17
- def harvest_data_one_into_solr
18
- start = 0
19
- while (entries = get_results_from_data_one(start)) && (entries.length > 0)
20
- begin
21
- insert_solr_docs(get_docs_with_translated_entries_from_data_one(entries))
22
- rescue => e
23
- puts "ERROR: #{e}\n\n"
24
- raise e if @die_on_failure
25
- end
26
- start += @page_size
27
- end
28
- end
29
-
30
- def get_results_from_data_one(start)
31
- get_results(build_request(start, @page_size), './response/result/doc')
32
- end
33
-
34
- def metadata_url
35
- SolrEnvironments[@environment][:data_one_url]
36
- end
37
-
38
- def get_docs_with_translated_entries_from_data_one(entries)
39
- entries.map do |e|
40
- create_new_solr_add_doc_with_child(@translator.translate(e).root)
41
- end
42
- end
43
-
44
- def build_request(start = 0, max_records = 100)
45
- "#{metadata_url}&start=#{start}&rows=#{max_records}"
46
- end
47
- end
48
- end
49
- end
@@ -1,52 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- # Harvests data from ECHO and inserts it into Solr after it has been translated
6
- class Echo < Base
7
- def initialize(env = 'development', die_on_failure = false)
8
- super env, die_on_failure
9
- @page_size = 100
10
- @translator = Helpers::IsoToSolr.new :echo
11
- end
12
-
13
- def harvest_and_delete
14
- puts "Running harvest of ECHO catalog from #{echo_url}"
15
- super(method(:harvest_echo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]}\"")
16
- end
17
-
18
- # get translated entries from ECHO and add them to Solr
19
- # this is the main entry point for the class
20
- def harvest_echo_into_solr
21
- page_num = 1
22
- while (entries = get_results_from_echo(page_num)) && (entries.length > 0)
23
- begin
24
- insert_solr_docs get_docs_with_translated_entries_from_echo(entries)
25
- rescue => e
26
- puts "ERROR: #{e}\n\n"
27
- raise e if @die_on_failure
28
- end
29
- page_num += 1
30
- end
31
- end
32
-
33
- def echo_url
34
- SolrEnvironments[@environment][:echo_url]
35
- end
36
-
37
- def get_results_from_echo(page_num)
38
- get_results build_request(@page_size, page_num), './/results/result', 'application/echo10+xml'
39
- end
40
-
41
- def get_docs_with_translated_entries_from_echo(entries)
42
- entries.map do |entry|
43
- create_new_solr_add_doc_with_child(@translator.translate(entry).root)
44
- end
45
- end
46
-
47
- def build_request(max_records = '25', page_num = '1')
48
- echo_url + '&page_size=' + max_records.to_s + '&page_num=' + page_num.to_s
49
- end
50
- end
51
- end
52
- end
@@ -1,51 +0,0 @@
1
- require_relative 'base'
2
- require 'json'
3
- require 'rgeo/geo_json'
4
-
5
- module SearchSolrTools
6
- module Harvesters
7
- class Eol < Base
8
- def initialize(env = 'development', die_on_failure = false)
9
- super env, die_on_failure
10
- @translator = SearchSolrTools::Translators::EolToSolr.new
11
- end
12
-
13
- def harvest_and_delete
14
- puts 'Running harvest of EOL catalog using the following configured EOL URLs:'
15
- SearchSolrTools::SolrEnvironments[:common][:eol].each { |x| puts x }
16
- super(method(:harvest_eol_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:EOL][:long_name]}\"")
17
- end
18
-
19
- def harvest_eol_into_solr
20
- solr_add_queries = eol_dataset_urls.map do |dataset|
21
- begin
22
- doc = open_xml_document(dataset)
23
- if doc.xpath('//xmlns:metadata').size > 1
24
- # THREDDS allows for a dataset of datasests, EOL should not utilize this
25
- fail "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
26
- end
27
- metadata_doc = open_xml_document(doc.xpath('//xmlns:metadata')[0]['xlink:href'])
28
- { 'add' => { 'doc' => @translator.translate(doc, metadata_doc) } }
29
- rescue => e
30
- puts "ERROR: #{e}"
31
- puts "Failed to translate this record: #{doc} -> #{metadata_doc}"
32
- raise e if @die_on_failure
33
- next
34
- end
35
- end
36
- insert_solr_docs solr_add_queries, Base::JSON_CONTENT_TYPE
37
- end
38
-
39
- def eol_dataset_urls
40
- SearchSolrTools::SolrEnvironments[:common][:eol].flat_map do |endpoint|
41
- doc = open_xml_document(endpoint)
42
- doc.xpath('//xmlns:catalogRef').map { |node| node['xlink:href'] }
43
- end
44
- end
45
-
46
- def open_xml_document(url)
47
- Nokogiri::XML(open(url), &:strict)
48
- end
49
- end
50
- end
51
- end
@@ -1,67 +0,0 @@
1
- require_relative 'base'
2
- require 'json'
3
- require 'rest-client'
4
-
5
- module SearchSolrTools
6
- module Harvesters
7
- # Harvests data from GTN-P endpoints, translates and adds it to solr
8
- class GtnP < Base
9
- def initialize(env = 'development', die_on_failure = false)
10
- super env, die_on_failure
11
- @translator = Translators::GtnpJsonToSolr.new
12
- end
13
-
14
- def gtnp_service_urls
15
- json_records = []
16
- SearchSolrTools::SolrEnvironments[:common][:gtnp].flat_map do |endpoint|
17
- record = request_json(endpoint)
18
- json_records << record
19
- end
20
- json_records
21
- end
22
-
23
- def harvest_and_delete
24
- puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
25
- SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
26
- super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
27
- end
28
-
29
- def harvest_gtnp_into_solr
30
- result = translate_gtnp
31
- insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
32
- fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
33
- end
34
-
35
- def translate_gtnp
36
- documents = []
37
- failure_ids = []
38
- gtnp_records = gtnp_service_urls
39
- gtnp_records.each do |record|
40
- results = parse_record(record)
41
- results[:documents].each { |d| documents << d }
42
- results[:failure_ids].each { |id| failure_ids << id }
43
- end
44
- { add_docs: documents, failure_ids: failure_ids }
45
- end
46
-
47
- def request_json(url)
48
- JSON.parse(RestClient.get(url))
49
- end
50
-
51
- def parse_record(record)
52
- documents = []
53
- failure_ids = []
54
- begin
55
- record.drop(1).each do |dataset|
56
- trans_doc = @translator.translate(dataset, record[0])
57
- documents << { 'add' => { 'doc' => trans_doc } }
58
- end
59
- rescue => e
60
- puts "Failed to add record #{record[0][:title]} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
61
- failure_ids << record[0][:title]
62
- end
63
- { documents: documents, failure_ids: failure_ids }
64
- end
65
- end
66
- end
67
- end
@@ -1,58 +0,0 @@
1
- require_relative 'base'
2
- require_relative '../helpers/csw_iso_query_builder'
3
-
4
- module SearchSolrTools
5
- module Harvesters
6
- # Harvests data from ICES and inserts it into Solr after it has been translated
7
- class Ices < Base
8
- def initialize(env = 'development', die_on_failure = false)
9
- super env, die_on_failure
10
- @page_size = 100
11
- @translator = Helpers::IsoToSolr.new :ices
12
- end
13
-
14
- def harvest_and_delete
15
- puts "Running harvest of ICES catalog from #{ices_url}"
16
- super(method(:harvest_ices_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]}\"")
17
- end
18
-
19
- # get translated entries from ICES and add them to Solr
20
- # this is the main entry point for the class
21
- def harvest_ices_into_solr
22
- start_index = 1
23
- while (entries = get_results_from_ices(start_index)) && (entries.length > 0)
24
- begin
25
- insert_solr_docs get_docs_with_translated_entries_from_ices(entries)
26
- rescue => e
27
- puts "ERROR: #{e}"
28
- raise e if @die_on_failure
29
- end
30
- start_index += @page_size
31
- end
32
- end
33
-
34
- def ices_url
35
- SolrEnvironments[@environment][:ices_url]
36
- end
37
-
38
- def get_results_from_ices(start_index)
39
- get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata'
40
- end
41
-
42
- def get_docs_with_translated_entries_from_ices(entries)
43
- entries.map do |entry|
44
- create_new_solr_add_doc_with_child(@translator.translate(entry).root)
45
- end
46
- end
47
-
48
- def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
49
- Helpers::CswIsoQueryBuilder.get_query_string(ices_url,
50
- 'resultType' => resultType,
51
- 'maxRecords' => maxRecords,
52
- 'startPosition' => startPosition,
53
- 'constraintLanguage' => 'CQL_TEXT',
54
- 'outputSchema' => 'http://www.isotc211.org/2005/gmd')
55
- end
56
- end
57
- end
58
- end