search_solr_tools 6.1.0 → 6.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/bin/search_solr_tools +1 -13
- data/lib/search_solr_tools/config/environments.yaml +0 -32
- data/lib/search_solr_tools/harvesters/base.rb +0 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
- data/lib/search_solr_tools/version.rb +1 -1
- data/lib/search_solr_tools.rb +1 -2
- metadata +2 -44
- data/lib/search_solr_tools/harvesters/adc.rb +0 -49
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
- data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
- data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
- data/lib/search_solr_tools/harvesters/echo.rb +0 -52
- data/lib/search_solr_tools/harvesters/eol.rb +0 -51
- data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
- data/lib/search_solr_tools/harvesters/ices.rb +0 -58
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
- data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
- data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
- data/lib/search_solr_tools/harvesters/oai.rb +0 -62
- data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
- data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
- data/lib/search_solr_tools/harvesters/rda.rb +0 -35
- data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
- data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
- data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
- data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
- data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
- data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
- data/lib/search_solr_tools/helpers/selectors.rb +0 -22
- data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
- data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
- data/lib/search_solr_tools/selectors/adc.rb +0 -96
- data/lib/search_solr_tools/selectors/data_one.rb +0 -96
- data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
- data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
- data/lib/search_solr_tools/selectors/nmi.rb +0 -107
- data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
- data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
- data/lib/search_solr_tools/selectors/r2r.rb +0 -115
- data/lib/search_solr_tools/selectors/rda.rb +0 -107
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
- data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
- data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
- data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
- data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afbece000e765162dcb622479c3b9da58b534e4609b83ccb194a719ae5bb9c03
|
4
|
+
data.tar.gz: 012cb04bdced985254701d6f50fc99340c909fcff8000263c184dd9519282bbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9b0c53d6ff652e840a8014f1ee0e31db62ad5faba634cd7aebf66ac90cd9a6865953ed081169f4b52ca96bf3ac135954a26d8aa6e5e7aa2bc1cf3298f946f260
|
7
|
+
data.tar.gz: 3975437ccf9540013f5c74e71a8e6f676273aab0e4b8a9a78df853b0010120b660f06a7a6ae630d6b40c6c6b8e56fc01b7562a6e2127508218c716e5ff4b41fc
|
data/CHANGELOG.md
CHANGED
data/bin/search_solr_tools
CHANGED
@@ -120,20 +120,8 @@ class SolrHarvestCLI < Thor
|
|
120
120
|
no_tasks do
|
121
121
|
def harvester_map
|
122
122
|
{
|
123
|
-
'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
|
124
|
-
'adc' => SearchSolrTools::Harvesters::Adc,
|
125
|
-
'echo' => SearchSolrTools::Harvesters::Echo,
|
126
|
-
'ices' => SearchSolrTools::Harvesters::Ices,
|
127
|
-
'nmi' => SearchSolrTools::Harvesters::Nmi,
|
128
|
-
'nodc' => SearchSolrTools::Harvesters::Nodc,
|
129
|
-
'r2r' => SearchSolrTools::Harvesters::R2R,
|
130
|
-
'rda' => SearchSolrTools::Harvesters::Rda,
|
131
|
-
'usgs' => SearchSolrTools::Harvesters::Usgs,
|
132
|
-
'tdar' => SearchSolrTools::Harvesters::Tdar,
|
133
|
-
'pdc' => SearchSolrTools::Harvesters::Pdc,
|
134
123
|
'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
|
135
|
-
'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
|
136
|
-
'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
|
124
|
+
'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest
|
137
125
|
}
|
138
126
|
end
|
139
127
|
|
data/lib/search_solr_tools/config/environments.yaml
CHANGED
@@ -3,38 +3,6 @@
|
|
3
3
|
:collection_name: nsidc_oai
|
4
4
|
:collection_path: solr
|
5
5
|
:port: 8983
|
6
|
-
:bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
|
7
|
-
:adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
8
|
-
:data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
|
9
|
-
:echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10?bounding_box=-180,45,180,90
|
10
|
-
:gtnp:
|
11
|
-
- http://www.gtnpdatabase.org/rest/boreholes/json
|
12
|
-
- http://www.gtnpdatabase.org/rest/activelayers/json
|
13
|
-
:ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
|
14
|
-
:ncdc_paleo_url: https://gis.ncdc.noaa.gov/gptpaleo/csw
|
15
|
-
:nmi_url: http://arcticdata.met.no/metamod/oai
|
16
|
-
:nodc_url: https://data.nodc.noaa.gov/geoportal/csw
|
17
|
-
:pdc_url: http://www.polardata.ca/oai/provider
|
18
|
-
:rda_url: https://rda.ucar.edu/cgi-bin/oai
|
19
|
-
:tdar_url: http://core.tdar.org/search/rss
|
20
|
-
:usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
|
21
|
-
:eol:
|
22
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
|
23
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
|
24
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
|
25
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
|
26
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
|
27
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
|
28
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
|
29
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
|
30
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
|
31
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
|
32
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
|
33
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
|
34
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
|
35
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
|
36
|
-
- http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
|
37
|
-
:r2r_url: http://get.rvdata.us/services/cruise/
|
38
6
|
|
39
7
|
# Not using DCS API v2 here because not all retired datasets have their "retired"
|
40
8
|
# flag checked. For example, GLA01.033 is retired; GLA01.018 is not, but it
|
data/lib/search_solr_tools/helpers/solr_format.rb
CHANGED
@@ -10,22 +10,7 @@ module SearchSolrTools
|
|
10
10
|
# rubocop:disable Metrics/ModuleLength
|
11
11
|
module SolrFormat
|
12
12
|
DATA_CENTER_NAMES = {
|
13
|
-
BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
|
14
|
-
ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
|
15
|
-
DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
|
16
|
-
ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
|
17
|
-
EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
|
18
|
-
GTNP: { short_name: 'GTN-P', long_name: 'Global Terrestrial Network for Permafrost' },
|
19
|
-
ICES: { short_name: 'ICES', long_name: 'International Council for the Exploration of the Sea' },
|
20
|
-
NCDC_PALEO: { short_name: 'NOAA WDS Paleo', long_name: 'NOAA’s National Centers for Environmental Information, World Data Service for Paleoclimatology' },
|
21
|
-
NMI: { short_name: 'Met.no', long_name: 'Norwegian Meteorological Institute' },
|
22
|
-
NODC: { short_name: 'NOAA NODC', long_name: 'NOAA National Oceanographic Data Center' },
|
23
13
|
NSIDC: { short_name: 'NSIDC', long_name: 'National Snow and Ice Data Center' },
|
24
|
-
PDC: { short_name: 'PDC', long_name: 'Polar Data Catalogue' },
|
25
|
-
R2R: { short_name: 'R2R', long_name: 'Rolling Deck to Repository' },
|
26
|
-
RDA: { short_name: 'UCAR NCAR RDA', long_name: 'UCAR NCAR Research Data Archive' },
|
27
|
-
TDAR: { short_name: 'tDAR', long_name: 'tDAR: The Digital Archaeological Record' },
|
28
|
-
USGS: { short_name: 'USGS ScienceBase', long_name: 'U.S. Geological Survey ScienceBase' }
|
29
14
|
}
|
30
15
|
|
31
16
|
NOT_SPECIFIED = 'Not specified'
|
data/lib/search_solr_tools.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
require_relative 'search_solr_tools/config/environments'
|
2
2
|
require_relative 'search_solr_tools/version'
|
3
3
|
|
4
|
-
require_relative 'search_solr_tools/helpers/selectors'
|
5
4
|
require_relative 'search_solr_tools/helpers/harvest_status'
|
6
5
|
require_relative 'search_solr_tools/errors/harvest_error'
|
7
6
|
|
8
|
-
%w(
|
7
|
+
%w( harvesters translators ).each do |subdir|
|
9
8
|
Dir[File.join(__dir__, 'search_solr_tools', subdir, '*.rb')].each { |file| require file }
|
10
9
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: search_solr_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.1.0
|
4
|
+
version: 6.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Chalstrom
|
@@ -14,7 +14,7 @@ authors:
|
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date: 2023-07-
|
17
|
+
date: 2023-07-18 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: ffi-geos
|
@@ -321,59 +321,17 @@ files:
|
|
321
321
|
- lib/search_solr_tools/config/environments.rb
|
322
322
|
- lib/search_solr_tools/config/environments.yaml
|
323
323
|
- lib/search_solr_tools/errors/harvest_error.rb
|
324
|
-
- lib/search_solr_tools/harvesters/adc.rb
|
325
|
-
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
326
324
|
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
327
325
|
- lib/search_solr_tools/harvesters/base.rb
|
328
|
-
- lib/search_solr_tools/harvesters/bcodmo.rb
|
329
|
-
- lib/search_solr_tools/harvesters/data_one.rb
|
330
|
-
- lib/search_solr_tools/harvesters/echo.rb
|
331
|
-
- lib/search_solr_tools/harvesters/eol.rb
|
332
|
-
- lib/search_solr_tools/harvesters/gtnp.rb
|
333
|
-
- lib/search_solr_tools/harvesters/ices.rb
|
334
|
-
- lib/search_solr_tools/harvesters/ncdc_paleo.rb
|
335
|
-
- lib/search_solr_tools/harvesters/nmi.rb
|
336
|
-
- lib/search_solr_tools/harvesters/nodc.rb
|
337
326
|
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
338
327
|
- lib/search_solr_tools/harvesters/nsidc_json.rb
|
339
|
-
- lib/search_solr_tools/harvesters/oai.rb
|
340
|
-
- lib/search_solr_tools/harvesters/pdc.rb
|
341
|
-
- lib/search_solr_tools/harvesters/r2r.rb
|
342
|
-
- lib/search_solr_tools/harvesters/rda.rb
|
343
|
-
- lib/search_solr_tools/harvesters/tdar.rb
|
344
|
-
- lib/search_solr_tools/harvesters/usgs.rb
|
345
328
|
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
346
|
-
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
347
|
-
- lib/search_solr_tools/helpers/data_one_format.rb
|
348
329
|
- lib/search_solr_tools/helpers/facet_configuration.rb
|
349
330
|
- lib/search_solr_tools/helpers/harvest_status.rb
|
350
331
|
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
351
|
-
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
352
|
-
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
353
|
-
- lib/search_solr_tools/helpers/ncdc_paleo_format.rb
|
354
|
-
- lib/search_solr_tools/helpers/query_builder.rb
|
355
|
-
- lib/search_solr_tools/helpers/r2r_format.rb
|
356
|
-
- lib/search_solr_tools/helpers/selectors.rb
|
357
332
|
- lib/search_solr_tools/helpers/solr_format.rb
|
358
|
-
- lib/search_solr_tools/helpers/tdar_format.rb
|
359
333
|
- lib/search_solr_tools/helpers/translate_spatial_coverage.rb
|
360
334
|
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
361
|
-
- lib/search_solr_tools/helpers/usgs_format.rb
|
362
|
-
- lib/search_solr_tools/selectors/adc.rb
|
363
|
-
- lib/search_solr_tools/selectors/data_one.rb
|
364
|
-
- lib/search_solr_tools/selectors/echo_iso.rb
|
365
|
-
- lib/search_solr_tools/selectors/ices_iso.rb
|
366
|
-
- lib/search_solr_tools/selectors/ncdc_paleo.rb
|
367
|
-
- lib/search_solr_tools/selectors/nmi.rb
|
368
|
-
- lib/search_solr_tools/selectors/nodc_iso.rb
|
369
|
-
- lib/search_solr_tools/selectors/pdc_iso.rb
|
370
|
-
- lib/search_solr_tools/selectors/r2r.rb
|
371
|
-
- lib/search_solr_tools/selectors/rda.rb
|
372
|
-
- lib/search_solr_tools/selectors/tdar_opensearch.rb
|
373
|
-
- lib/search_solr_tools/selectors/usgs_iso.rb
|
374
|
-
- lib/search_solr_tools/translators/bcodmo_json.rb
|
375
|
-
- lib/search_solr_tools/translators/eol_to_solr.rb
|
376
|
-
- lib/search_solr_tools/translators/gtnp_json.rb
|
377
335
|
- lib/search_solr_tools/translators/nsidc_json.rb
|
378
336
|
- lib/search_solr_tools/version.rb
|
379
337
|
- search_solr_tools.gemspec
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class Adc < Base
|
6
|
-
def initialize(env = 'development', die_on_failure = false)
|
7
|
-
super
|
8
|
-
@page_size = 250
|
9
|
-
@translator = Helpers::IsoToSolr.new :adc
|
10
|
-
end
|
11
|
-
|
12
|
-
def harvest_and_delete
|
13
|
-
puts "Running harvest of adc catalog from #{metadata_url}"
|
14
|
-
super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
|
15
|
-
end
|
16
|
-
|
17
|
-
def harvest_adc_into_solr
|
18
|
-
start = 0
|
19
|
-
while (entries = get_results_from_adc(start)) && (entries.length > 0)
|
20
|
-
begin
|
21
|
-
insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
|
22
|
-
rescue => e
|
23
|
-
puts "ERROR: #{e}\n\n"
|
24
|
-
raise e if @die_on_failure
|
25
|
-
end
|
26
|
-
start += @page_size
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def get_results_from_adc(start)
|
31
|
-
get_results(build_request(start, @page_size), './response/result/doc')
|
32
|
-
end
|
33
|
-
|
34
|
-
def metadata_url
|
35
|
-
SolrEnvironments[@environment][:adc_url]
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_docs_with_translated_entries_from_adc(entries)
|
39
|
-
entries.map do |e|
|
40
|
-
create_new_solr_add_doc_with_child(@translator.translate(e).root)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def build_request(start = 0, max_records = 100)
|
45
|
-
"#{metadata_url}&start=#{start}&rows=#{max_records}"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
require_relative 'auto_suggest'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class AdeAutoSuggest < AutoSuggest
|
6
|
-
def harvest_and_delete
|
7
|
-
puts 'Building auto-suggest indexes for ADE'
|
8
|
-
super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
|
9
|
-
end
|
10
|
-
|
11
|
-
def harvest
|
12
|
-
url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
|
13
|
-
super url, fields
|
14
|
-
end
|
15
|
-
|
16
|
-
def fields
|
17
|
-
{
|
18
|
-
'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
|
19
|
-
'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
|
20
|
-
}
|
21
|
-
end
|
22
|
-
|
23
|
-
def split_creator(value, count, field_weight, source, split_regex)
|
24
|
-
add_docs = []
|
25
|
-
value.downcase.split(split_regex).each do |v|
|
26
|
-
v = v.strip.chomp('/')
|
27
|
-
add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
|
28
|
-
end
|
29
|
-
add_docs
|
30
|
-
end
|
31
|
-
|
32
|
-
def keyword_creator(value, count, field_weight, source)
|
33
|
-
split_creator value, count, field_weight, source, %r{/ [\/ \>]+ /}
|
34
|
-
end
|
35
|
-
|
36
|
-
def author_creator(value, count, field_weight, source)
|
37
|
-
split_creator value, count, field_weight, source, %r{/;/}
|
38
|
-
end
|
39
|
-
|
40
|
-
def ade_length_limit_creator(value, count, field_weight, source)
|
41
|
-
return [] if value.length > 80
|
42
|
-
standard_add_creator value, count, field_weight, source
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,64 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require 'json'
|
3
|
-
require 'rest-client'
|
4
|
-
|
5
|
-
module SearchSolrTools
|
6
|
-
module Harvesters
|
7
|
-
# Harvests data from BcoDmo endpoint, translates and adds it to solr
|
8
|
-
class BcoDmo < Base
|
9
|
-
def initialize(env = 'development', die_on_failure = false)
|
10
|
-
super env, die_on_failure
|
11
|
-
@translator = Translators::BcodmoJsonToSolr.new
|
12
|
-
@wkt_parser = RGeo::WKRep::WKTParser.new(nil, {}) # (factory_generator_=nil,
|
13
|
-
end
|
14
|
-
|
15
|
-
def harvest_and_delete
|
16
|
-
puts "Running harvest of BCO-DMO catalog from #{bcodmo_url}"
|
17
|
-
super(method(:harvest_bcodmo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO][:long_name]}\"")
|
18
|
-
end
|
19
|
-
|
20
|
-
def bcodmo_url
|
21
|
-
SolrEnvironments[@environment][:bcodmo_url]
|
22
|
-
end
|
23
|
-
|
24
|
-
def harvest_bcodmo_into_solr
|
25
|
-
result = translate_bcodmo
|
26
|
-
insert_solr_docs(result[:add_docs], Base::JSON_CONTENT_TYPE)
|
27
|
-
|
28
|
-
errors_exist = result[:failure_ids].length > 0
|
29
|
-
fail 'Failed to harvest some records from BCO-DMO' if errors_exist && @die_on_failure
|
30
|
-
end
|
31
|
-
|
32
|
-
def translate_bcodmo
|
33
|
-
documents = []
|
34
|
-
failure_ids = []
|
35
|
-
request_json(SolrEnvironments[@environment][:bcodmo_url]).each do |record|
|
36
|
-
geometry = request_json(record['geometryUrl'])
|
37
|
-
results = parse_record(record, geometry)
|
38
|
-
results[:documents].each { |d| documents << d }
|
39
|
-
results[:failure_ids].each { |id| failure_ids << id }
|
40
|
-
end
|
41
|
-
{ add_docs: documents, failure_ids: failure_ids }
|
42
|
-
end
|
43
|
-
|
44
|
-
def request_json(url)
|
45
|
-
puts "Request: #{url}"
|
46
|
-
JSON.parse(RestClient.get(url))
|
47
|
-
end
|
48
|
-
|
49
|
-
def parse_record(record, geometry)
|
50
|
-
documents = []
|
51
|
-
failure_ids = []
|
52
|
-
begin
|
53
|
-
JSON.parse(RestClient.get(record['datasets'])).each do |dataset|
|
54
|
-
documents << { 'add' => { 'doc' => @translator.translate(dataset, record, geometry) } }
|
55
|
-
end
|
56
|
-
rescue => e
|
57
|
-
puts "Failed to add record #{record['id']} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
|
58
|
-
failure_ids << record['id']
|
59
|
-
end
|
60
|
-
{ documents: documents, failure_ids: failure_ids }
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
class DataOne < Base
|
6
|
-
def initialize(env = 'development', die_on_failure = false)
|
7
|
-
super
|
8
|
-
@page_size = 250
|
9
|
-
@translator = Helpers::IsoToSolr.new :data_one
|
10
|
-
end
|
11
|
-
|
12
|
-
def harvest_and_delete
|
13
|
-
puts "Running harvest of dataONE catalog from #{metadata_url}"
|
14
|
-
super(method(:harvest_data_one_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:DATA_ONE][:long_name]}\"")
|
15
|
-
end
|
16
|
-
|
17
|
-
def harvest_data_one_into_solr
|
18
|
-
start = 0
|
19
|
-
while (entries = get_results_from_data_one(start)) && (entries.length > 0)
|
20
|
-
begin
|
21
|
-
insert_solr_docs(get_docs_with_translated_entries_from_data_one(entries))
|
22
|
-
rescue => e
|
23
|
-
puts "ERROR: #{e}\n\n"
|
24
|
-
raise e if @die_on_failure
|
25
|
-
end
|
26
|
-
start += @page_size
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def get_results_from_data_one(start)
|
31
|
-
get_results(build_request(start, @page_size), './response/result/doc')
|
32
|
-
end
|
33
|
-
|
34
|
-
def metadata_url
|
35
|
-
SolrEnvironments[@environment][:data_one_url]
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_docs_with_translated_entries_from_data_one(entries)
|
39
|
-
entries.map do |e|
|
40
|
-
create_new_solr_add_doc_with_child(@translator.translate(e).root)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def build_request(start = 0, max_records = 100)
|
45
|
-
"#{metadata_url}&start=#{start}&rows=#{max_records}"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module SearchSolrTools
|
4
|
-
module Harvesters
|
5
|
-
# Harvests data from ECHO and inserts it into Solr after it has been translated
|
6
|
-
class Echo < Base
|
7
|
-
def initialize(env = 'development', die_on_failure = false)
|
8
|
-
super env, die_on_failure
|
9
|
-
@page_size = 100
|
10
|
-
@translator = Helpers::IsoToSolr.new :echo
|
11
|
-
end
|
12
|
-
|
13
|
-
def harvest_and_delete
|
14
|
-
puts "Running harvest of ECHO catalog from #{echo_url}"
|
15
|
-
super(method(:harvest_echo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ECHO][:long_name]}\"")
|
16
|
-
end
|
17
|
-
|
18
|
-
# get translated entries from ECHO and add them to Solr
|
19
|
-
# this is the main entry point for the class
|
20
|
-
def harvest_echo_into_solr
|
21
|
-
page_num = 1
|
22
|
-
while (entries = get_results_from_echo(page_num)) && (entries.length > 0)
|
23
|
-
begin
|
24
|
-
insert_solr_docs get_docs_with_translated_entries_from_echo(entries)
|
25
|
-
rescue => e
|
26
|
-
puts "ERROR: #{e}\n\n"
|
27
|
-
raise e if @die_on_failure
|
28
|
-
end
|
29
|
-
page_num += 1
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def echo_url
|
34
|
-
SolrEnvironments[@environment][:echo_url]
|
35
|
-
end
|
36
|
-
|
37
|
-
def get_results_from_echo(page_num)
|
38
|
-
get_results build_request(@page_size, page_num), './/results/result', 'application/echo10+xml'
|
39
|
-
end
|
40
|
-
|
41
|
-
def get_docs_with_translated_entries_from_echo(entries)
|
42
|
-
entries.map do |entry|
|
43
|
-
create_new_solr_add_doc_with_child(@translator.translate(entry).root)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def build_request(max_records = '25', page_num = '1')
|
48
|
-
echo_url + '&page_size=' + max_records.to_s + '&page_num=' + page_num.to_s
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require 'json'
|
3
|
-
require 'rgeo/geo_json'
|
4
|
-
|
5
|
-
module SearchSolrTools
|
6
|
-
module Harvesters
|
7
|
-
class Eol < Base
|
8
|
-
def initialize(env = 'development', die_on_failure = false)
|
9
|
-
super env, die_on_failure
|
10
|
-
@translator = SearchSolrTools::Translators::EolToSolr.new
|
11
|
-
end
|
12
|
-
|
13
|
-
def harvest_and_delete
|
14
|
-
puts 'Running harvest of EOL catalog using the following configured EOL URLs:'
|
15
|
-
SearchSolrTools::SolrEnvironments[:common][:eol].each { |x| puts x }
|
16
|
-
super(method(:harvest_eol_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:EOL][:long_name]}\"")
|
17
|
-
end
|
18
|
-
|
19
|
-
def harvest_eol_into_solr
|
20
|
-
solr_add_queries = eol_dataset_urls.map do |dataset|
|
21
|
-
begin
|
22
|
-
doc = open_xml_document(dataset)
|
23
|
-
if doc.xpath('//xmlns:metadata').size > 1
|
24
|
-
# THREDDS allows for a dataset of datasests, EOL should not utilize this
|
25
|
-
fail "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
|
26
|
-
end
|
27
|
-
metadata_doc = open_xml_document(doc.xpath('//xmlns:metadata')[0]['xlink:href'])
|
28
|
-
{ 'add' => { 'doc' => @translator.translate(doc, metadata_doc) } }
|
29
|
-
rescue => e
|
30
|
-
puts "ERROR: #{e}"
|
31
|
-
puts "Failed to translate this record: #{doc} -> #{metadata_doc}"
|
32
|
-
raise e if @die_on_failure
|
33
|
-
next
|
34
|
-
end
|
35
|
-
end
|
36
|
-
insert_solr_docs solr_add_queries, Base::JSON_CONTENT_TYPE
|
37
|
-
end
|
38
|
-
|
39
|
-
def eol_dataset_urls
|
40
|
-
SearchSolrTools::SolrEnvironments[:common][:eol].flat_map do |endpoint|
|
41
|
-
doc = open_xml_document(endpoint)
|
42
|
-
doc.xpath('//xmlns:catalogRef').map { |node| node['xlink:href'] }
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def open_xml_document(url)
|
47
|
-
Nokogiri::XML(open(url), &:strict)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require 'json'
|
3
|
-
require 'rest-client'
|
4
|
-
|
5
|
-
module SearchSolrTools
|
6
|
-
module Harvesters
|
7
|
-
# Harvests data from GTN-P endpoints, translates and adds it to solr
|
8
|
-
class GtnP < Base
|
9
|
-
def initialize(env = 'development', die_on_failure = false)
|
10
|
-
super env, die_on_failure
|
11
|
-
@translator = Translators::GtnpJsonToSolr.new
|
12
|
-
end
|
13
|
-
|
14
|
-
def gtnp_service_urls
|
15
|
-
json_records = []
|
16
|
-
SearchSolrTools::SolrEnvironments[:common][:gtnp].flat_map do |endpoint|
|
17
|
-
record = request_json(endpoint)
|
18
|
-
json_records << record
|
19
|
-
end
|
20
|
-
json_records
|
21
|
-
end
|
22
|
-
|
23
|
-
def harvest_and_delete
|
24
|
-
puts 'Running harvest of GTN-P catalog using the following configured GTN-P URLs:'
|
25
|
-
SearchSolrTools::SolrEnvironments[:common][:gtnp].each { |x| puts x }
|
26
|
-
super(method(:harvest_gtnp_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:GTNP][:long_name]}\"")
|
27
|
-
end
|
28
|
-
|
29
|
-
def harvest_gtnp_into_solr
|
30
|
-
result = translate_gtnp
|
31
|
-
insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
32
|
-
fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
|
33
|
-
end
|
34
|
-
|
35
|
-
def translate_gtnp
|
36
|
-
documents = []
|
37
|
-
failure_ids = []
|
38
|
-
gtnp_records = gtnp_service_urls
|
39
|
-
gtnp_records.each do |record|
|
40
|
-
results = parse_record(record)
|
41
|
-
results[:documents].each { |d| documents << d }
|
42
|
-
results[:failure_ids].each { |id| failure_ids << id }
|
43
|
-
end
|
44
|
-
{ add_docs: documents, failure_ids: failure_ids }
|
45
|
-
end
|
46
|
-
|
47
|
-
def request_json(url)
|
48
|
-
JSON.parse(RestClient.get(url))
|
49
|
-
end
|
50
|
-
|
51
|
-
def parse_record(record)
|
52
|
-
documents = []
|
53
|
-
failure_ids = []
|
54
|
-
begin
|
55
|
-
record.drop(1).each do |dataset|
|
56
|
-
trans_doc = @translator.translate(dataset, record[0])
|
57
|
-
documents << { 'add' => { 'doc' => trans_doc } }
|
58
|
-
end
|
59
|
-
rescue => e
|
60
|
-
puts "Failed to add record #{record[0][:title]} with error #{e} (#{e.message}) : #{e.backtrace.join("\n")}"
|
61
|
-
failure_ids << record[0][:title]
|
62
|
-
end
|
63
|
-
{ documents: documents, failure_ids: failure_ids }
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
@@ -1,58 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
require_relative '../helpers/csw_iso_query_builder'
|
3
|
-
|
4
|
-
module SearchSolrTools
|
5
|
-
module Harvesters
|
6
|
-
# Harvests data from ICES and inserts it into Solr after it has been translated
|
7
|
-
class Ices < Base
|
8
|
-
def initialize(env = 'development', die_on_failure = false)
|
9
|
-
super env, die_on_failure
|
10
|
-
@page_size = 100
|
11
|
-
@translator = Helpers::IsoToSolr.new :ices
|
12
|
-
end
|
13
|
-
|
14
|
-
def harvest_and_delete
|
15
|
-
puts "Running harvest of ICES catalog from #{ices_url}"
|
16
|
-
super(method(:harvest_ices_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ICES][:long_name]}\"")
|
17
|
-
end
|
18
|
-
|
19
|
-
# get translated entries from ICES and add them to Solr
|
20
|
-
# this is the main entry point for the class
|
21
|
-
def harvest_ices_into_solr
|
22
|
-
start_index = 1
|
23
|
-
while (entries = get_results_from_ices(start_index)) && (entries.length > 0)
|
24
|
-
begin
|
25
|
-
insert_solr_docs get_docs_with_translated_entries_from_ices(entries)
|
26
|
-
rescue => e
|
27
|
-
puts "ERROR: #{e}"
|
28
|
-
raise e if @die_on_failure
|
29
|
-
end
|
30
|
-
start_index += @page_size
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def ices_url
|
35
|
-
SolrEnvironments[@environment][:ices_url]
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_results_from_ices(start_index)
|
39
|
-
get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata'
|
40
|
-
end
|
41
|
-
|
42
|
-
def get_docs_with_translated_entries_from_ices(entries)
|
43
|
-
entries.map do |entry|
|
44
|
-
create_new_solr_add_doc_with_child(@translator.translate(entry).root)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
|
49
|
-
Helpers::CswIsoQueryBuilder.get_query_string(ices_url,
|
50
|
-
'resultType' => resultType,
|
51
|
-
'maxRecords' => maxRecords,
|
52
|
-
'startPosition' => startPosition,
|
53
|
-
'constraintLanguage' => 'CQL_TEXT',
|
54
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd')
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|