search_solr_tools 3.7.1 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 187e12f711b38e3274387d3575728d8d5250bf48
4
- data.tar.gz: db79a42d420a46430dcf32e4bf442bf790d0d18b
3
+ metadata.gz: 2c338059d0ec2049e415517a2e6b9b89df18cdd1
4
+ data.tar.gz: 3cb8c2c60e269ec9b66c305730522b7da3ed8243
5
5
  SHA512:
6
- metadata.gz: a69bcf39d6c09009d399cd2d029f8691ed6d00b3760a217f7ed7c5324c4171d0332b6ed2982c12caa9f663bc489a4edd5ce5ce0a8717a8dde2df78cdf2215c5c
7
- data.tar.gz: 613a293d8808099b554551932c37b8c3a709cdbb94488c61cd4f65bd5d0737ba7cd30537e217beccb032d3792195759496e785f4f1ffeb6560e1f1a0560ca7e0
6
+ metadata.gz: 5334498934992c587a414e3672f98130b4125d7013035a1d35cbd335eecf248a5438696255da3f456fd51b6f7fef48b3e79ecd199a5af9200cc06c4aca7f563d
7
+ data.tar.gz: 303c74385390406d79ff379efbc44a22405427d512d8e369a685f851cc50a77d54dc07229eb105529059e06f3b5f4fb4b7a70e1902a3817f9ccad424c2a9d71d
data/CHANGELOG.md CHANGED
@@ -1,24 +1,85 @@
1
- ## v3.3.3 (2015-05-10)
1
+ ## v3.8.0 (2017-03-28)
2
+
3
+ Changes
4
+
5
+ - Change ECHO harvester to harvest 100 records at a time, rather than 1000, to
6
+ avoid timeout/hanging issues with the large requests.
7
+ - Change "CISL"/ACADIS Gateway harvester to "NSF Arctic Data Center";
8
+ aoncadis.org redirects to another site, and the data center's name was
9
+ changed. The feed format was also changed; the harvester was updated to
10
+ consume the new feed.
11
+
12
+ Bugfixes
13
+
14
+ - Update NODC feed URL to use https.
15
+ - Update RDA feed URL to use https.
16
+ - Update handling of geometries to match new format provided by BCO-DMO feed.
17
+ - Update NMI feed URL; the feed was relocated.
18
+ - Harvesting tDAR starts from record 0 instead of record 1.
19
+ - tDAR harvester no longer attempts to obtain another page of records after
20
+ all the records have been harvested; where other feeds return an empty
21
+ response that our harvester handles without issue, tDAR throws an error if
22
+ the "startRecord" parameter is higher than their last record.
23
+ - Exit with a non-0 status when a problem with the whole feed is encountered,
24
+ even if `--die-on-failure` is not passed. That flag should only cause
25
+ failures when there are issues with individual records; we don't want
26
+ harvesting to stop due to a metadata issue with a small number of
27
+ records.
28
+ - Include BCO-DMO URL in the harvester output the same way all the other URLs
29
+ are displayed.
30
+
31
+ ## v3.7.1 (2016-05-18)
32
+
33
+ - RuboCop fixes.
34
+
35
+ ## v3.7.0 (2016-05-18)
2
36
 
3
37
  New Features
4
38
 
39
+ - Add sponsored programs to NSIDC harvesting.
5
40
  - Add support for ingesting Data Access Links from NSIDC JSON
6
41
 
42
+ Bugfixes
43
+
44
+ - Fix dependency issue with gem "listen".
45
+ - Fix bad configuration for OAI feed URLs.
46
+
47
+ ## v3.5.1 (2016-02-15)
48
+
49
+ Bugfixes
50
+
51
+ - Add temporal duration facet for GTN-P data center.
52
+
53
+ ## v3.5.0 (2016-02-11)
54
+
55
+ Changes
56
+
57
+ - Update long name for GTN-P data center.
58
+
59
+ ## v3.4.0 (2016-02-11)
60
+
61
+ New Features
62
+
63
+ - Add harvester for GTN-P.
64
+
65
+ ## v3.3.4 (2016-02-08)
66
+
67
+ See v3.4.0.
7
68
 
8
- ## v3.3.2
69
+ ## v3.3.3 (2016-01-14)
9
70
 
10
71
  Bugfix
11
72
 
12
73
  - Added handling of quotes around the offset when parsing the CISL resumption token
13
74
 
14
- ## v3.3.1
75
+ ## v3.3.1 (2015-09-25)
15
76
 
16
77
  Bugfix
17
78
 
18
79
  - Remove strange facet string for temporal duration from NOAA Paleo search
19
80
  results.
20
81
 
21
- ## v3.3.0
82
+ ## v3.3.0 (2015-09-24)
22
83
 
23
84
  New Features
24
85
 
@@ -19,11 +19,11 @@ class SolrHarvestCLI < Thor
19
19
  puts target
20
20
  begin
21
21
  harvest_class = get_harvester_class(target)
22
- harvester = harvest_class.new options[:environment], die_on_failure
22
+ harvester = harvest_class.new(options[:environment], die_on_failure)
23
23
  harvester.harvest_and_delete
24
24
  rescue => e
25
25
  puts "harvest failed for #{target}: #{e.message}"
26
- raise e if die_on_failure
26
+ raise e
27
27
  end
28
28
  end
29
29
  end
@@ -62,11 +62,10 @@ class SolrHarvestCLI < Thor
62
62
  end
63
63
 
64
64
  no_tasks do
65
- # rubocop: disable MethodLength
66
65
  def harvester_map
67
66
  {
68
67
  'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
69
- 'cisl' => SearchSolrTools::Harvesters::Cisl,
68
+ 'adc' => SearchSolrTools::Harvesters::Adc,
70
69
  'data_one' => SearchSolrTools::Harvesters::DataOne,
71
70
  'echo' => SearchSolrTools::Harvesters::Echo,
72
71
  'eol' => SearchSolrTools::Harvesters::Eol,
@@ -85,11 +84,10 @@ class SolrHarvestCLI < Thor
85
84
  'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
86
85
  }
87
86
  end
88
- # rubocop: enable MethodLength
89
87
 
90
88
  def get_harvester_class(data_center_name)
91
89
  name = data_center_name.downcase.to_s
92
- raise("Invalid data center #{name}") unless harvester_map.key?(name)
90
+ fail("Invalid data center #{name}") unless harvester_map.key?(name)
93
91
 
94
92
  harvester_map[name]
95
93
  end
@@ -4,7 +4,7 @@
4
4
  :collection_path: solr
5
5
  :port: 8983
6
6
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
- :cisl_url: https://www.aoncadis.org/oai/repository
7
+ :adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/
8
8
  :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
10
10
  :gtnp:
@@ -12,10 +12,10 @@
12
12
  - http://www.gtnpdatabase.org/rest/activelayers/json
13
13
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
14
14
  :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
15
- :nmi_url: http://access.met.no/metamod/oai
16
- :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
15
+ :nmi_url: http://arcticdata.met.no/metamod/oai
16
+ :nodc_url: https://data.nodc.noaa.gov/geoportal/csw
17
17
  :pdc_url: http://www.polardata.ca/oai/provider
18
- :rda_url: http://rda.ucar.edu/cgi-bin/oai
18
+ :rda_url: https://rda.ucar.edu/cgi-bin/oai
19
19
  :tdar_url: http://core.tdar.org/search/rss
20
20
  :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
21
21
  :eol:
@@ -0,0 +1,47 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ class Adc < Base
4
+ def initialize(env = 'development', die_on_failure = false)
5
+ super
6
+ @page_size = 250
7
+ @translator = Helpers::IsoToSolr.new :adc
8
+ end
9
+
10
+ def harvest_and_delete
11
+ puts "Running harvest of adc catalog from #{metadata_url}"
12
+ super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
13
+ end
14
+
15
+ def harvest_adc_into_solr
16
+ start = 0
17
+ while (entries = get_results_from_adc(start)) && (entries.length > 0)
18
+ begin
19
+ insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
20
+ rescue => e
21
+ puts "ERROR: #{e}\n\n"
22
+ raise e if @die_on_failure
23
+ end
24
+ start += @page_size
25
+ end
26
+ end
27
+
28
+ def get_results_from_adc(start)
29
+ get_results(build_request(start, @page_size), './response/result/doc')
30
+ end
31
+
32
+ def metadata_url
33
+ SolrEnvironments[@environment][:adc_url]
34
+ end
35
+
36
+ def get_docs_with_translated_entries_from_adc(entries)
37
+ entries.map do |e|
38
+ create_new_solr_add_doc_with_child(@translator.translate(e).root)
39
+ end
40
+ end
41
+
42
+ def build_request(start = 0, max_records = 100)
43
+ "#{metadata_url}?q=*:*&start=#{start}&rows=#{max_records}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -81,7 +81,7 @@ module SearchSolrTools
81
81
  end
82
82
  puts "#{success} document#{success == 1 ? '' : 's'} successfully added to Solr."
83
83
  puts "#{failure} document#{failure == 1 ? '' : 's'} not added to Solr."
84
- raise 'Some documents failed to be inserted into Solr' if failure > 0
84
+ fail 'Some documents failed to be inserted into Solr' if failure > 0
85
85
  end
86
86
 
87
87
  def insert_solr_doc(doc, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
@@ -12,13 +12,18 @@ module SearchSolrTools
12
12
  end
13
13
 
14
14
  def harvest_and_delete
15
+ puts "Running harvest of BCO-DMO catalog from #{bcodmo_url}"
15
16
  super(method(:harvest_bcodmo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO][:long_name]}\"")
16
17
  end
17
18
 
19
+ def bcodmo_url
20
+ SolrEnvironments[@environment][:bcodmo_url]
21
+ end
22
+
18
23
  def harvest_bcodmo_into_solr
19
24
  result = translate_bcodmo
20
25
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
21
- raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
26
+ fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
22
27
  end
23
28
 
24
29
  def translate_bcodmo
@@ -34,6 +39,7 @@ module SearchSolrTools
34
39
  end
35
40
 
36
41
  def request_json(url)
42
+ puts "Request: #{url}"
37
43
  JSON.parse(RestClient.get(url))
38
44
  end
39
45
 
@@ -4,7 +4,7 @@ module SearchSolrTools
4
4
  class Echo < Base
5
5
  def initialize(env = 'development', die_on_failure = false)
6
6
  super env, die_on_failure
7
- @page_size = 1000
7
+ @page_size = 100
8
8
  @translator = Helpers::IsoToSolr.new :echo
9
9
  end
10
10
 
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_echo(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_request(max_records = '25', page_num = '1')
@@ -22,7 +22,7 @@ module SearchSolrTools
22
22
  doc = open_xml_document(dataset)
23
23
  if doc.xpath('//xmlns:metadata').size > 1
24
24
  # THREDDS allows for a dataset of datasests, EOL should not utilize this
25
- raise "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
25
+ fail "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
26
26
  end
27
27
  metadata_doc = open_xml_document(doc.xpath('//xmlns:metadata')[0]['xlink:href'])
28
28
  { 'add' => { 'doc' => @translator.translate(doc, metadata_doc) } }
@@ -28,7 +28,7 @@ module SearchSolrTools
28
28
  def harvest_gtnp_into_solr
29
29
  result = translate_gtnp
30
30
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
31
- raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
31
+ fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
32
32
  end
33
33
 
34
34
  def translate_gtnp
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_ices(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_nodc(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -23,7 +23,7 @@ module SearchSolrTools
23
23
  def harvest_nsidc_json_into_solr
24
24
  result = docs_with_translated_entries_from_nsidc
25
25
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
26
- raise 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
26
+ fail 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
27
27
  end
28
28
 
29
29
  def nsidc_json_url
@@ -34,11 +34,11 @@ module SearchSolrTools
34
34
  end
35
35
 
36
36
  def results
37
- raise NotImplementedError
37
+ fail NotImplementedError
38
38
  end
39
39
 
40
40
  def metadata_url
41
- raise NotImplementedError
41
+ fail NotImplementedError
42
42
  end
43
43
 
44
44
  def translated_docs(entries)
@@ -48,7 +48,7 @@ module SearchSolrTools
48
48
  private
49
49
 
50
50
  def request_params
51
- raise NotImplementedError
51
+ fail NotImplementedError
52
52
  end
53
53
 
54
54
  def request_string
@@ -14,14 +14,22 @@ module SearchSolrTools
14
14
  end
15
15
 
16
16
  def harvest_tdar_into_solr
17
- start_record = 1
17
+ start_record = 0
18
+ total_harvested = 0
19
+ total_expected = total_results
18
20
  while (entries = get_results_from_tdar(start_record)) && (entries.length > 0)
19
21
  begin
20
- insert_solr_docs get_docs_with_translated_entries_from_tdar(entries)
22
+ insert_solr_docs(get_docs_with_translated_entries_from_tdar(entries))
21
23
  rescue => e
22
24
  puts "ERROR: #{e}\n\n"
23
25
  raise e if @die_on_failure
24
26
  end
27
+
28
+ # if we have all the records we expect, don't attempt another request;
29
+ # it would result in an error
30
+ total_harvested += entries.length
31
+ break if total_harvested >= total_expected
32
+
25
33
  start_record += @page_size
26
34
  end
27
35
  end
@@ -31,16 +39,16 @@ module SearchSolrTools
31
39
  end
32
40
 
33
41
  def get_results_from_tdar(start_record)
34
- get_results build_request(@page_size, start_record), './/atom:entry', 'application/xml'
42
+ get_results(build_request(@page_size, start_record), './/atom:entry', 'application/xml')
35
43
  end
36
44
 
37
45
  def get_docs_with_translated_entries_from_tdar(entries)
38
- docs = []
39
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
40
- docs
46
+ entries.map do |entry|
47
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
48
+ end
41
49
  end
42
50
 
43
- def build_request(max_records = '25', start_record = '1')
51
+ def build_request(max_records = '25', start_record = '0')
44
52
  request_url = tdar_url + '?_tDAR.searchType=ACADIS_RSS&'\
45
53
  'resourceTypes=DATASET&'\
46
54
  'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180&'\
@@ -52,6 +60,10 @@ module SearchSolrTools
52
60
 
53
61
  request_url
54
62
  end
63
+
64
+ def total_results
65
+ get_results(build_request(0, 0), './/opensearch:totalResults').text.to_i
66
+ end
55
67
  end
56
68
  end
57
69
  end
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_usgs(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -6,7 +6,7 @@ module SearchSolrTools
6
6
  # This hash grabs all the selector files inside the selectors directory,
7
7
  # to add a new source we need to create a selector file and add it to this hash.
8
8
  SELECTORS = {
9
- cisl: Selectors::CISL,
9
+ adc: Selectors::ADC,
10
10
  data_one: Selectors::DATA_ONE,
11
11
  echo: Selectors::ECHO,
12
12
  ices: Selectors::ICES,
@@ -10,7 +10,7 @@ module SearchSolrTools
10
10
  module SolrFormat
11
11
  DATA_CENTER_NAMES = {
12
12
  BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
13
- CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
13
+ ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
14
14
  DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
15
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
16
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
@@ -134,7 +134,7 @@ module SearchSolrTools
134
134
  j = send(find_index_method, resolution['max_resolution'])
135
135
  return resolution_values[i..j]
136
136
  end
137
- raise "Invalid resolution #{resolution['type']}"
137
+ fail "Invalid resolution #{resolution['type']}"
138
138
  end
139
139
 
140
140
  def self.resolution_not_specified?(resolution)
@@ -0,0 +1,95 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ ADC = {
6
+ authoritative_id: {
7
+ xpaths: ['.//str[@name="id"]'],
8
+ multivalue: false
9
+ },
10
+ title: {
11
+ xpaths: ['.//str[@name="title"]'],
12
+ multivalue: false
13
+ },
14
+ summary: {
15
+ xpaths: ['.//str[@name="abstract"]'],
16
+ multivalue: false
17
+ },
18
+ data_centers: {
19
+ xpaths: [''],
20
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
21
+ multivalue: false
22
+ },
23
+ authors: {
24
+ xpaths: ['.//str[@name="author"]'],
25
+ multivalue: false
26
+ },
27
+ keywords: {
28
+ xpaths: ['.//arr[@name="keywords"]/str'],
29
+ multivalue: true
30
+ },
31
+ last_revision_date: {
32
+ xpaths: ['.//date[@name="updateDate"]'],
33
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
34
+ multivalue: false,
35
+ format: Helpers::SolrFormat::DATE
36
+ },
37
+ dataset_url: {
38
+ xpaths: ['.//str[@name="dataUrl"]'],
39
+ default_values: [''],
40
+ multivalue: false
41
+ },
42
+ spatial_coverages: {
43
+ xpaths: ['.'],
44
+ multivalue: false,
45
+ format: Helpers::DataOneFormat.method(:spatial_display)
46
+ },
47
+ spatial: {
48
+ xpaths: ['.'],
49
+ multivalue: false,
50
+ format: Helpers::DataOneFormat.method(:spatial_index)
51
+ },
52
+ spatial_area: {
53
+ xpaths: ['.'],
54
+ multivalue: false,
55
+ format: Helpers::DataOneFormat.method(:spatial_area)
56
+ },
57
+ temporal_coverages: {
58
+ xpaths: ['.'],
59
+ multivalue: false,
60
+ format: Helpers::DataOneFormat.method(:temporal_coverage)
61
+ },
62
+ temporal_duration: {
63
+ xpaths: ['.'],
64
+ multivalue: false,
65
+ format: Helpers::DataOneFormat.method(:temporal_duration)
66
+ },
67
+ temporal: {
68
+ xpaths: ['.'],
69
+ multivalue: false,
70
+ format: Helpers::DataOneFormat.method(:temporal_index_string)
71
+ },
72
+ source: {
73
+ xpaths: [''],
74
+ default_values: ['ADE'],
75
+ multivalue: false
76
+ },
77
+ facet_data_center: {
78
+ xpaths: [''],
79
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
80
+ multivalue: false
81
+ },
82
+ facet_spatial_scope: {
83
+ xpaths: ['.'],
84
+ multivalue: false,
85
+ format: Helpers::DataOneFormat.method(:facet_spatial_scope)
86
+ },
87
+ facet_temporal_duration: {
88
+ xpaths: ['.'],
89
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
90
+ format: Helpers::DataOneFormat.method(:facet_temporal_duration),
91
+ multivalue: false
92
+ }
93
+ }
94
+ end
95
+ end
@@ -50,12 +50,29 @@ module SearchSolrTools
50
50
  end
51
51
 
52
52
  def translate_geometry(wkt_geom)
53
- wkt_geom['geometry'].sub! '<http://www.opengis.net/def/crs/OGC/1.3/CRS84> ', ''
54
- # Consider all linestring and polygon geometries to be multipoint for this provider
55
- wkt_geom['geometry'].sub! 'LINESTRING', 'MULTIPOINT'
56
- wkt_geom['geometry'].sub! 'POLYGON', 'MULTIPOINT'
57
- parser = RGeo::WKRep::WKTParser.new(nil, {})
58
- geometry = parser.parse(wkt_geom['geometry'])
53
+ if wkt_geom['geometry']['type'] == 'LineString'
54
+ wkt_geom['geometry']['type'] = 'MultiPoint'
55
+ end
56
+ geometry = RGeo::GeoJSON.decode(wkt_geom).geometry
57
+ geometry = RGeo::Feature.cast(geometry, RGeo::Feature::MultiPoint)
58
+
59
+ # This feed sometimes returns MultiLineString but wrongly calls them 'LineString'
60
+ # If the above fails, we assume this is why. If the feed gets fixed, this code
61
+ # should still handle that.
62
+ if geometry.nil? || geometry.num_geometries == 0
63
+ # Try to decode as an actual MultiLineString.
64
+ wkt_geom['geometry']['type'] = 'MultiLineString'
65
+ geometry = RGeo::GeoJSON.decode(wkt_geom).geometry
66
+
67
+ # Convert to a MultiPoint, for passing into the helper functions below.
68
+ coords = geometry.coordinates.flatten
69
+ coords = coords.each_slice(2).to_a
70
+ f = RGeo::Geos.factory
71
+ points = []
72
+ coords.each { |x, y| points << f.point(x, y) }
73
+ geometry = f.multi_point(points)
74
+ end
75
+
59
76
  {
60
77
  spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
61
78
  spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
@@ -8,7 +8,6 @@ module SearchSolrTools
8
8
  module Translators
9
9
  # Translates GTN-P json to solr json format
10
10
  class GtnpJsonToSolr
11
- # rubocop:disable Metrics/MethodLength
12
11
  # rubocop:disable AbcSize
13
12
  def translate(json_doc, json_record)
14
13
  json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.7.1'
2
+ VERSION = '3.8.0'
3
3
  end
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'guard-rubocop', '~> 1.2'
41
41
  spec.add_development_dependency 'rake', '~> 10.4'
42
42
  spec.add_development_dependency 'rspec', '~> 3.2'
43
- spec.add_development_dependency 'rubocop', '~> 0.32'
43
+ spec.add_development_dependency 'rubocop', '~> 0.32.1'
44
44
  spec.add_development_dependency 'webmock', '~> 1.13'
45
45
  spec.add_development_dependency 'listen', '3.0.5'
46
46
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.1
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-05-18 00:00:00.000000000 Z
15
+ date: 2017-03-28 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -230,14 +230,14 @@ dependencies:
230
230
  requirements:
231
231
  - - "~>"
232
232
  - !ruby/object:Gem::Version
233
- version: '0.32'
233
+ version: 0.32.1
234
234
  type: :development
235
235
  prerelease: false
236
236
  version_requirements: !ruby/object:Gem::Requirement
237
237
  requirements:
238
238
  - - "~>"
239
239
  - !ruby/object:Gem::Version
240
- version: '0.32'
240
+ version: 0.32.1
241
241
  - !ruby/object:Gem::Dependency
242
242
  name: webmock
243
243
  requirement: !ruby/object:Gem::Requirement
@@ -288,11 +288,11 @@ files:
288
288
  - lib/search_solr_tools.rb
289
289
  - lib/search_solr_tools/config/environments.rb
290
290
  - lib/search_solr_tools/config/environments.yaml
291
+ - lib/search_solr_tools/harvesters/adc.rb
291
292
  - lib/search_solr_tools/harvesters/ade_auto_suggest.rb
292
293
  - lib/search_solr_tools/harvesters/auto_suggest.rb
293
294
  - lib/search_solr_tools/harvesters/base.rb
294
295
  - lib/search_solr_tools/harvesters/bcodmo.rb
295
- - lib/search_solr_tools/harvesters/cisl.rb
296
296
  - lib/search_solr_tools/harvesters/data_one.rb
297
297
  - lib/search_solr_tools/harvesters/echo.rb
298
298
  - lib/search_solr_tools/harvesters/eol.rb
@@ -325,7 +325,7 @@ files:
325
325
  - lib/search_solr_tools/helpers/translate_spatial_coverage.rb
326
326
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
327
327
  - lib/search_solr_tools/helpers/usgs_format.rb
328
- - lib/search_solr_tools/selectors/cisl.rb
328
+ - lib/search_solr_tools/selectors/adc.rb
329
329
  - lib/search_solr_tools/selectors/data_one.rb
330
330
  - lib/search_solr_tools/selectors/echo_iso.rb
331
331
  - lib/search_solr_tools/selectors/ices_iso.rb
@@ -363,7 +363,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
363
363
  version: '0'
364
364
  requirements: []
365
365
  rubyforge_project:
366
- rubygems_version: 2.4.5
366
+ rubygems_version: 2.4.8
367
367
  signing_key:
368
368
  specification_version: 4
369
369
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
@@ -1,65 +0,0 @@
1
-
2
- module SearchSolrTools
3
- module Harvesters
4
- # Harvests data from CISL and inserts it into Solr after it has been translated
5
- class Cisl < Oai
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]
9
- @translator = Helpers::IsoToSolr.new :cisl
10
-
11
- # Used in query string params, resumptionToken
12
- @dataset = '0bdd2d39-3493-4fa2-98f9-6766596bdc50'
13
- end
14
-
15
- def metadata_url
16
- SolrEnvironments[@environment][:cisl_url]
17
- end
18
-
19
- def results
20
- list_records_oai_response = get_results(request_string, '//oai:ListRecords', '')
21
-
22
- @resumption_token = list_records_oai_response.xpath('.//oai:resumptionToken', Helpers::IsoNamespaces.namespaces)
23
- @resumption_token = format_resumption_token(@resumption_token.first.text)
24
-
25
- list_records_oai_response.xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
26
- end
27
-
28
- private
29
-
30
- def request_params
31
- {
32
- verb: 'ListRecords',
33
- metadataPrefix: 'dif',
34
- set: @dataset,
35
- resumptionToken: @resumption_token
36
- }.delete_if { |_k, v| v.nil? }
37
- end
38
-
39
- # The ruby response is lacking quotes, which the token requires in order to work...
40
- # Also, the response back seems to be inconsistent - sometimes it adds &quot; instead of '"',
41
- # which makes the token fail to work.
42
- # To get around this I'd prefer to make assumptions about the token and let it break if
43
- # they change the formatting. For now, all fields other than offset should be able to be
44
- # assumed to remain constant.
45
- # glewis 2016-01-15: It broke, offset has quotes around it, so I updated the regex to account for
46
- # the possibility, including '"' or '&quot;'
47
- # If the input is empty, then we are done - return an empty string, which is checked for
48
- # in the harvest loop.
49
- def format_resumption_token(resumption_token)
50
- return '' if resumption_token.empty?
51
-
52
- resumption_token =~ /offset(?:"|&quot;)?:(\d+)/
53
- offset = Regexp.last_match(1)
54
-
55
- {
56
- from: nil,
57
- until: nil,
58
- set: @dataset,
59
- metadataPrefix: 'dif',
60
- offset: offset
61
- }.to_json
62
- end
63
- end
64
- end
65
- end
@@ -1,112 +0,0 @@
1
- require 'search_solr_tools'
2
-
3
- module SearchSolrTools
4
- module Selectors
5
- # The hash contains keys that should map to the fields in the solr schema,
6
- # the keys are called selectors and are in charge of selecting the nodes
7
- # from the ISO document, applying the default value if none of the xpaths
8
- # resolved to a value and formatting the field. xpaths and multivalue are
9
- # required, default_value, format, and reduce are optional.
10
- #
11
- # reduce takes the formatted result of multiple nodes and produces a single
12
- # result. This is for fields that are not multivalued, but their value
13
- # should consider information from all the nodes (for example, storing
14
- # only the maximum duration from multiple temporal coverage fields, taking
15
- # the sum of multiple spatial areas)
16
- CISL = {
17
- authoritative_id: {
18
- xpaths: ['.//oai:header/oai:identifier'],
19
- multivalue: false
20
- },
21
- title: {
22
- xpaths: ['.//dif:Entry_Title'],
23
- multivalue: false
24
- },
25
- summary: {
26
- xpaths: ['.//dif:Summary/dif:Abstract'],
27
- multivalue: false
28
- },
29
- data_centers: {
30
- xpaths: [''],
31
- default_values: [SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]],
32
- multivalue: false
33
- },
34
- authors: {
35
- xpaths: [''],
36
- multivalue: true
37
- },
38
- keywords: {
39
- xpaths: [
40
- './/dif:Parameters/dif:Category',
41
- './/dif:Parameters/dif:Topic',
42
- './/dif:Parameters/dif:Term',
43
- './/dif:Parameters/dif:Variable_Level_1'
44
- ].reverse,
45
- multivalue: true
46
- },
47
- last_revision_date: {
48
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
49
- default_values: [SearchSolrTools::Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
50
- multivalue: false,
51
- format: SearchSolrTools::Helpers::SolrFormat::DATE
52
- },
53
- dataset_url: {
54
- xpaths: ['.//dif:Related_URL/dif:URL'],
55
- multivalue: false
56
- },
57
- spatial_coverages: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
61
- },
62
- spatial: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: true,
65
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_INDEX
66
- },
67
- spatial_area: {
68
- xpaths: ['.//dif:Spatial_Coverage'],
69
- multivalue: false,
70
- reduce: SearchSolrTools::Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
71
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_AREA
72
- },
73
- temporal: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
77
- },
78
- temporal_coverages: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: true,
81
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
82
- },
83
- temporal_duration: {
84
- xpaths: ['.//dif:Temporal_Coverage'],
85
- multivalue: false,
86
- reduce: SearchSolrTools::Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
87
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_DURATION
88
- },
89
- source: {
90
- xpaths: [''],
91
- default_values: ['ADE'],
92
- multivalue: false
93
- },
94
- facet_data_center: {
95
- xpaths: [''],
96
- default_values: ["#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:short_name]}"],
97
- multivalue: false
98
- },
99
- facet_spatial_scope: {
100
- xpaths: ['.//dif:Spatial_Coverage'],
101
- multivalue: true,
102
- format: SearchSolrTools::Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
103
- },
104
- facet_temporal_duration: {
105
- xpaths: ['.//dif:Temporal_Coverage'],
106
- default_values: [SearchSolrTools::Helpers::SolrFormat::NOT_SPECIFIED],
107
- format: SearchSolrTools::Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
108
- multivalue: true
109
- }
110
- }
111
- end
112
- end