search_solr_tools 3.7.1 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 187e12f711b38e3274387d3575728d8d5250bf48
4
- data.tar.gz: db79a42d420a46430dcf32e4bf442bf790d0d18b
3
+ metadata.gz: 2c338059d0ec2049e415517a2e6b9b89df18cdd1
4
+ data.tar.gz: 3cb8c2c60e269ec9b66c305730522b7da3ed8243
5
5
  SHA512:
6
- metadata.gz: a69bcf39d6c09009d399cd2d029f8691ed6d00b3760a217f7ed7c5324c4171d0332b6ed2982c12caa9f663bc489a4edd5ce5ce0a8717a8dde2df78cdf2215c5c
7
- data.tar.gz: 613a293d8808099b554551932c37b8c3a709cdbb94488c61cd4f65bd5d0737ba7cd30537e217beccb032d3792195759496e785f4f1ffeb6560e1f1a0560ca7e0
6
+ metadata.gz: 5334498934992c587a414e3672f98130b4125d7013035a1d35cbd335eecf248a5438696255da3f456fd51b6f7fef48b3e79ecd199a5af9200cc06c4aca7f563d
7
+ data.tar.gz: 303c74385390406d79ff379efbc44a22405427d512d8e369a685f851cc50a77d54dc07229eb105529059e06f3b5f4fb4b7a70e1902a3817f9ccad424c2a9d71d
data/CHANGELOG.md CHANGED
@@ -1,24 +1,85 @@
1
- ## v3.3.3 (2015-05-10)
1
+ ## v3.8.0 (2017-03-28)
2
+
3
+ Changes
4
+
5
+ - Change ECHO harvester to harvest 100 records at a time, rather than 1000, to
6
+ avoid timeout/hanging issues with the large requests.
7
+ - Change "CISL"/ACADIS Gateway harvester to "NSF Arctic Data Center";
8
+ aoncadis.org redirects to another site, and the data center's name was
9
+ changed. The feed format was also changed; the harvester was updated to
10
+ consume the new feed.
11
+
12
+ Bugfixes
13
+
14
+ - Update NODC feed URL to use https.
15
+ - Update RDA feed URL to use https.
16
+ - Update handling of geometries to match new format provided by BCO-DMO feed.
17
+ - Update NMI feed URL; the feed was relocated.
18
+ - Harvesting tDAR starts from record 0 instead of record 1.
19
+ - tDAR harvester no longer attempts to obtain another page of records after
20
+ all the records have been harvested; where other feeds return an empty
21
+ response that our harvester handles without issue, tDAR throws an error if
22
+ the "startRecord" parameter is higher than their last record.
23
+ - Exit with a non-0 status when a problem with the whole feed is encountered,
24
+ even if `--die-on-failure` is not passed. That flag should only cause
25
+ failures when there are issues with individual records; we don't want
26
+ harvesting to stop due to a metadata issue with a small number of
27
+ records.
28
+ - Include BCO-DMO URL in the harvester output the same way all the other URLs
29
+ are displayed.
30
+
31
+ ## v3.7.1 (2016-05-18)
32
+
33
+ - RuboCop fixes.
34
+
35
+ ## v3.7.0 (2016-05-18)
2
36
 
3
37
  New Features
4
38
 
39
+ - Add sponsored programs to NSIDC harvesting.
5
40
  - Add support for ingesting Data Access Links from NSIDC JSON
6
41
 
42
+ Bugfixes
43
+
44
+ - Fix dependency issue with gem "listen".
45
+ - Fix bad configuration for OAI feed URLs.
46
+
47
+ ## v3.5.1 (2016-02-15)
48
+
49
+ Bugfixes
50
+
51
+ - Add temporal duration facet for GTN-P data center.
52
+
53
+ ## v3.5.0 (2016-02-11)
54
+
55
+ Changes
56
+
57
+ - Update long name for GTN-P data center.
58
+
59
+ ## v3.4.0 (2016-02-11)
60
+
61
+ New Features
62
+
63
+ - Add harvester for GTN-P.
64
+
65
+ ## v3.3.4 (2016-02-08)
66
+
67
+ See v3.4.0.
7
68
 
8
- ## v3.3.2
69
+ ## v3.3.3 (2016-01-14)
9
70
 
10
71
  Bugfix
11
72
 
12
73
  - Added quote checking for cisl offset parsing check
13
74
 
14
- ## v3.3.1
75
+ ## v3.3.1 (2015-09-25)
15
76
 
16
77
  Bugfix
17
78
 
18
79
  - Remove strange facet string for temporal duration from NOAA Paleo search
19
80
  results.
20
81
 
21
- ## v3.3.0
82
+ ## v3.3.0 (2015-09-24)
22
83
 
23
84
  New Features
24
85
 
@@ -19,11 +19,11 @@ class SolrHarvestCLI < Thor
19
19
  puts target
20
20
  begin
21
21
  harvest_class = get_harvester_class(target)
22
- harvester = harvest_class.new options[:environment], die_on_failure
22
+ harvester = harvest_class.new(options[:environment], die_on_failure)
23
23
  harvester.harvest_and_delete
24
24
  rescue => e
25
25
  puts "harvest failed for #{target}: #{e.message}"
26
- raise e if die_on_failure
26
+ raise e
27
27
  end
28
28
  end
29
29
  end
@@ -62,11 +62,10 @@ class SolrHarvestCLI < Thor
62
62
  end
63
63
 
64
64
  no_tasks do
65
- # rubocop: disable MethodLength
66
65
  def harvester_map
67
66
  {
68
67
  'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
69
- 'cisl' => SearchSolrTools::Harvesters::Cisl,
68
+ 'adc' => SearchSolrTools::Harvesters::Adc,
70
69
  'data_one' => SearchSolrTools::Harvesters::DataOne,
71
70
  'echo' => SearchSolrTools::Harvesters::Echo,
72
71
  'eol' => SearchSolrTools::Harvesters::Eol,
@@ -85,11 +84,10 @@ class SolrHarvestCLI < Thor
85
84
  'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
86
85
  }
87
86
  end
88
- # rubocop: enable MethodLength
89
87
 
90
88
  def get_harvester_class(data_center_name)
91
89
  name = data_center_name.downcase.to_s
92
- raise("Invalid data center #{name}") unless harvester_map.key?(name)
90
+ fail("Invalid data center #{name}") unless harvester_map.key?(name)
93
91
 
94
92
  harvester_map[name]
95
93
  end
@@ -4,7 +4,7 @@
4
4
  :collection_path: solr
5
5
  :port: 8983
6
6
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
- :cisl_url: https://www.aoncadis.org/oai/repository
7
+ :adc_url: https://arcticdata.io/metacat/d1/mn/v2/query/solr/
8
8
  :data_one_url: https://cn.dataone.org/cn/v1/query/solr/select?q=northBoundCoord:%5B45.0%20TO%2090.0%5D
9
9
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
10
10
  :gtnp:
@@ -12,10 +12,10 @@
12
12
  - http://www.gtnpdatabase.org/rest/activelayers/json
13
13
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
14
14
  :ncdc_paleo_url: http://gis.ncdc.noaa.gov/gptpaleo/csw
15
- :nmi_url: http://access.met.no/metamod/oai
16
- :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
15
+ :nmi_url: http://arcticdata.met.no/metamod/oai
16
+ :nodc_url: https://data.nodc.noaa.gov/geoportal/csw
17
17
  :pdc_url: http://www.polardata.ca/oai/provider
18
- :rda_url: http://rda.ucar.edu/cgi-bin/oai
18
+ :rda_url: https://rda.ucar.edu/cgi-bin/oai
19
19
  :tdar_url: http://core.tdar.org/search/rss
20
20
  :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
21
21
  :eol:
@@ -0,0 +1,47 @@
1
+ module SearchSolrTools
2
+ module Harvesters
3
+ class Adc < Base
4
+ def initialize(env = 'development', die_on_failure = false)
5
+ super
6
+ @page_size = 250
7
+ @translator = Helpers::IsoToSolr.new :adc
8
+ end
9
+
10
+ def harvest_and_delete
11
+ puts "Running harvest of adc catalog from #{metadata_url}"
12
+ super(method(:harvest_adc_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]}\"")
13
+ end
14
+
15
+ def harvest_adc_into_solr
16
+ start = 0
17
+ while (entries = get_results_from_adc(start)) && (entries.length > 0)
18
+ begin
19
+ insert_solr_docs(get_docs_with_translated_entries_from_adc(entries))
20
+ rescue => e
21
+ puts "ERROR: #{e}\n\n"
22
+ raise e if @die_on_failure
23
+ end
24
+ start += @page_size
25
+ end
26
+ end
27
+
28
+ def get_results_from_adc(start)
29
+ get_results(build_request(start, @page_size), './response/result/doc')
30
+ end
31
+
32
+ def metadata_url
33
+ SolrEnvironments[@environment][:adc_url]
34
+ end
35
+
36
+ def get_docs_with_translated_entries_from_adc(entries)
37
+ entries.map do |e|
38
+ create_new_solr_add_doc_with_child(@translator.translate(e).root)
39
+ end
40
+ end
41
+
42
+ def build_request(start = 0, max_records = 100)
43
+ "#{metadata_url}?q=*:*&start=#{start}&rows=#{max_records}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -81,7 +81,7 @@ module SearchSolrTools
81
81
  end
82
82
  puts "#{success} document#{success == 1 ? '' : 's'} successfully added to Solr."
83
83
  puts "#{failure} document#{failure == 1 ? '' : 's'} not added to Solr."
84
- raise 'Some documents failed to be inserted into Solr' if failure > 0
84
+ fail 'Some documents failed to be inserted into Solr' if failure > 0
85
85
  end
86
86
 
87
87
  def insert_solr_doc(doc, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
@@ -12,13 +12,18 @@ module SearchSolrTools
12
12
  end
13
13
 
14
14
  def harvest_and_delete
15
+ puts "Running harvest of BCO-DMO catalog from #{bcodmo_url}"
15
16
  super(method(:harvest_bcodmo_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO][:long_name]}\"")
16
17
  end
17
18
 
19
+ def bcodmo_url
20
+ SolrEnvironments[@environment][:bcodmo_url]
21
+ end
22
+
18
23
  def harvest_bcodmo_into_solr
19
24
  result = translate_bcodmo
20
25
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
21
- raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
26
+ fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
22
27
  end
23
28
 
24
29
  def translate_bcodmo
@@ -34,6 +39,7 @@ module SearchSolrTools
34
39
  end
35
40
 
36
41
  def request_json(url)
42
+ puts "Request: #{url}"
37
43
  JSON.parse(RestClient.get(url))
38
44
  end
39
45
 
@@ -4,7 +4,7 @@ module SearchSolrTools
4
4
  class Echo < Base
5
5
  def initialize(env = 'development', die_on_failure = false)
6
6
  super env, die_on_failure
7
- @page_size = 1000
7
+ @page_size = 100
8
8
  @translator = Helpers::IsoToSolr.new :echo
9
9
  end
10
10
 
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_echo(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_request(max_records = '25', page_num = '1')
@@ -22,7 +22,7 @@ module SearchSolrTools
22
22
  doc = open_xml_document(dataset)
23
23
  if doc.xpath('//xmlns:metadata').size > 1
24
24
  # THREDDS allows for a dataset of datasets, EOL should not utilize this
25
- raise "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
25
+ fail "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
26
26
  end
27
27
  metadata_doc = open_xml_document(doc.xpath('//xmlns:metadata')[0]['xlink:href'])
28
28
  { 'add' => { 'doc' => @translator.translate(doc, metadata_doc) } }
@@ -28,7 +28,7 @@ module SearchSolrTools
28
28
  def harvest_gtnp_into_solr
29
29
  result = translate_gtnp
30
30
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
31
- raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
31
+ fail 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
32
32
  end
33
33
 
34
34
  def translate_gtnp
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_ices(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_nodc(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -23,7 +23,7 @@ module SearchSolrTools
23
23
  def harvest_nsidc_json_into_solr
24
24
  result = docs_with_translated_entries_from_nsidc
25
25
  insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
26
- raise 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
26
+ fail 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
27
27
  end
28
28
 
29
29
  def nsidc_json_url
@@ -34,11 +34,11 @@ module SearchSolrTools
34
34
  end
35
35
 
36
36
  def results
37
- raise NotImplementedError
37
+ fail NotImplementedError
38
38
  end
39
39
 
40
40
  def metadata_url
41
- raise NotImplementedError
41
+ fail NotImplementedError
42
42
  end
43
43
 
44
44
  def translated_docs(entries)
@@ -48,7 +48,7 @@ module SearchSolrTools
48
48
  private
49
49
 
50
50
  def request_params
51
- raise NotImplementedError
51
+ fail NotImplementedError
52
52
  end
53
53
 
54
54
  def request_string
@@ -14,14 +14,22 @@ module SearchSolrTools
14
14
  end
15
15
 
16
16
  def harvest_tdar_into_solr
17
- start_record = 1
17
+ start_record = 0
18
+ total_harvested = 0
19
+ total_expected = total_results
18
20
  while (entries = get_results_from_tdar(start_record)) && (entries.length > 0)
19
21
  begin
20
- insert_solr_docs get_docs_with_translated_entries_from_tdar(entries)
22
+ insert_solr_docs(get_docs_with_translated_entries_from_tdar(entries))
21
23
  rescue => e
22
24
  puts "ERROR: #{e}\n\n"
23
25
  raise e if @die_on_failure
24
26
  end
27
+
28
+ # if we have all the records we expect, don't attempt another request;
29
+ # it would result in an error
30
+ total_harvested += entries.length
31
+ break if total_harvested >= total_expected
32
+
25
33
  start_record += @page_size
26
34
  end
27
35
  end
@@ -31,16 +39,16 @@ module SearchSolrTools
31
39
  end
32
40
 
33
41
  def get_results_from_tdar(start_record)
34
- get_results build_request(@page_size, start_record), './/atom:entry', 'application/xml'
42
+ get_results(build_request(@page_size, start_record), './/atom:entry', 'application/xml')
35
43
  end
36
44
 
37
45
  def get_docs_with_translated_entries_from_tdar(entries)
38
- docs = []
39
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
40
- docs
46
+ entries.map do |entry|
47
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
48
+ end
41
49
  end
42
50
 
43
- def build_request(max_records = '25', start_record = '1')
51
+ def build_request(max_records = '25', start_record = '0')
44
52
  request_url = tdar_url + '?_tDAR.searchType=ACADIS_RSS&'\
45
53
  'resourceTypes=DATASET&'\
46
54
  'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180&'\
@@ -52,6 +60,10 @@ module SearchSolrTools
52
60
 
53
61
  request_url
54
62
  end
63
+
64
+ def total_results
65
+ get_results(build_request(0, 0), './/opensearch:totalResults').text.to_i
66
+ end
55
67
  end
56
68
  end
57
69
  end
@@ -37,9 +37,9 @@ module SearchSolrTools
37
37
  end
38
38
 
39
39
  def get_docs_with_translated_entries_from_usgs(entries)
40
- docs = []
41
- entries.each { |r| docs.push(create_new_solr_add_doc_with_child(@translator.translate(r).root)) }
42
- docs
40
+ entries.map do |entry|
41
+ create_new_solr_add_doc_with_child(@translator.translate(entry).root)
42
+ end
43
43
  end
44
44
 
45
45
  def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
@@ -6,7 +6,7 @@ module SearchSolrTools
6
6
  # This hash grabs all the selector files inside the selectors directory,
7
7
  # to add a new source we need to create a selector file and add it to this hash.
8
8
  SELECTORS = {
9
- cisl: Selectors::CISL,
9
+ adc: Selectors::ADC,
10
10
  data_one: Selectors::DATA_ONE,
11
11
  echo: Selectors::ECHO,
12
12
  ices: Selectors::ICES,
@@ -10,7 +10,7 @@ module SearchSolrTools
10
10
  module SolrFormat
11
11
  DATA_CENTER_NAMES = {
12
12
  BCODMO: { short_name: 'BCO-DMO', long_name: 'Biological and Chemical Oceanography Data Management Office' },
13
- CISL: { short_name: 'ACADIS Gateway', long_name: 'Advanced Cooperative Arctic Data and Information Service' },
13
+ ADC: { short_name: 'NSF ADC', long_name: 'NSF Arctic Data Center' },
14
14
  DATA_ONE: { short_name: 'DataONE', long_name: 'DataONE' },
15
15
  ECHO: { short_name: 'NASA ECHO', long_name: 'NASA Earth Observing System (EOS) Clearing House (ECHO)' },
16
16
  EOL: { short_name: 'UCAR NCAR EOL', long_name: 'UCAR NCAR - Earth Observing Laboratory' },
@@ -134,7 +134,7 @@ module SearchSolrTools
134
134
  j = send(find_index_method, resolution['max_resolution'])
135
135
  return resolution_values[i..j]
136
136
  end
137
- raise "Invalid resolution #{resolution['type']}"
137
+ fail "Invalid resolution #{resolution['type']}"
138
138
  end
139
139
 
140
140
  def self.resolution_not_specified?(resolution)
@@ -0,0 +1,95 @@
1
+ require 'search_solr_tools'
2
+
3
+ module SearchSolrTools
4
+ module Selectors
5
+ ADC = {
6
+ authoritative_id: {
7
+ xpaths: ['.//str[@name="id"]'],
8
+ multivalue: false
9
+ },
10
+ title: {
11
+ xpaths: ['.//str[@name="title"]'],
12
+ multivalue: false
13
+ },
14
+ summary: {
15
+ xpaths: ['.//str[@name="abstract"]'],
16
+ multivalue: false
17
+ },
18
+ data_centers: {
19
+ xpaths: [''],
20
+ default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
21
+ multivalue: false
22
+ },
23
+ authors: {
24
+ xpaths: ['.//str[@name="author"]'],
25
+ multivalue: false
26
+ },
27
+ keywords: {
28
+ xpaths: ['.//arr[@name="keywords"]/str'],
29
+ multivalue: true
30
+ },
31
+ last_revision_date: {
32
+ xpaths: ['.//date[@name="updateDate"]'],
33
+ default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
34
+ multivalue: false,
35
+ format: Helpers::SolrFormat::DATE
36
+ },
37
+ dataset_url: {
38
+ xpaths: ['.//str[@name="dataUrl"]'],
39
+ default_values: [''],
40
+ multivalue: false
41
+ },
42
+ spatial_coverages: {
43
+ xpaths: ['.'],
44
+ multivalue: false,
45
+ format: Helpers::DataOneFormat.method(:spatial_display)
46
+ },
47
+ spatial: {
48
+ xpaths: ['.'],
49
+ multivalue: false,
50
+ format: Helpers::DataOneFormat.method(:spatial_index)
51
+ },
52
+ spatial_area: {
53
+ xpaths: ['.'],
54
+ multivalue: false,
55
+ format: Helpers::DataOneFormat.method(:spatial_area)
56
+ },
57
+ temporal_coverages: {
58
+ xpaths: ['.'],
59
+ multivalue: false,
60
+ format: Helpers::DataOneFormat.method(:temporal_coverage)
61
+ },
62
+ temporal_duration: {
63
+ xpaths: ['.'],
64
+ multivalue: false,
65
+ format: Helpers::DataOneFormat.method(:temporal_duration)
66
+ },
67
+ temporal: {
68
+ xpaths: ['.'],
69
+ multivalue: false,
70
+ format: Helpers::DataOneFormat.method(:temporal_index_string)
71
+ },
72
+ source: {
73
+ xpaths: [''],
74
+ default_values: ['ADE'],
75
+ multivalue: false
76
+ },
77
+ facet_data_center: {
78
+ xpaths: [''],
79
+ default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
80
+ multivalue: false
81
+ },
82
+ facet_spatial_scope: {
83
+ xpaths: ['.'],
84
+ multivalue: false,
85
+ format: Helpers::DataOneFormat.method(:facet_spatial_scope)
86
+ },
87
+ facet_temporal_duration: {
88
+ xpaths: ['.'],
89
+ default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
90
+ format: Helpers::DataOneFormat.method(:facet_temporal_duration),
91
+ multivalue: false
92
+ }
93
+ }
94
+ end
95
+ end
@@ -50,12 +50,29 @@ module SearchSolrTools
50
50
  end
51
51
 
52
52
  def translate_geometry(wkt_geom)
53
- wkt_geom['geometry'].sub! '<http://www.opengis.net/def/crs/OGC/1.3/CRS84> ', ''
54
- # Consider all linestring and polygon geometries to be multipoint for this provider
55
- wkt_geom['geometry'].sub! 'LINESTRING', 'MULTIPOINT'
56
- wkt_geom['geometry'].sub! 'POLYGON', 'MULTIPOINT'
57
- parser = RGeo::WKRep::WKTParser.new(nil, {})
58
- geometry = parser.parse(wkt_geom['geometry'])
53
+ if wkt_geom['geometry']['type'] == 'LineString'
54
+ wkt_geom['geometry']['type'] = 'MultiPoint'
55
+ end
56
+ geometry = RGeo::GeoJSON.decode(wkt_geom).geometry
57
+ geometry = RGeo::Feature.cast(geometry, RGeo::Feature::MultiPoint)
58
+
59
+ # This feed sometimes returns MultiLineString geometries but wrongly labels them 'LineString'.
60
+ # If the above fails, we assume this is why. If the feed gets fixed, this code
61
+ # should still handle that.
62
+ if geometry.nil? || geometry.num_geometries == 0
63
+ # Try to decode as an actual MultiLineString.
64
+ wkt_geom['geometry']['type'] = 'MultiLineString'
65
+ geometry = RGeo::GeoJSON.decode(wkt_geom).geometry
66
+
67
+ # Convert to a MultiPoint, for passing into the helper functions below.
68
+ coords = geometry.coordinates.flatten
69
+ coords = coords.each_slice(2).to_a
70
+ f = RGeo::Geos.factory
71
+ points = []
72
+ coords.each { |x, y| points << f.point(x, y) }
73
+ geometry = f.multi_point(points)
74
+ end
75
+
59
76
  {
60
77
  spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
61
78
  spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
@@ -8,7 +8,6 @@ module SearchSolrTools
8
8
  module Translators
9
9
  # Translates GTN-P json to solr json format
10
10
  class GtnpJsonToSolr
11
- # rubocop:disable Metrics/MethodLength
12
11
  # rubocop:disable AbcSize
13
12
  def translate(json_doc, json_record)
14
13
  json_geo = json_doc['geo'].nil? ? json_doc['coordinates'] : json_doc['geo']['coordinates']
@@ -1,3 +1,3 @@
1
1
  module SearchSolrTools
2
- VERSION = '3.7.1'
2
+ VERSION = '3.8.0'
3
3
  end
@@ -40,7 +40,7 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'guard-rubocop', '~> 1.2'
41
41
  spec.add_development_dependency 'rake', '~> 10.4'
42
42
  spec.add_development_dependency 'rspec', '~> 3.2'
43
- spec.add_development_dependency 'rubocop', '~> 0.32'
43
+ spec.add_development_dependency 'rubocop', '~> 0.32.1'
44
44
  spec.add_development_dependency 'webmock', '~> 1.13'
45
45
  spec.add_development_dependency 'listen', '3.0.5'
46
46
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search_solr_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.1
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Chalstrom
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-05-18 00:00:00.000000000 Z
15
+ date: 2017-03-28 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: iso8601
@@ -230,14 +230,14 @@ dependencies:
230
230
  requirements:
231
231
  - - "~>"
232
232
  - !ruby/object:Gem::Version
233
- version: '0.32'
233
+ version: 0.32.1
234
234
  type: :development
235
235
  prerelease: false
236
236
  version_requirements: !ruby/object:Gem::Requirement
237
237
  requirements:
238
238
  - - "~>"
239
239
  - !ruby/object:Gem::Version
240
- version: '0.32'
240
+ version: 0.32.1
241
241
  - !ruby/object:Gem::Dependency
242
242
  name: webmock
243
243
  requirement: !ruby/object:Gem::Requirement
@@ -288,11 +288,11 @@ files:
288
288
  - lib/search_solr_tools.rb
289
289
  - lib/search_solr_tools/config/environments.rb
290
290
  - lib/search_solr_tools/config/environments.yaml
291
+ - lib/search_solr_tools/harvesters/adc.rb
291
292
  - lib/search_solr_tools/harvesters/ade_auto_suggest.rb
292
293
  - lib/search_solr_tools/harvesters/auto_suggest.rb
293
294
  - lib/search_solr_tools/harvesters/base.rb
294
295
  - lib/search_solr_tools/harvesters/bcodmo.rb
295
- - lib/search_solr_tools/harvesters/cisl.rb
296
296
  - lib/search_solr_tools/harvesters/data_one.rb
297
297
  - lib/search_solr_tools/harvesters/echo.rb
298
298
  - lib/search_solr_tools/harvesters/eol.rb
@@ -325,7 +325,7 @@ files:
325
325
  - lib/search_solr_tools/helpers/translate_spatial_coverage.rb
326
326
  - lib/search_solr_tools/helpers/translate_temporal_coverage.rb
327
327
  - lib/search_solr_tools/helpers/usgs_format.rb
328
- - lib/search_solr_tools/selectors/cisl.rb
328
+ - lib/search_solr_tools/selectors/adc.rb
329
329
  - lib/search_solr_tools/selectors/data_one.rb
330
330
  - lib/search_solr_tools/selectors/echo_iso.rb
331
331
  - lib/search_solr_tools/selectors/ices_iso.rb
@@ -363,7 +363,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
363
363
  version: '0'
364
364
  requirements: []
365
365
  rubyforge_project:
366
- rubygems_version: 2.4.5
366
+ rubygems_version: 2.4.8
367
367
  signing_key:
368
368
  specification_version: 4
369
369
  summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
@@ -1,65 +0,0 @@
1
-
2
- module SearchSolrTools
3
- module Harvesters
4
- # Harvests data from CISL and inserts it into Solr after it has been translated
5
- class Cisl < Oai
6
- def initialize(env = 'development', die_on_failure = false)
7
- super
8
- @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]
9
- @translator = Helpers::IsoToSolr.new :cisl
10
-
11
- # Used in query string params, resumptionToken
12
- @dataset = '0bdd2d39-3493-4fa2-98f9-6766596bdc50'
13
- end
14
-
15
- def metadata_url
16
- SolrEnvironments[@environment][:cisl_url]
17
- end
18
-
19
- def results
20
- list_records_oai_response = get_results(request_string, '//oai:ListRecords', '')
21
-
22
- @resumption_token = list_records_oai_response.xpath('.//oai:resumptionToken', Helpers::IsoNamespaces.namespaces)
23
- @resumption_token = format_resumption_token(@resumption_token.first.text)
24
-
25
- list_records_oai_response.xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
26
- end
27
-
28
- private
29
-
30
- def request_params
31
- {
32
- verb: 'ListRecords',
33
- metadataPrefix: 'dif',
34
- set: @dataset,
35
- resumptionToken: @resumption_token
36
- }.delete_if { |_k, v| v.nil? }
37
- end
38
-
39
- # The ruby response is lacking quotes, which the token requires in order to work...
40
- # Also, the response back seems to be inconsistent - sometimes it adds &quot; instead of '"',
41
- # which makes the token fail to work.
42
- # To get around this I'd prefer to make assumptions about the token and let it break if
43
- # they change the formatting. For now, all fields other than offset should be able to be
44
- # assumed to remain constant.
45
- # glewis 2016-01-15: It broke, offset has quotes around it, so I updated the regex to account for
46
- # the possibility, including '"' or '&quot;'
47
- # If the input is empty, then we are done - return an empty string, which is checked for
48
- # in the harvest loop.
49
- def format_resumption_token(resumption_token)
50
- return '' if resumption_token.empty?
51
-
52
- resumption_token =~ /offset(?:"|&quot;)?:(\d+)/
53
- offset = Regexp.last_match(1)
54
-
55
- {
56
- from: nil,
57
- until: nil,
58
- set: @dataset,
59
- metadataPrefix: 'dif',
60
- offset: offset
61
- }.to_json
62
- end
63
- end
64
- end
65
- end
@@ -1,112 +0,0 @@
1
- require 'search_solr_tools'
2
-
3
- module SearchSolrTools
4
- module Selectors
5
- # The hash contains keys that should map to the fields in the solr schema,
6
- # the keys are called selectors and are in charge of selecting the nodes
7
- # from the ISO document, applying the default value if none of the xpaths
8
- # resolved to a value and formatting the field. xpaths and multivalue are
9
- # required, default_value, format, and reduce are optional.
10
- #
11
- # reduce takes the formatted result of multiple nodes and produces a single
12
- # result. This is for fields that are not multivalued, but their value
13
- # should consider information from all the nodes (for example, storing
14
- # only the maximum duration from multiple temporal coverage fields, taking
15
- # the sum of multiple spatial areas)
16
- CISL = {
17
- authoritative_id: {
18
- xpaths: ['.//oai:header/oai:identifier'],
19
- multivalue: false
20
- },
21
- title: {
22
- xpaths: ['.//dif:Entry_Title'],
23
- multivalue: false
24
- },
25
- summary: {
26
- xpaths: ['.//dif:Summary/dif:Abstract'],
27
- multivalue: false
28
- },
29
- data_centers: {
30
- xpaths: [''],
31
- default_values: [SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]],
32
- multivalue: false
33
- },
34
- authors: {
35
- xpaths: [''],
36
- multivalue: true
37
- },
38
- keywords: {
39
- xpaths: [
40
- './/dif:Parameters/dif:Category',
41
- './/dif:Parameters/dif:Topic',
42
- './/dif:Parameters/dif:Term',
43
- './/dif:Parameters/dif:Variable_Level_1'
44
- ].reverse,
45
- multivalue: true
46
- },
47
- last_revision_date: {
48
- xpaths: ['.//dif:Last_DIF_Revision_Date'],
49
- default_values: [SearchSolrTools::Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
50
- multivalue: false,
51
- format: SearchSolrTools::Helpers::SolrFormat::DATE
52
- },
53
- dataset_url: {
54
- xpaths: ['.//dif:Related_URL/dif:URL'],
55
- multivalue: false
56
- },
57
- spatial_coverages: {
58
- xpaths: ['.//dif:Spatial_Coverage'],
59
- multivalue: true,
60
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_DISPLAY
61
- },
62
- spatial: {
63
- xpaths: ['.//dif:Spatial_Coverage'],
64
- multivalue: true,
65
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_INDEX
66
- },
67
- spatial_area: {
68
- xpaths: ['.//dif:Spatial_Coverage'],
69
- multivalue: false,
70
- reduce: SearchSolrTools::Helpers::IsoToSolrFormat::MAX_SPATIAL_AREA,
71
- format: SearchSolrTools::Helpers::IsoToSolrFormat::SPATIAL_AREA
72
- },
73
- temporal: {
74
- xpaths: ['.//dif:Temporal_Coverage'],
75
- multivalue: true,
76
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_INDEX_STRING
77
- },
78
- temporal_coverages: {
79
- xpaths: ['.//dif:Temporal_Coverage'],
80
- multivalue: true,
81
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_DISPLAY_STRING
82
- },
83
- temporal_duration: {
84
- xpaths: ['.//dif:Temporal_Coverage'],
85
- multivalue: false,
86
- reduce: SearchSolrTools::Helpers::SolrFormat::REDUCE_TEMPORAL_DURATION,
87
- format: SearchSolrTools::Helpers::IsoToSolrFormat::TEMPORAL_DURATION
88
- },
89
- source: {
90
- xpaths: [''],
91
- default_values: ['ADE'],
92
- multivalue: false
93
- },
94
- facet_data_center: {
95
- xpaths: [''],
96
- default_values: ["#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:CISL][:short_name]}"],
97
- multivalue: false
98
- },
99
- facet_spatial_scope: {
100
- xpaths: ['.//dif:Spatial_Coverage'],
101
- multivalue: true,
102
- format: SearchSolrTools::Helpers::IsoToSolrFormat::FACET_SPATIAL_SCOPE
103
- },
104
- facet_temporal_duration: {
105
- xpaths: ['.//dif:Temporal_Coverage'],
106
- default_values: [SearchSolrTools::Helpers::SolrFormat::NOT_SPECIFIED],
107
- format: SearchSolrTools::Helpers::IsoToSolrFormat::FACET_TEMPORAL_DURATION,
108
- multivalue: true
109
- }
110
- }
111
- end
112
- end