search_solr_tools 6.1.0 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,35 +0,0 @@
1
- require_relative 'oai'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- # Harvests the RDA feed
6
- class Rda < Oai
7
- def initialize(env = 'development', die_on_failure = false)
8
- super
9
- @data_centers = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA][:long_name]
10
- @translator = Helpers::IsoToSolr.new :rda
11
- end
12
-
13
- def metadata_url
14
- SolrEnvironments[@environment][:rda_url]
15
- end
16
-
17
- # resumption_token must be empty to stop the harvest loop; RDA's feed does not
18
- # provide any resumption token and gets all the records in just one go
19
- def results
20
- @resumption_token = ''
21
- list_records_oai_response = get_results(request_string, '//oai:ListRecords', '')
22
- list_records_oai_response.xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
23
- end
24
-
25
- private
26
-
27
- def request_params
28
- {
29
- verb: 'ListRecords',
30
- metadataPrefix: 'dif'
31
- }
32
- end
33
- end
34
- end
35
- end
@@ -1,71 +0,0 @@
1
- require_relative 'base'
2
-
3
- module SearchSolrTools
4
- module Harvesters
5
- # Harvests data from TDAR and inserts it into Solr after it has been translated
6
- class Tdar < Base
7
- def initialize(env = 'development', die_on_failure = false)
8
- super env, die_on_failure
9
- @page_size = 100
10
- @translator = Helpers::IsoToSolr.new :tdar
11
- end
12
-
13
- def harvest_and_delete
14
- puts "Running harvest of TDAR catalog from #{tdar_url}"
15
- super(method(:harvest_tdar_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]}\"")
16
- end
17
-
18
- def harvest_tdar_into_solr
19
- start_record = 0
20
- total_harvested = 0
21
- total_expected = total_results
22
- while (entries = get_results_from_tdar(start_record)) && (entries.length > 0)
23
- begin
24
- insert_solr_docs(get_docs_with_translated_entries_from_tdar(entries))
25
- rescue => e
26
- puts "ERROR: #{e}\n\n"
27
- raise e if @die_on_failure
28
- end
29
-
30
- # if we have all the records we expect, don't attempt another request;
31
- # it would result in an error
32
- total_harvested += entries.length
33
- break if total_harvested >= total_expected
34
-
35
- start_record += @page_size
36
- end
37
- end
38
-
39
- def tdar_url
40
- SolrEnvironments[@environment][:tdar_url]
41
- end
42
-
43
- def get_results_from_tdar(start_record)
44
- get_results(build_request(@page_size, start_record), './/atom:entry', 'application/xml')
45
- end
46
-
47
- def get_docs_with_translated_entries_from_tdar(entries)
48
- entries.map do |entry|
49
- create_new_solr_add_doc_with_child(@translator.translate(entry).root)
50
- end
51
- end
52
-
53
- def build_request(max_records = '25', start_record = '0')
54
- request_url = tdar_url + '?_tDAR.searchType=ACADIS_RSS&'\
55
- 'resourceTypes=DATASET&'\
56
- 'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180&'\
57
- 'groups[0].latitudeLongitudeBoxes[0].minimumLatitude=45&'\
58
- 'groups[0].latitudeLongitudeBoxes[0].minimumLongitude=-180&'\
59
- 'groups[0].latitudeLongitudeBoxes[0].maximumLatitude=90&'\
60
- 'geoMode=ENVELOPE&'\
61
- 'recordsPerPage=' + max_records.to_s + '&startRecord=' + start_record.to_s
62
-
63
- request_url
64
- end
65
-
66
- def total_results
67
- get_results(build_request(0, 0), './/opensearch:totalResults').text.to_i
68
- end
69
- end
70
- end
71
- end
@@ -1,76 +0,0 @@
1
- require_relative 'base'
2
- require_relative '../helpers/csw_iso_query_builder'
3
-
4
- module SearchSolrTools
5
- module Harvesters
6
- # Harvests data from USGS and inserts it into Solr after it has been translated
7
- class Usgs < Base
8
- def initialize(env = 'development', die_on_failure = false)
9
- super env, die_on_failure
10
- @page_size = 100
11
- @translator = Helpers::IsoToSolr.new :usgs
12
- end
13
-
14
- def harvest_and_delete
15
- puts "Running harvest of USGS catalog from #{usgs_url}"
16
- super(method(:harvest_usgs_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]}\"")
17
- end
18
-
19
- # get translated entries from USGS and add them to Solr
20
- # this is the main entry point for the class
21
- def harvest_usgs_into_solr
22
- start_index = 1
23
- while (entries = get_results_from_usgs(start_index)) && (entries.length > 0)
24
- begin
25
- insert_solr_docs get_docs_with_translated_entries_from_usgs(entries)
26
- rescue => e
27
- puts "ERROR: #{e}"
28
- raise e if @die_on_failure
29
- end
30
- start_index += @page_size
31
- end
32
- end
33
-
34
- def usgs_url
35
- SolrEnvironments[@environment][:usgs_url]
36
- end
37
-
38
- def get_results_from_usgs(start_index)
39
- get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata', ''
40
- end
41
-
42
- def get_docs_with_translated_entries_from_usgs(entries)
43
- entries.map do |entry|
44
- create_new_solr_add_doc_with_child(@translator.translate(entry).root)
45
- end
46
- end
47
-
48
- def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
49
- Helpers::CswIsoQueryBuilder.get_query_string(usgs_url,
50
- 'resultType' => resultType,
51
- 'maxRecords' => maxRecords,
52
- 'startPosition' => startPosition,
53
- 'TypeNames' => '',
54
- 'constraint' => bbox_constraint,
55
- 'outputSchema' => 'http://www.isotc211.org/2005/gmd')
56
- end
57
-
58
- def bbox_constraint
59
- bbox = {
60
- west: '-180',
61
- south: '45',
62
- east: '180',
63
- north: '90'
64
- }
65
-
66
- URI.encode '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
67
- 'xmlns:gml="http://www.opengis.net/gml" ' \
68
- 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
69
- '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
70
- '<gml:lowerCorner>' + bbox[:west] + ' ' + bbox[:south] + '</gml:lowerCorner>' \
71
- '<gml:upperCorner>' + bbox[:east] + ' ' + bbox[:north] + '</gml:upperCorner>' \
72
- '</gml:Envelope></ogc:BBOX></Filter>'
73
- end
74
- end
75
- end
76
- end
@@ -1,29 +0,0 @@
1
- require 'search_solr_tools/helpers/query_builder'
2
-
3
- module SearchSolrTools
4
- module Helpers
5
- # Constructs the string to query a CSW endpoint
6
- class CswIsoQueryBuilder
7
- DEFAULT_PARAMS = {
8
- service: 'CSW',
9
- version: '2.0.2',
10
- request: 'GetRecords',
11
- 'TypeNames' => 'gmd:MD_Metadata',
12
- 'ElementSetName' => 'full',
13
- 'resultType' => 'results',
14
- 'outputFormat' => 'application/xml',
15
- 'maxRecords' => '25',
16
- 'startPosition' => '1'
17
- }
18
-
19
- def self.get_query_string(url, query_params = {})
20
- all_params = query_params(query_params)
21
- QueryBuilder.build(all_params).prepend(url)
22
- end
23
-
24
- def self.query_params(query_params = {})
25
- DEFAULT_PARAMS.merge(query_params)
26
- end
27
- end
28
- end
29
- end
@@ -1,74 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- class DataOneFormat < IsoToSolrFormat
8
- class << self
9
- def date_range(node)
10
- {
11
- start: SolrFormat.date_str(node.xpath('.//date[@name="beginDate"]').text.strip),
12
- end: SolrFormat.date_str(node.xpath('.//date[@name="endDate"]').text.strip)
13
- }
14
- end
15
-
16
- def bounding_box(node)
17
- {
18
- north: node.xpath('.//float[@name="northBoundCoord"]').text.strip,
19
- south: node.xpath('.//float[@name="southBoundCoord"]').text.strip,
20
- east: node.xpath('.//float[@name="eastBoundCoord"]').text.strip,
21
- west: node.xpath('.//float[@name="westBoundCoord"]').text.strip
22
- }
23
- end
24
-
25
- def spatial_display(node)
26
- box = bounding_box(node)
27
-
28
- [box[:south], box[:west], box[:north], box[:east]].join(' ')
29
- end
30
-
31
- def spatial_index(node)
32
- box = bounding_box(node)
33
-
34
- if box[:west] == box[:east] && box[:south] == box[:north]
35
- [box[:west], box[:south]]
36
- else
37
- [box[:west], box[:south], box[:east], box[:north]]
38
- end.join(' ')
39
- end
40
-
41
- def spatial_area(node)
42
- box = bounding_box(node)
43
-
44
- box[:north].to_f - box[:south].to_f
45
- end
46
-
47
- def temporal_coverage(node)
48
- SolrFormat.temporal_display_str(date_range(node))
49
- end
50
-
51
- def temporal_duration(node)
52
- dr = date_range(node)
53
- end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
54
- SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
55
- end
56
-
57
- def temporal_index_string(node)
58
- dr = date_range(node)
59
- SolrFormat.temporal_index_str(dr)
60
- end
61
-
62
- def facet_spatial_scope(node)
63
- box = bounding_box(node)
64
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
65
- end
66
-
67
- def facet_temporal_duration(node)
68
- duration = temporal_duration(node)
69
- SolrFormat.get_temporal_duration_facet(duration)
70
- end
71
- end
72
- end
73
- end
74
- end
@@ -1,97 +0,0 @@
1
- require_relative 'selectors'
2
- require 'nokogiri'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Translates ISO nokogiri documents into solr nokogiri documents using a hash driver object.
7
- # This class should be constructed passing the selector file hash as a parameter (see selectors.rb).
8
- # After creating an instance we call translate with a nokogiri iso document as a parameter.
9
- class IsoToSolr
10
- def initialize(selector)
11
- @fields = SELECTORS[selector]
12
- @multiple_whitespace = /\s{2,}/ # save the regex so it is not recompiled every time format_field() is called
13
- end
14
-
15
- # this will return a nodeset with all the elements that matched the xpath
16
- def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
17
- fields = []
18
- begin
19
- iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc)).each do |f|
20
- fields.push(f)
21
- break if multivalue == false && reduce.nil?
22
- end
23
- rescue
24
- fields = []
25
- end
26
- fields
27
- end
28
-
29
- def get_default_values(selector)
30
- selector.key?(:default_values) ? selector[:default_values] : ['']
31
- end
32
-
33
- def format_text(field)
34
- field.respond_to?(:text) ? field.text : field
35
- end
36
-
37
- def format_field(selector, field)
38
- formatted = selector.key?(:format) ? selector[:format].call(field) : format_text(field) rescue format_text(field)
39
- formatted = strip_invalid_utf8_bytes(formatted)
40
- formatted.strip! if formatted.respond_to?(:strip!)
41
- formatted.gsub!(@multiple_whitespace, ' ') if formatted.respond_to?(:gsub!)
42
- formatted
43
- end
44
-
45
- def format_fields(selector, fields, reduce = nil)
46
- formatted = fields.map { |f| format_field(selector, f) }.flatten
47
- formatted = [reduce.call(formatted)] unless reduce.nil?
48
- selector[:unique] ? formatted.uniq : formatted
49
- end
50
-
51
- def create_solr_fields(iso_xml_doc, selector)
52
- selector[:xpaths].each do |xpath|
53
- fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])
54
-
55
- # stop evaluating xpaths once we find data in one of them
56
- if fields.size > 0 && fields.any? { |f| strip_invalid_utf8_bytes(f.text).strip.length > 0 }
57
- return format_fields(selector, fields, selector[:reduce])
58
- end
59
- end
60
- format_fields(selector, get_default_values(selector))
61
- end
62
-
63
- def translate(iso_xml_doc)
64
- solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
65
- xml.doc_ do
66
- build_fields(xml, iso_xml_doc)
67
- end
68
- end
69
- solr_xml_doc.doc
70
- end
71
-
72
- def build_fields(xml, iso_xml_doc)
73
- @fields.each do |field_name, selector|
74
- create_solr_fields(iso_xml_doc, selector).each do |value|
75
- if value.is_a? Array
76
- value.each do |v|
77
- xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
78
- end
79
- else
80
- xml.field_({ name: field_name }, value) unless value.nil? || value.eql?('')
81
- end
82
- end
83
- end
84
- end
85
-
86
- def strip_invalid_utf8_bytes(text)
87
- if text.respond_to?(:encode) && !text.valid_encoding?
88
- text.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
89
- end
90
-
91
- text.delete!("\u00BF") if text.respond_to?(:delete!)
92
-
93
- text
94
- end
95
- end
96
- end
97
- end
@@ -1,197 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
-
6
- module SearchSolrTools
7
- module Helpers
8
- # Methods for generating formatted strings from ISO xml nodes that can be indexed by SOLR
9
- # rubocop:disable ClassLength
10
- class IsoToSolrFormat
11
- KEYWORDS = proc { |keywords| build_keyword_list keywords }
12
-
13
- SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
14
- SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
15
- SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
16
- MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }
17
-
18
- FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
19
- FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
20
- FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
21
- FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }
22
-
23
- TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
24
- TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
25
- TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
26
- TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }
27
-
28
- DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
29
- ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
30
- EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }
31
-
32
- def self.spatial_display_str(box_node)
33
- box = bounding_box(box_node)
34
- "#{box[:south]} #{box[:west]} #{box[:north]} #{box[:east]}"
35
- end
36
-
37
- def self.spatial_index_str(box_node)
38
- box = bounding_box(box_node)
39
- if box[:west] == box[:east] && box[:south] == box[:north]
40
- [box[:west], box[:south]]
41
- else
42
- [box[:west], box[:south], box[:east], box[:north]]
43
- end.join(' ')
44
- end
45
-
46
- def self.spatial_area_str(box_node)
47
- box = bounding_box(box_node)
48
- area = box[:north].to_f - box[:south].to_f
49
- area
50
- end
51
-
52
- def self.get_max_spatial_area(values)
53
- values.map(&:to_f).max
54
- end
55
-
56
- def self.get_spatial_facet(box_node)
57
- box = bounding_box(box_node)
58
-
59
- if BoundingBoxUtil.box_invalid?(box)
60
- facet = nil
61
- elsif BoundingBoxUtil.box_global?(box)
62
- facet = 'Global'
63
- else
64
- facet = 'Non Global'
65
- end
66
- facet
67
- end
68
-
69
- def self.get_spatial_scope_facet(box_node)
70
- box = bounding_box(box_node)
71
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
72
- end
73
-
74
- def self.temporal_display_str(temporal_node, formatted = false)
75
- SolrFormat.temporal_display_str(date_range(temporal_node, formatted))
76
- end
77
-
78
- def self.get_temporal_duration(temporal_node)
79
- dr = date_range(temporal_node)
80
- end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
81
- SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
82
- end
83
-
84
- def self.get_temporal_duration_facet(temporal_node)
85
- duration = get_temporal_duration(temporal_node)
86
- SolrFormat.get_temporal_duration_facet(duration)
87
- end
88
-
89
- def self.temporal_index_str(temporal_node)
90
- dr = date_range(temporal_node)
91
- SolrFormat.temporal_index_str(dr)
92
- end
93
-
94
- def self.sponsored_program_facet(node)
95
- long_name = node.xpath('.//gmd:organisationName', IsoNamespaces.namespaces(node)).text.strip
96
- short_name = node.xpath('.//gmd:organisationShortName', IsoNamespaces.namespaces(node)).text.strip
97
-
98
- [long_name, short_name].join(' | ')
99
- end
100
-
101
- def self.build_keyword_list(keywords)
102
- category = keywords.xpath('.//CategoryKeyword').text
103
- topic = keywords.xpath('.//TopicKeyword').text
104
- term = keywords.xpath('.//TermKeyword').text
105
- category << ' > ' << topic << ' > ' << term
106
- end
107
-
108
- def self.date_range(temporal_node, formatted = false)
109
- start_date = get_first_matching_child(
110
- temporal_node,
111
- ['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']
112
- )
113
- start_date = '' unless SolrFormat.date?(start_date)
114
- start_date = SolrFormat.date_str(start_date) if formatted
115
-
116
- end_date = get_first_matching_child(
117
- temporal_node,
118
- ['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date']
119
- )
120
- end_date = '' unless SolrFormat.date?(end_date)
121
- end_date = SolrFormat.date_str(end_date) if formatted
122
-
123
- {
124
- start: start_date,
125
- end: end_date
126
- }
127
- end
128
-
129
- # Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
130
- def self.dataset_url(url_node)
131
- url_node.text.strip =~ %r{http[s]?://} ? url_node.text.strip : ''
132
- end
133
-
134
- def self.ices_dataset_url(auth_id)
135
- 'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid=' + auth_id
136
- end
137
-
138
- def self.get_first_matching_child(node, paths)
139
- matching_nodes = node.at_xpath(paths.join(' | '), IsoNamespaces.namespaces(node))
140
- matching_nodes.nil? ? '' : matching_nodes.text
141
- end
142
-
143
- def self.bounding_box(box_node)
144
- {
145
- west: get_bound(box_node, :west),
146
- south: get_bound(box_node, :south),
147
- east: get_bound(box_node, :east),
148
- north: get_bound(box_node, :north)
149
- }
150
- end
151
-
152
- def self.axis_label(direction)
153
- {
154
- north: 'Latitude',
155
- south: 'Latitude',
156
- east: 'Longitude',
157
- west: 'Longitude'
158
- }[direction]
159
- end
160
-
161
- def self.coordinate_boundary(lat_lon)
162
- {
163
- 'Latitude' => 90,
164
- 'Longitude' => 180
165
- }[lat_lon]
166
- end
167
-
168
- def self.node_values(box_node, direction, lat_lon)
169
- get_first_matching_child(
170
- box_node,
171
- [
172
- "./gmd:#{direction.to_s.downcase}Bounding#{lat_lon}/gco:Decimal",
173
- "./gmd:#{direction.to_s.downcase}Bound#{lat_lon}/gco:Decimal",
174
- "./#{direction.to_s.capitalize}BoundingCoordinate",
175
- "./dif:#{direction.to_s.capitalize}ernmost_#{lat_lon}"
176
- ]
177
- ).split(' ')
178
- end
179
-
180
- def self.get_bound(box_node, direction)
181
- lat_lon = axis_label(direction)
182
-
183
- vals = node_values(box_node, direction, lat_lon)
184
- val = vals.first
185
-
186
- boundary = coordinate_boundary(lat_lon)
187
- out_of_bounds = boundary < val.to_f.abs
188
-
189
- return '' if vals.empty? || out_of_bounds
190
-
191
- val = -val.to_f if %w(West South).include?(vals.last)
192
-
193
- val.to_f.to_s
194
- end
195
- end
196
- end
197
- end
@@ -1,61 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
- require_relative 'iso_to_solr_format'
6
-
7
- module SearchSolrTools
8
- module Helpers
9
- class NcdcPaleoFormat < IsoToSolrFormat
10
- def self.bounding_box(node)
11
- east, north = node.xpath('./ows:UpperCorner').text.split
12
- west, south = node.xpath('./ows:LowerCorner').text.split
13
- { north: north, south: south, east: east, west: west }
14
- end
15
-
16
- def self.date_range(node, _formatted = false)
17
- if node.text.include?('START YEAR')
18
- if node.text.include?('AD')
19
- format_ad_time(node.text)
20
- elsif node.text.include?('yr BP')
21
- format_cal_yr_bp_time(node.text)
22
- end
23
- end
24
- end
25
-
26
- def self.format_ad_time(node_text)
27
- match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
28
- {
29
- start: DateTime.strptime(match[:start].strip, '%Y'),
30
- end: DateTime.strptime(match[:end].strip, '%Y')
31
- }
32
- end
33
-
34
- def self.format_cal_yr_bp_time(node_text)
35
- zero_year = 1950
36
- match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
37
- {
38
- start: DateTime.strptime((-(match[:start].strip.to_i) - zero_year).to_s, '%Y'),
39
- end: DateTime.strptime((-(match[:end].strip.to_i) - zero_year).to_s, '%Y')
40
- }
41
- end
42
-
43
- def self.temporal_index_str(node)
44
- range = date_range(node)
45
- SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
46
- end
47
-
48
- def self.get_temporal_duration(node)
49
- range = date_range(node)
50
- return if range.to_s.empty?
51
- (range[:start] - range[:end]).to_i.abs
52
- end
53
-
54
- def self.author(node)
55
- return node if node == ''
56
- return if node.text.include? ';'
57
- node.text
58
- end
59
- end
60
- end
61
- end
@@ -1,13 +0,0 @@
1
- module SearchSolrTools
2
- module Helpers
3
- # Class to build a query string based on a hash of params
4
- class QueryBuilder
5
- class << self
6
- def build(params)
7
- param_str = params.map { |k, v| "#{k}=#{v}" }.join('&')
8
- "?#{param_str}"
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,25 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- class R2RFormat < IsoToSolrFormat
8
- TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
9
- TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
10
- TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
11
- FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }
12
-
13
- def self.date_range(temporal_node, _formatted = false)
14
- xpath_start = './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/'\
15
- 'gml:TimeInstant[@gml:id="start"]/gml:timePosition'
16
- xpath_end = xpath_start.gsub('start', 'end')
17
-
18
- {
19
- start: temporal_node.xpath(xpath_start).text,
20
- end: temporal_node.xpath(xpath_end).text
21
- }
22
- end
23
- end
24
- end
25
- end