search_solr_tools 6.1.0 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -2
  3. data/bin/search_solr_tools +5 -17
  4. data/lib/search_solr_tools/config/environments.rb +3 -1
  5. data/lib/search_solr_tools/config/environments.yaml +0 -32
  6. data/lib/search_solr_tools/errors/harvest_error.rb +44 -31
  7. data/lib/search_solr_tools/harvesters/auto_suggest.rb +5 -3
  8. data/lib/search_solr_tools/harvesters/base.rb +21 -20
  9. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +7 -5
  10. data/lib/search_solr_tools/harvesters/nsidc_json.rb +9 -8
  11. data/lib/search_solr_tools/helpers/bounding_box_util.rb +8 -8
  12. data/lib/search_solr_tools/helpers/facet_configuration.rb +3 -1
  13. data/lib/search_solr_tools/helpers/harvest_status.rb +10 -8
  14. data/lib/search_solr_tools/helpers/iso_namespaces.rb +3 -1
  15. data/lib/search_solr_tools/helpers/solr_format.rb +25 -45
  16. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +13 -10
  17. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +2 -0
  18. data/lib/search_solr_tools/translators/nsidc_json.rb +48 -44
  19. data/lib/search_solr_tools/version.rb +3 -1
  20. data/lib/search_solr_tools.rb +3 -2
  21. metadata +3 -45
  22. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  23. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  24. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  25. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  26. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  27. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  28. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  29. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  30. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  31. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  32. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  33. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  34. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  35. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  36. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  37. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  38. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  39. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  40. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  41. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  42. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  43. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  44. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  45. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  46. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  47. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  48. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  49. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  50. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  51. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  52. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  53. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  54. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  55. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  56. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  57. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  58. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  59. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  60. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  61. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  62. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  63. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,35 +0,0 @@
1
- require_relative 'oai'
2
-
3
module SearchSolrTools
  module Harvesters
    # Harvester for the RDA feed, built on top of the Oai harvester.
    class Rda < Oai
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] forwarded unchanged to Oai#initialize
      def initialize(env = 'development', die_on_failure = false)
        # Bare super forwards both arguments exactly as they were received.
        super
        rda_names = Helpers::SolrFormat::DATA_CENTER_NAMES[:RDA]
        @data_centers = rda_names[:long_name]
        @translator = Helpers::IsoToSolr.new(:rda)
      end

      # @return the :rda_url setting of the configured environment
      def metadata_url
        environment_settings = SolrEnvironments[@environment]
        environment_settings[:rda_url]
      end

      # RDA's feed provides no resumption token and returns every record in a
      # single response, so the token is blanked to stop the harvest loop.
      #
      # @return the oai:record nodes found below the ListRecords response
      def results
        @resumption_token = ''
        get_results(request_string, '//oai:ListRecords', '')
          .xpath('.//oai:record', Helpers::IsoNamespaces.namespaces)
      end

      private

      # Query parameters for the request (presumably consumed by
      # Oai#request_string -- confirm against the parent class).
      def request_params
        { verb: 'ListRecords', metadataPrefix: 'dif' }
      end
    end
  end
end
@@ -1,71 +0,0 @@
1
- require_relative 'base'
2
-
3
module SearchSolrTools
  module Harvesters
    # Pages through the TDAR catalog, translates each entry and inserts the
    # resulting documents into Solr.
    class Tdar < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, insert errors are re-raised
      def initialize(env = 'development', die_on_failure = false)
        super(env, die_on_failure)
        @page_size = 100
        @translator = Helpers::IsoToSolr.new(:tdar)
      end

      # Announces the harvest and hands the harvesting method plus the TDAR
      # data-center constraint to Base#harvest_and_delete.
      def harvest_and_delete
        puts "Running harvest of TDAR catalog from #{tdar_url}"
        tdar_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:TDAR][:long_name]
        super(method(:harvest_tdar_into_solr), "data_centers:\"#{tdar_name}\"")
      end

      # Requests pages of @page_size entries until a page comes back empty or
      # the advertised total has been harvested.
      def harvest_tdar_into_solr
        offset = 0
        harvested = 0
        expected = total_results

        entries = get_results_from_tdar(offset)
        while entries && entries.length > 0
          begin
            insert_solr_docs(get_docs_with_translated_entries_from_tdar(entries))
          rescue => e
            puts "ERROR: #{e}\n\n"
            raise e if @die_on_failure
          end

          # Once every expected record has been seen, do not request another
          # page; it would result in an error.
          harvested += entries.length
          break if harvested >= expected

          offset += @page_size
          entries = get_results_from_tdar(offset)
        end
      end

      # @return the :tdar_url setting of the configured environment
      def tdar_url
        environment_settings = SolrEnvironments[@environment]
        environment_settings[:tdar_url]
      end

      # Fetches one page of atom:entry results starting at start_record.
      def get_results_from_tdar(start_record)
        page_request = build_request(@page_size, start_record)
        get_results(page_request, './/atom:entry', 'application/xml')
      end

      # Translates each entry and wraps the translated root in a Solr add doc.
      def get_docs_with_translated_entries_from_tdar(entries)
        entries.map do |entry|
          translated = @translator.translate(entry)
          create_new_solr_add_doc_with_child(translated.root)
        end
      end

      # Builds the search URL for one page of datasets within the hard-coded
      # bounding box (latitude 45..90, longitude -180..180).
      def build_request(max_records = '25', start_record = '0')
        query = [
          '_tDAR.searchType=ACADIS_RSS',
          'resourceTypes=DATASET',
          'groups[0].latitudeLongitudeBoxes[0].maximumLongitude=180',
          'groups[0].latitudeLongitudeBoxes[0].minimumLatitude=45',
          'groups[0].latitudeLongitudeBoxes[0].minimumLongitude=-180',
          'groups[0].latitudeLongitudeBoxes[0].maximumLatitude=90',
          'geoMode=ENVELOPE',
          'recordsPerPage=' + max_records.to_s,
          'startRecord=' + start_record.to_s
        ].join('&')

        tdar_url + '?' + query
      end

      # Reads opensearch:totalResults from a zero-record request.
      def total_results
        summary = get_results(build_request(0, 0), './/opensearch:totalResults')
        summary.text.to_i
      end
    end
  end
end
@@ -1,76 +0,0 @@
1
- require_relative 'base'
2
- require_relative '../helpers/csw_iso_query_builder'
3
-
4
module SearchSolrTools
  module Harvesters
    # Harvests data from USGS and inserts it into Solr after it has been translated
    class Usgs < Base
      # @param env [String] key used to look up settings in SolrEnvironments
      # @param die_on_failure [Boolean] when true, insert errors are re-raised
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @page_size = 100
        @translator = Helpers::IsoToSolr.new :usgs
      end

      # Announces the harvest and hands the harvesting method plus the USGS
      # data-center constraint to Base#harvest_and_delete.
      def harvest_and_delete
        puts "Running harvest of USGS catalog from #{usgs_url}"
        super(method(:harvest_usgs_into_solr), "data_centers:\"#{Helpers::SolrFormat::DATA_CENTER_NAMES[:USGS][:long_name]}\"")
      end

      # Gets translated entries from USGS and adds them to Solr, one page of
      # @page_size records at a time, until a page comes back empty.
      # This is the main entry point for the class.
      def harvest_usgs_into_solr
        start_index = 1
        while (entries = get_results_from_usgs(start_index)) && !entries.empty?
          begin
            insert_solr_docs get_docs_with_translated_entries_from_usgs(entries)
          rescue => e
            puts "ERROR: #{e}"
            raise e if @die_on_failure
          end
          start_index += @page_size
        end
      end

      # @return the :usgs_url setting of the configured environment
      def usgs_url
        SolrEnvironments[@environment][:usgs_url]
      end

      # Fetches one page of gmd:MD_Metadata records starting at start_index.
      def get_results_from_usgs(start_index)
        get_results build_csw_request('results', @page_size, start_index), '//gmd:MD_Metadata', ''
      end

      # Translates each entry and wraps the translated root in a Solr add doc.
      def get_docs_with_translated_entries_from_usgs(entries)
        entries.map do |entry|
          create_new_solr_add_doc_with_child(@translator.translate(entry).root)
        end
      end

      # Builds the CSW GetRecords URL for one page, overriding the query
      # builder defaults with the paging values and the bounding-box filter.
      def build_csw_request(resultType = 'results', maxRecords = '25', startPosition = '1')
        Helpers::CswIsoQueryBuilder.get_query_string(usgs_url,
                                                     'resultType' => resultType,
                                                     'maxRecords' => maxRecords,
                                                     'startPosition' => startPosition,
                                                     'TypeNames' => '',
                                                     'constraint' => bbox_constraint,
                                                     'outputSchema' => 'http://www.isotc211.org/2005/gmd')
      end

      # Returns the URL-escaped OGC filter restricting results to the
      # hard-coded bounding box (latitude 45..90, longitude -180..180).
      def bbox_constraint
        bbox = {
          west: '-180',
          south: '45',
          east: '180',
          north: '90'
        }

        filter = '<Filter xmlns:ogc="http://www.opengis.net/ogc" ' \
                 'xmlns:gml="http://www.opengis.net/gml" ' \
                 'xmlns:apiso="http://www.opengis.net/cat/csw/apiso/1.0">' \
                 '<ogc:BBOX><PropertyName>apiso:BoundingBox</PropertyName><gml:Envelope>' \
                 "<gml:lowerCorner>#{bbox[:west]} #{bbox[:south]}</gml:lowerCorner>" \
                 "<gml:upperCorner>#{bbox[:east]} #{bbox[:north]}</gml:upperCorner>" \
                 '</gml:Envelope></ogc:BBOX></Filter>'

        # URI.encode was removed in Ruby 3.0; the RFC 2396 parser's #escape is
        # the implementation it delegated to, so the output is unchanged.
        URI::RFC2396_Parser.new.escape(filter)
      end
    end
  end
end
@@ -1,29 +0,0 @@
1
- require 'search_solr_tools/helpers/query_builder'
2
-
3
module SearchSolrTools
  module Helpers
    # Constructs the string to query a CSW endpoint
    class CswIsoQueryBuilder
      # Defaults applied to every query. Frozen so that no caller can alter
      # the shared defaults by accident; query_params returns a fresh copy.
      DEFAULT_PARAMS = {
        service: 'CSW',
        version: '2.0.2',
        request: 'GetRecords',
        'TypeNames' => 'gmd:MD_Metadata',
        'ElementSetName' => 'full',
        'resultType' => 'results',
        'outputFormat' => 'application/xml',
        'maxRecords' => '25',
        'startPosition' => '1'
      }.freeze

      # Builds the full request URL: url followed by the query string that
      # QueryBuilder.build produces from the merged parameters.
      #
      # @param url [String] endpoint the query string is appended to
      # @param query_params [Hash] overrides/additions for DEFAULT_PARAMS
      # @return [String]
      def self.get_query_string(url, query_params = {})
        # The parenthesised call resolves to the class method below even
        # though a local of the same name is in scope.
        all_params = query_params(query_params)
        QueryBuilder.build(all_params).prepend(url)
      end

      # @param query_params [Hash] overrides/additions for DEFAULT_PARAMS
      # @return [Hash] a new hash; entries in query_params win over defaults
      def self.query_params(query_params = {})
        DEFAULT_PARAMS.merge(query_params)
      end
    end
  end
end
@@ -1,74 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
module SearchSolrTools
  module Helpers
    # Formatting helpers that read dates and bounding coordinates from
    # <date name="..."> and <float name="..."> elements of a node.
    class DataOneFormat < IsoToSolrFormat
      # @return [Hash] :start and :end, each passed through SolrFormat.date_str
      def self.date_range(node)
        begin_text = node.xpath('.//date[@name="beginDate"]').text.strip
        start_str = SolrFormat.date_str(begin_text)
        end_text = node.xpath('.//date[@name="endDate"]').text.strip

        { start: start_str, end: SolrFormat.date_str(end_text) }
      end

      # @return [Hash] :north, :south, :east, :west as stripped text values
      def self.bounding_box(node)
        [:north, :south, :east, :west].each_with_object({}) do |side, box|
          box[side] = node.xpath(".//float[@name=\"#{side}BoundCoord\"]").text.strip
        end
      end

      # @return [String] "south west north east"
      def self.spatial_display(node)
        bounding_box(node).values_at(:south, :west, :north, :east).join(' ')
      end

      # @return [String] "west south" when the box collapses to a point,
      #   otherwise "west south east north"
      def self.spatial_index(node)
        box = bounding_box(node)
        single_point = box[:west] == box[:east] && box[:south] == box[:north]
        corners = single_point ? [:west, :south] : [:west, :south, :east, :north]

        box.values_at(*corners).join(' ')
      end

      # @return [Float] north minus south
      def self.spatial_area(node)
        box = bounding_box(node)
        northern = box[:north].to_f
        northern - box[:south].to_f
      end

      def self.temporal_coverage(node)
        range = date_range(node)
        SolrFormat.temporal_display_str(range)
      end

      # Duration between the start and end dates; an empty end date means
      # "now". Returns nil when there is no start date.
      def self.temporal_duration(node)
        range = date_range(node)
        # The end is resolved first, even when the start turns out to be empty.
        finish = if range[:end].to_s.empty?
                   Time.now
                 else
                   Time.parse(range[:end])
                 end
        return if range[:start].to_s.empty?

        SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
      end

      def self.temporal_index_string(node)
        SolrFormat.temporal_index_str(date_range(node))
      end

      def self.facet_spatial_scope(node)
        SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(node))
      end

      def self.facet_temporal_duration(node)
        SolrFormat.get_temporal_duration_facet(temporal_duration(node))
      end
    end
  end
end
@@ -1,97 +0,0 @@
1
- require_relative 'selectors'
2
- require 'nokogiri'
3
-
4
module SearchSolrTools
  module Helpers
    # Translates ISO nokogiri documents into solr nokogiri documents using a hash driver object.
    # This class should be constructed passing the selector file hash as a parameter (see selectors.rb).
    # After creating an instance we call translate with a nokogiri iso document as a parameter.
    class IsoToSolr
      # Runs of two or more whitespace characters; collapsed to one space.
      MULTIPLE_WHITESPACE = /\s{2,}/

      # @param selector key into SELECTORS naming the field definitions to use
      def initialize(selector)
        @fields = SELECTORS[selector]
        @multiple_whitespace = MULTIPLE_WHITESPACE
      end

      # Evaluates xpath against the document and returns the matches as an
      # Array. Only the first match is kept when multivalue is exactly false
      # and no reduce callable is given. Any error yields an empty Array.
      def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
        matches = iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc)).to_a
        multivalue == false && reduce.nil? ? matches.first(1) : matches
      rescue StandardError
        # Best effort: an xpath that cannot be evaluated counts as "no data".
        []
      end

      # @return the selector's :default_values, or [''] when it has none
      def get_default_values(selector)
        selector.fetch(:default_values, [''])
      end

      # @return field.text when field responds to #text, otherwise field itself
      def format_text(field)
        field.respond_to?(:text) ? field.text : field
      end

      # Applies the selector's :format callable (falling back to the plain
      # text when there is none or when it raises), then cleans up String
      # results. Non-String results (e.g. arrays, numbers) pass through.
      # The input is never modified in place, so frozen strings and shared
      # default values are safe to pass.
      def format_field(selector, field)
        formatted = begin
          selector.key?(:format) ? selector[:format].call(field) : format_text(field)
        rescue StandardError
          format_text(field)
        end

        formatted = strip_invalid_utf8_bytes(formatted)
        return formatted unless formatted.is_a?(String)

        formatted.strip.gsub(MULTIPLE_WHITESPACE, ' ')
      end

      # Formats every field, optionally folds the results into one value with
      # reduce, and de-duplicates when the selector sets :unique.
      def format_fields(selector, fields, reduce = nil)
        formatted = fields.map { |f| format_field(selector, f) }.flatten
        formatted = [reduce.call(formatted)] unless reduce.nil?
        selector[:unique] ? formatted.uniq : formatted
      end

      # Tries the selector's xpaths in order and formats the matches of the
      # first one that yields non-blank text; otherwise formats the defaults.
      def create_solr_fields(iso_xml_doc, selector)
        selector[:xpaths].each do |xpath|
          fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])

          # stop evaluating xpaths once we find data in one of them
          next unless fields.any? { |f| !strip_invalid_utf8_bytes(f.text).strip.empty? }

          return format_fields(selector, fields, selector[:reduce])
        end
        format_fields(selector, get_default_values(selector))
      end

      # @return the Nokogiri document built from the configured fields
      def translate(iso_xml_doc)
        solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
          xml.doc_ do
            build_fields(xml, iso_xml_doc)
          end
        end
        solr_xml_doc.doc
      end

      # Emits one <field name="..."> element per non-nil, non-empty value.
      def build_fields(xml, iso_xml_doc)
        @fields.each do |field_name, selector|
          create_solr_fields(iso_xml_doc, selector).each do |value|
            # A reduce callable may return an array; emit each member.
            values = value.is_a?(Array) ? value : [value]
            values.each do |v|
              xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
            end
          end
        end
      end

      # Returns a copy of text without invalid byte sequences and without the
      # inverted question mark (U+00BF). Valid multibyte characters are kept.
      # Non-String arguments are returned untouched.
      def strip_invalid_utf8_bytes(text)
        return text unless text.is_a?(String)

        cleaned = text.valid_encoding? ? text : text.scrub('')
        cleaned.delete("\u00BF")
      end
    end
  end
end
@@ -1,197 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
-
6
module SearchSolrTools
  module Helpers
    # Methods for generating formatted strings from ISO xml nodes that can be indexed by SOLR
    # rubocop:disable ClassLength
    class IsoToSolrFormat
      # Callables wrapping the class methods below (presumably referenced by
      # the selector definitions as :format / :reduce entries -- confirm).
      KEYWORDS = proc { |keywords| build_keyword_list keywords }

      SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
      SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
      SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
      MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }

      FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
      FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
      FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
      FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }

      TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
      TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
      TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
      TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }

      DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
      ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
      EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }

      # @return [String] "south west north east"
      def self.spatial_display_str(box_node)
        bounding_box(box_node).values_at(:south, :west, :north, :east).join(' ')
      end

      # @return [String] "west south" when the box collapses to a point,
      #   otherwise "west south east north"
      def self.spatial_index_str(box_node)
        box = bounding_box(box_node)
        single_point = box[:west] == box[:east] && box[:south] == box[:north]
        corners = single_point ? [:west, :south] : [:west, :south, :east, :north]

        box.values_at(*corners).join(' ')
      end

      # @return [Float] north minus south
      def self.spatial_area_str(box_node)
        box = bounding_box(box_node)
        box[:north].to_f - box[:south].to_f
      end

      # @return [Float, nil] the largest of the values after #to_f
      def self.get_max_spatial_area(values)
        values.map { |value| value.to_f }.max
      end

      # @return [String, nil] nil for an invalid box, else 'Global'/'Non Global'
      def self.get_spatial_facet(box_node)
        box = bounding_box(box_node)
        return nil if BoundingBoxUtil.box_invalid?(box)

        BoundingBoxUtil.box_global?(box) ? 'Global' : 'Non Global'
      end

      def self.get_spatial_scope_facet(box_node)
        SolrFormat.get_spatial_scope_facet_with_bounding_box(bounding_box(box_node))
      end

      def self.temporal_display_str(temporal_node, formatted = false)
        range = date_range(temporal_node, formatted)
        SolrFormat.temporal_display_str(range)
      end

      # Duration between the start and end dates; an empty end date means
      # "now". Returns nil when there is no start date.
      def self.get_temporal_duration(temporal_node)
        range = date_range(temporal_node)
        # The end is resolved first, even when the start turns out to be empty.
        finish = if range[:end].to_s.empty?
                   Time.now
                 else
                   Time.parse(range[:end])
                 end
        return if range[:start].to_s.empty?

        SolrFormat.get_temporal_duration(Time.parse(range[:start]), finish)
      end

      def self.get_temporal_duration_facet(temporal_node)
        SolrFormat.get_temporal_duration_facet(get_temporal_duration(temporal_node))
      end

      def self.temporal_index_str(temporal_node)
        SolrFormat.temporal_index_str(date_range(temporal_node))
      end

      # @return [String] "long name | short name" of the organisation
      def self.sponsored_program_facet(node)
        names = %w(organisationName organisationShortName).map do |element|
          node.xpath(".//gmd:#{element}", IsoNamespaces.namespaces(node)).text.strip
        end

        names.join(' | ')
      end

      # @return [String] "category > topic > term"
      def self.build_keyword_list(keywords)
        %w(CategoryKeyword TopicKeyword TermKeyword)
          .map { |element| keywords.xpath(".//#{element}").text }
          .join(' > ')
      end

      # Reads the first start and end date found below the node. A value that
      # SolrFormat.date? rejects becomes ''; with formatted set, the value is
      # additionally passed through SolrFormat.date_str.
      #
      # @return [Hash] :start and :end
      def self.date_range(temporal_node, formatted = false)
        read_date = lambda do |paths|
          date = get_first_matching_child(temporal_node, paths)
          date = '' unless SolrFormat.date?(date)
          formatted ? SolrFormat.date_str(date) : date
        end

        {
          start: read_date.call(
            ['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']
          ),
          end: read_date.call(
            ['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date']
          )
        }
      end

      # Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
      # so text without an http(s):// prefix anywhere in it is replaced by ''.
      def self.dataset_url(url_node)
        url = url_node.text.strip
        url =~ %r{http[s]?://} ? url : ''
      end

      # @return [String] the ICES geonetwork URL for the given identifier
      def self.ices_dataset_url(auth_id)
        base = 'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid='
        base + auth_id
      end

      # @return [String] text of the first node matching any of the paths,
      #   or '' when nothing matches
      def self.get_first_matching_child(node, paths)
        union = paths.join(' | ')
        match = node.at_xpath(union, IsoNamespaces.namespaces(node))
        return '' if match.nil?

        match.text
      end

      # @return [Hash] :west, :south, :east, :north as produced by get_bound
      def self.bounding_box(box_node)
        [:west, :south, :east, :north].each_with_object({}) do |side, box|
          box[side] = get_bound(box_node, side)
        end
      end

      # @return [String, nil] the axis a compass direction is measured on
      def self.axis_label(direction)
        case direction
        when :north, :south then 'Latitude'
        when :east, :west then 'Longitude'
        end
      end

      # @return [Integer, nil] largest absolute value allowed on the axis
      def self.coordinate_boundary(lat_lon)
        case lat_lon
        when 'Latitude' then 90
        when 'Longitude' then 180
        end
      end

      # @return [Array<String>] whitespace-separated tokens of the first
      #   bounding element found for the direction ([] when none is found)
      def self.node_values(box_node, direction, lat_lon)
        lower = direction.to_s.downcase
        capitalized = direction.to_s.capitalize
        candidates = [
          "./gmd:#{lower}Bounding#{lat_lon}/gco:Decimal",
          "./gmd:#{lower}Bound#{lat_lon}/gco:Decimal",
          "./#{capitalized}BoundingCoordinate",
          "./dif:#{capitalized}ernmost_#{lat_lon}"
        ]

        get_first_matching_child(box_node, candidates).split(' ')
      end

      # @return [String] the bound as a float string, or '' when it is
      #   missing or beyond the axis limit
      def self.get_bound(box_node, direction)
        lat_lon = axis_label(direction)
        vals = node_values(box_node, direction, lat_lon)
        number = vals.first.to_f

        return '' if coordinate_boundary(lat_lon) < number.abs || vals.empty?

        # A trailing "West"/"South" token flips the sign of the value.
        number = -number if %w(West South).include?(vals.last)
        number.to_s
      end
    end
  end
end
@@ -1,61 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
- require_relative 'iso_to_solr_format'
6
-
7
module SearchSolrTools
  module Helpers
    # Formatting helpers for nodes whose extent is given as ows corner
    # elements and whose time span is embedded in free text of the form
    # "START YEAR: ... * END YEAR: ...".
    class NcdcPaleoFormat < IsoToSolrFormat
      # Each corner holds two whitespace-separated values: east/north in
      # ows:UpperCorner and west/south in ows:LowerCorner.
      #
      # @return [Hash] :north, :south, :east, :west (nil for missing values)
      def self.bounding_box(node)
        east, north = node.xpath('./ows:UpperCorner').text.split
        west, south = node.xpath('./ows:LowerCorner').text.split
        { north: north, south: south, east: east, west: west }
      end

      # @return [Hash, nil] :start and :end as DateTime, or nil when the text
      #   has no 'START YEAR' or uses neither 'AD' nor 'yr BP'
      def self.date_range(node, _formatted = false)
        text = node.text
        return unless text.include?('START YEAR')

        if text.include?('AD')
          format_ad_time(text)
        elsif text.include?('yr BP')
          format_cal_yr_bp_time(text)
        end
      end

      # Parses "START YEAR: <y> AD * END YEAR: <y> AD".
      # Raises NoMethodError when the text does not match that layout.
      def self.format_ad_time(node_text)
        match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
        {
          start: DateTime.strptime(match[:start].strip, '%Y'),
          end: DateTime.strptime(match[:end].strip, '%Y')
        }
      end

      # Parses "START YEAR: <n> ... yr BP * END YEAR: <n> ... yr BP".
      # Years "before present" count backwards from 1950, so n yr BP is the
      # calendar year 1950 - n (e.g. 1000 yr BP => 950, -50 yr BP => 2000).
      # Raises NoMethodError when the text does not match that layout.
      def self.format_cal_yr_bp_time(node_text)
        zero_year = 1950
        match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
        {
          start: DateTime.strptime((zero_year - match[:start].strip.to_i).to_s, '%Y'),
          end: DateTime.strptime((zero_year - match[:end].strip.to_i).to_s, '%Y')
        }
      end

      # @return the index string for the range, or nil when there is no range
      def self.temporal_index_str(node)
        range = date_range(node)
        SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
      end

      # @return [Integer, nil] absolute number of whole days between start
      #   and end, or nil when there is no range
      def self.get_temporal_duration(node)
        range = date_range(node)
        return if range.nil?

        (range[:start] - range[:end]).to_i.abs
      end

      # @return '' unchanged, nil when the text contains ';', else the text
      def self.author(node)
        return node if node == ''
        return if node.text.include? ';'

        node.text
      end
    end
  end
end
@@ -1,13 +0,0 @@
1
module SearchSolrTools
  module Helpers
    # Class to build a query string based on a hash of params
    class QueryBuilder
      class << self
        # Joins the params as "key=value" pairs separated by "&" and prefixes
        # the result with "?". Keys and values are inserted verbatim (no URL
        # escaping is applied), so callers must pass already-escaped values.
        #
        # @param params [Hash, nil] query parameters; nil is treated as empty
        # @return [String] e.g. "?service=CSW&version=2.0.2", or "?" when empty
        def build(params)
          pairs = (params || {}).map { |key, value| "#{key}=#{value}" }
          "?#{pairs.join('&')}"
        end
      end
    end
  end
end
@@ -1,25 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
module SearchSolrTools
  module Helpers
    # Variant of IsoToSolrFormat that reads the time span from the
    # gml:TimeInstant elements with ids "start" and "end".
    class R2RFormat < IsoToSolrFormat
      # Callables bound to this class so that the date_range defined below is
      # the one used.
      TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
      TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
      TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
      FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }

      # @return [Hash] :start and :end, the raw text of the two time positions
      def self.date_range(temporal_node, _formatted = false)
        position_of = lambda do |instant_id|
          './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/' \
            "gml:TimeInstant[@gml:id=\"#{instant_id}\"]/gml:timePosition"
        end

        begin_text = temporal_node.xpath(position_of.call('start')).text
        end_text = temporal_node.xpath(position_of.call('end')).text

        { start: begin_text, end: end_text }
      end
    end
  end
end