search_solr_tools 6.1.0 → 6.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/bin/search_solr_tools +1 -13
  4. data/lib/search_solr_tools/config/environments.yaml +0 -32
  5. data/lib/search_solr_tools/harvesters/base.rb +0 -1
  6. data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
  7. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
  8. data/lib/search_solr_tools/version.rb +1 -1
  9. data/lib/search_solr_tools.rb +1 -2
  10. metadata +2 -44
  11. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  12. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  13. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  14. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  15. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  16. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  17. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  18. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  19. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  20. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  21. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  22. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  23. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  24. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  25. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  26. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  27. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  28. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  29. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  32. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  33. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  34. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  35. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  36. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  37. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  38. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  39. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  42. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  43. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  44. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  45. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  46. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  47. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  48. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  49. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  50. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  51. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  52. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,97 +0,0 @@
1
- require_relative 'selectors'
2
- require 'nokogiri'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Translates ISO nokogiri documents into solr nokogiri documents using a hash driver object.
7
- # This class should be constructed passing the selector file hash as a parameter (see selectors.rb).
8
- # After creating an instance we call translate with a nokogiri iso document as a parameter.
9
class IsoToSolr
  # selector - key into SELECTORS; the looked-up value is iterated in
  #            build_fields as field_name => selector-hash pairs.
  def initialize(selector)
    @fields = SELECTORS[selector]
    # Compiled once so format_field does not rebuild it for every value.
    @multiple_whitespace = /\s{2,}/
  end

  # Returns an Array of the nodes matching +xpath+ in +iso_xml_doc+.
  # Only the first match is kept when the field is explicitly single-valued
  # (multivalue == false) and no reducer is given.
  # Any error raised while evaluating the xpath yields an empty Array; this is
  # a deliberate best effort so callers can fall through to the next xpath or
  # to the selector's default values.
  def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
    matches = iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc)).to_a
    multivalue == false && reduce.nil? ? matches.first(1) : matches
  rescue StandardError
    []
  end

  # Returns selector[:default_values] when that key is present, otherwise [''].
  def get_default_values(selector)
    selector.fetch(:default_values, [''])
  end

  # Returns field.text for objects that respond to #text, otherwise the field itself.
  def format_text(field)
    field.respond_to?(:text) ? field.text : field
  end

  # Formats a single field value.
  # Uses selector[:format] when present; if the formatter raises, falls back
  # to the plain text of the field. String results are cleaned of invalid
  # bytes, stripped, and have whitespace runs collapsed to a single space.
  # Non-string results (arrays, numbers, nil) are returned untouched.
  # The input is never modified in place, so shared or frozen default values
  # are safe to pass in.
  def format_field(selector, field)
    formatted =
      begin
        selector.key?(:format) ? selector[:format].call(field) : format_text(field)
      rescue StandardError
        format_text(field)
      end
    formatted = strip_invalid_utf8_bytes(formatted)
    formatted = formatted.strip if formatted.respond_to?(:strip)
    formatted = formatted.gsub(@multiple_whitespace, ' ') if formatted.respond_to?(:gsub)
    formatted
  end

  # Formats every field, flattens the results, optionally folds them into a
  # single value with +reduce+, and de-duplicates when selector[:unique] is set.
  def format_fields(selector, fields, reduce = nil)
    formatted = fields.map { |f| format_field(selector, f) }.flatten
    formatted = [reduce.call(formatted)] unless reduce.nil?
    selector[:unique] ? formatted.uniq : formatted
  end

  # Tries each of selector[:xpaths] in order and formats the matches of the
  # first one that yields any non-blank text; falls back to the selector's
  # default values when none does.
  def create_solr_fields(iso_xml_doc, selector)
    selector[:xpaths].each do |xpath|
      fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])
      next unless fields.any? { |f| !strip_invalid_utf8_bytes(f.text).strip.empty? }

      # stop evaluating xpaths once we find data in one of them
      return format_fields(selector, fields, selector[:reduce])
    end
    format_fields(selector, get_default_values(selector))
  end

  # Builds a <doc> element holding one <field> per translated value and
  # returns the resulting Nokogiri document.
  def translate(iso_xml_doc)
    solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
      xml.doc_ do
        build_fields(xml, iso_xml_doc)
      end
    end
    solr_xml_doc.doc
  end

  # Emits a <field name="..."> element for every non-nil, non-empty value of
  # every configured field. A value that is itself an Array is expanded one
  # level so each member becomes its own element.
  def build_fields(xml, iso_xml_doc)
    @fields.each do |field_name, selector|
      create_solr_fields(iso_xml_doc, selector).each do |value|
        members = value.is_a?(Array) ? value : [value]
        members.each do |v|
          xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
        end
      end
    end
  end

  # Returns a copy of +text+ without invalid byte sequences and without the
  # inverted question mark (U+00BF). Valid multi-byte characters are kept.
  # Objects that are not strings are returned as-is. The argument itself is
  # never mutated.
  def strip_invalid_utf8_bytes(text)
    return text unless text.respond_to?(:scrub)

    cleaned = text.valid_encoding? ? text : text.scrub('')
    cleaned.delete("\u00BF")
  end
end
96
- end
97
- end
@@ -1,197 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
-
6
- module SearchSolrTools
7
- module Helpers
8
- # Methods for generating formatted strings from ISO xml nodes that can be indexed by SOLR
9
- # rubocop:disable ClassLength
10
class IsoToSolrFormat
  # Callable wrappers around the class methods below.
  KEYWORDS = proc { |keywords| build_keyword_list keywords }

  SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
  SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
  SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
  MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }

  FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
  FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
  FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
  FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }

  TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
  TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
  TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
  TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }

  DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
  ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
  EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }

  # Axis name for each compass direction (see axis_label).
  AXIS_LABELS = {
    north: 'Latitude',
    south: 'Latitude',
    east: 'Longitude',
    west: 'Longitude'
  }.freeze

  # Largest absolute coordinate accepted on each axis (see coordinate_boundary).
  COORDINATE_BOUNDARIES = {
    'Latitude' => 90,
    'Longitude' => 180
  }.freeze

  # Returns "south west north east" for the node's bounding box.
  def self.spatial_display_str(box_node)
    box = bounding_box(box_node)
    "#{box[:south]} #{box[:west]} #{box[:north]} #{box[:east]}"
  end

  # Returns "west south" when the box collapses to a single point,
  # otherwise "west south east north".
  def self.spatial_index_str(box_node)
    box = bounding_box(box_node)
    if box[:west] == box[:east] && box[:south] == box[:north]
      [box[:west], box[:south]]
    else
      [box[:west], box[:south], box[:east], box[:north]]
    end.join(' ')
  end

  # Returns north minus south as a Float (the latitudinal extent of the box).
  def self.spatial_area_str(box_node)
    box = bounding_box(box_node)
    box[:north].to_f - box[:south].to_f
  end

  # Returns the largest of +values+ after converting each to Float.
  def self.get_max_spatial_area(values)
    values.map(&:to_f).max
  end

  # Returns nil for an invalid box, 'Global' for a global one and
  # 'Non Global' otherwise, as decided by BoundingBoxUtil.
  def self.get_spatial_facet(box_node)
    box = bounding_box(box_node)
    return nil if BoundingBoxUtil.box_invalid?(box)

    BoundingBoxUtil.box_global?(box) ? 'Global' : 'Non Global'
  end

  # Delegates to SolrFormat with the node's bounding box.
  def self.get_spatial_scope_facet(box_node)
    box = bounding_box(box_node)
    SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
  end

  # Delegates to SolrFormat.temporal_display_str with the node's date range.
  def self.temporal_display_str(temporal_node, formatted = false)
    SolrFormat.temporal_display_str(date_range(temporal_node, formatted))
  end

  # Returns SolrFormat.get_temporal_duration between the range's start and
  # end, using the current time when the end is blank. Returns nil when the
  # start is blank; nothing is parsed in that case.
  # NOTE(review): Time.parse comes from the 'time' standard library, which is
  # not among the requires visible for this file - confirm it is loaded
  # elsewhere.
  def self.get_temporal_duration(temporal_node)
    dr = date_range(temporal_node)
    return if dr[:start].to_s.empty?

    end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
    SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time)
  end

  # Delegates to SolrFormat.get_temporal_duration_facet with the node's duration.
  def self.get_temporal_duration_facet(temporal_node)
    duration = get_temporal_duration(temporal_node)
    SolrFormat.get_temporal_duration_facet(duration)
  end

  # Delegates to SolrFormat.temporal_index_str with the node's date range.
  def self.temporal_index_str(temporal_node)
    dr = date_range(temporal_node)
    SolrFormat.temporal_index_str(dr)
  end

  # Returns "long name | short name" built from the organisation name nodes.
  def self.sponsored_program_facet(node)
    long_name = node.xpath('.//gmd:organisationName', IsoNamespaces.namespaces(node)).text.strip
    short_name = node.xpath('.//gmd:organisationShortName', IsoNamespaces.namespaces(node)).text.strip

    [long_name, short_name].join(' | ')
  end

  # Returns "category > topic > term" from the keyword node's children.
  def self.build_keyword_list(keywords)
    category = keywords.xpath('.//CategoryKeyword').text
    topic = keywords.xpath('.//TopicKeyword').text
    term = keywords.xpath('.//TermKeyword').text
    "#{category} > #{topic} > #{term}"
  end

  # Returns { start:, end: } taken from the first matching child of each kind.
  # A value SolrFormat.date? rejects becomes ''. With +formatted+ set, each
  # value is additionally passed through SolrFormat.date_str.
  def self.date_range(temporal_node, formatted = false)
    start_date = get_first_matching_child(
      temporal_node,
      ['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']
    )
    start_date = '' unless SolrFormat.date?(start_date)
    start_date = SolrFormat.date_str(start_date) if formatted

    end_date = get_first_matching_child(
      temporal_node,
      ['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date']
    )
    end_date = '' unless SolrFormat.date?(end_date)
    end_date = SolrFormat.date_str(end_date) if formatted

    { start: start_date, end: end_date }
  end

  # Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
  # Returns the stripped node text when it contains http:// or https://, else ''.
  def self.dataset_url(url_node)
    url = url_node.text.strip
    url =~ %r{http[s]?://} ? url : ''
  end

  # Returns the ICES geonetwork URL for +auth_id+.
  # Concatenation is kept on purpose: String#+ uses the argument's implicit
  # string conversion, whereas interpolation would call to_s.
  # NOTE(review): presumably auth_id is a String or responds to to_str - confirm.
  def self.ices_dataset_url(auth_id)
    'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid=' + auth_id
  end

  # Returns the text of the first node matching any of +paths+, or '' when
  # nothing matches.
  def self.get_first_matching_child(node, paths)
    matching_nodes = node.at_xpath(paths.join(' | '), IsoNamespaces.namespaces(node))
    matching_nodes.nil? ? '' : matching_nodes.text
  end

  # Returns { west:, south:, east:, north: } with each side from get_bound.
  def self.bounding_box(box_node)
    {
      west: get_bound(box_node, :west),
      south: get_bound(box_node, :south),
      east: get_bound(box_node, :east),
      north: get_bound(box_node, :north)
    }
  end

  # Returns 'Latitude' or 'Longitude' for a direction symbol, nil for anything else.
  def self.axis_label(direction)
    AXIS_LABELS[direction]
  end

  # Returns 90 for 'Latitude', 180 for 'Longitude', nil for anything else.
  def self.coordinate_boundary(lat_lon)
    COORDINATE_BOUNDARIES[lat_lon]
  end

  # Returns the whitespace-separated tokens of the first matching bound
  # element for +direction+ (an empty Array when none matches).
  def self.node_values(box_node, direction, lat_lon)
    lower = direction.to_s.downcase
    upper = direction.to_s.capitalize
    get_first_matching_child(
      box_node,
      [
        "./gmd:#{lower}Bounding#{lat_lon}/gco:Decimal",
        "./gmd:#{lower}Bound#{lat_lon}/gco:Decimal",
        "./#{upper}BoundingCoordinate",
        "./dif:#{upper}ernmost_#{lat_lon}"
      ]
    ).split(' ')
  end

  # Returns the coordinate for +direction+ as a Float rendered to a String.
  # Returns '' when no value is present or its magnitude exceeds the axis
  # boundary. The sign is flipped when the last token is 'West' or 'South'.
  def self.get_bound(box_node, direction)
    lat_lon = axis_label(direction)
    vals = node_values(box_node, direction, lat_lon)
    return '' if vals.empty?

    coordinate = vals.first.to_f
    return '' if coordinate.abs > coordinate_boundary(lat_lon)

    coordinate = -coordinate if %w[West South].include?(vals.last)
    coordinate.to_s
  end
end
196
- end
197
- end
@@ -1,61 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
- require_relative 'iso_to_solr_format'
6
-
7
- module SearchSolrTools
8
- module Helpers
9
# Formatter for NCDC Paleo records: corner-based bounding boxes and temporal
# coverage expressed as "START YEAR: ... * END YEAR: ..." text.
class NcdcPaleoFormat < IsoToSolrFormat
  # Builds the box from the ows:UpperCorner / ows:LowerCorner texts, each
  # read as "<east-or-west> <north-or-south>".
  def self.bounding_box(node)
    east, north = node.xpath('./ows:UpperCorner').text.split
    west, south = node.xpath('./ows:LowerCorner').text.split
    { north: north, south: south, east: east, west: west }
  end

  # Returns { start:, end: } as DateTime values parsed from the node text,
  # or nil when the text has no 'START YEAR', is in neither the AD nor the
  # 'yr BP' notation, or does not fit the expected pattern.
  def self.date_range(node, _formatted = false)
    text = node.text
    return unless text.include?('START YEAR')
    return format_ad_time(text) if text.include?('AD')

    format_cal_yr_bp_time(text) if text.include?('yr BP')
  end

  # Parses "START YEAR: <y> AD * END YEAR: <y> AD".
  # Returns nil when the text does not have that shape, rather than failing
  # on a nil match.
  def self.format_ad_time(node_text)
    match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
    return if match.nil?

    {
      start: DateTime.strptime(match[:start].strip, '%Y'),
      end: DateTime.strptime(match[:end].strip, '%Y')
    }
  end

  # Parses "START YEAR: <n> ... yr BP * END YEAR: <n> ... yr BP".
  # "Before present" counts backwards from 1950, so the calendar year is
  # 1950 - n; years before 1 CE come out negative.
  # Returns nil when the text does not have that shape.
  def self.format_cal_yr_bp_time(node_text)
    zero_year = 1950
    match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
    return if match.nil?

    {
      start: DateTime.strptime((zero_year - match[:start].strip.to_i).to_s, '%Y'),
      end: DateTime.strptime((zero_year - match[:end].strip.to_i).to_s, '%Y')
    }
  end

  # Returns SolrFormat.temporal_index_str for the parsed range, or nil when
  # no range could be parsed.
  def self.temporal_index_str(node)
    range = date_range(node)
    SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
  end

  # Returns the absolute difference between start and end (DateTime
  # subtraction, truncated to an Integer), or nil when no range could be parsed.
  def self.get_temporal_duration(node)
    range = date_range(node)
    return if range.to_s.empty?

    (range[:start] - range[:end]).to_i.abs
  end

  # Returns '' unchanged, nil when the node text contains ';', and the node
  # text otherwise.
  def self.author(node)
    return node if node == ''
    return if node.text.include? ';'

    node.text
  end
end
60
- end
61
- end
@@ -1,13 +0,0 @@
1
- module SearchSolrTools
2
- module Helpers
3
- # Class to build a query string based on a hash of params
4
class QueryBuilder
  # Renders +params+ as "?key1=value1&key2=value2".
  # Keys and values are interpolated as-is; no escaping is applied.
  # An empty collection produces just "?".
  def self.build(params)
    pairs = params.each_with_object([]) do |(key, value), rendered|
      rendered << "#{key}=#{value}"
    end
    "?#{pairs.join('&')}"
  end
end
12
- end
13
- end
@@ -1,25 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
# Formatter whose date range is read from the gml:TimeInstant elements with
# gml:id "start" and "end".
class R2RFormat < IsoToSolrFormat
  # Callable wrappers bound to this class, so the overridden date_range is
  # the one the inherited helpers end up using.
  TEMPORAL_INDEX_STRING = proc { |temporal| R2RFormat.temporal_index_str(temporal) }
  TEMPORAL_DISPLAY_STRING = proc { |temporal| R2RFormat.temporal_display_str(temporal) }
  TEMPORAL_DURATION = proc { |temporal| R2RFormat.get_temporal_duration(temporal) }
  FACET_TEMPORAL_DURATION = proc { |temporal| R2RFormat.get_temporal_duration_facet(temporal) }

  # Returns { start:, end: } holding the text of the two timePosition
  # elements. The second argument is accepted for signature compatibility
  # and ignored.
  def self.date_range(temporal_node, _formatted = false)
    begin_path, finish_path = %w[start end].map do |instant_id|
      './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/' \
        "gml:TimeInstant[@gml:id=\"#{instant_id}\"]/gml:timePosition"
    end

    {
      start: temporal_node.xpath(begin_path).text,
      end: temporal_node.xpath(finish_path).text
    }
  end
end
24
- end
25
- end
@@ -1,22 +0,0 @@
1
- Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
2
-
3
- module SearchSolrTools
4
- module Helpers
5
- # This hash grabs all the selector files inside the selectors directory,
6
- # to add a new source we need to create a selector file and add it to this hash.
7
# Maps each source key to its selector hash. IsoToSolr#initialize looks a
# source up here by key. Frozen so the registry cannot be altered at runtime;
# new sources are added by editing this literal.
SELECTORS = {
  adc: Selectors::ADC,
  data_one: Selectors::DATA_ONE,
  echo: Selectors::ECHO,
  ices: Selectors::ICES,
  nmi: Selectors::NMI,
  ncdc_paleo: Selectors::NCDC_PALEO,
  nodc: Selectors::NODC,
  pdc: Selectors::PDC,
  r2r: Selectors::R2R,
  rda: Selectors::RDA,
  tdar: Selectors::TDAR,
  usgs: Selectors::USGS
}.freeze
21
- end
22
- end
@@ -1,70 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- # Special formatter for dealing with temporal metadata issues in the TDAR feed
8
class TdarFormat < IsoToSolrFormat
  # Callable wrappers bound to this class, so the overridden date_range and
  # bounding_box are the ones the inherited helpers end up using.
  SPATIAL_DISPLAY = proc { |node| TdarFormat.spatial_display_str(node) }
  SPATIAL_INDEX = proc { |node| TdarFormat.spatial_index_str(node) }
  FACET_SPATIAL_SCOPE = proc { |node| TdarFormat.get_spatial_scope_facet(node) }

  TEMPORAL_INDEX_STRING = proc { |node| TdarFormat.temporal_index_str(node) }
  TEMPORAL_DISPLAY_STRING = proc { |node| TdarFormat.temporal_display_str(node) }
  TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| TdarFormat.temporal_display_str(node, true) }
  TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration(node) }
  FACET_TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration_facet(node) }

  # Delegates to SolrFormat with this class's bounding box.
  def self.get_spatial_scope_facet(node)
    box = bounding_box(node)
    SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
  end

  # Returns { start:, end: } for the node.
  # A bare four-digit year expands to the whole year and a full
  # "YYYY-MM-DDTHH:MM:SSZ" timestamp becomes a one-instant range. Every other
  # case, including a node count other than one, is handed to the parent
  # implementation and its result returned.
  def self.date_range(temporal_node, formatted = false)
    namespaces = IsoNamespaces.namespaces(temporal_node)
    matches = temporal_node.xpath('.', namespaces)
    # Return the parent's answer here; without the return its result would
    # be thrown away and the case below would run regardless.
    return super unless matches.size == 1

    date_str = matches.first.text
    case date_str
    when /^[0-9]{4}$/
      year_to_range(date_str)
    when /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/
      single_date_to_range(date_str)
    else
      super
    end
  end

  # Returns a range that starts and ends on +date+.
  def self.single_date_to_range(date)
    { start: date, end: date }
  end

  # Returns a range from January 1st to December 31st of +year+.
  def self.year_to_range(year)
    { start: "#{year}-01-01", end: "#{year}-12-31" }
  end

  # Builds the box from the node's space-separated text, read positionally
  # as "south west north east".
  def self.bounding_box(node)
    south, west, north, east = node.text.split(' ')
    { west: west, south: south, east: east, north: north }
  end
end
69
- end
70
- end
@@ -1,50 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Special formatter for dealing with temporal metadata issues in the USGS feed
7
class UsgsFormat < IsoToSolrFormat
  # Callable wrappers bound to this class, so the overridden date_range is
  # the one the inherited helpers end up using.
  TEMPORAL_INDEX_STRING = proc { |node| UsgsFormat.temporal_index_str(node) }
  TEMPORAL_DISPLAY_STRING = proc { |node| UsgsFormat.temporal_display_str(node) }
  TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration(node) }
  FACET_TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration_facet(node) }

  # for USGS, a single date entry (i.e., missing either start or end date, and
  # the value that is present is not clearly labeled) means the whole year if
  # just a year is given, or just a single day if just a single day is given.
  #
  # Returns { start:, end: }. When the node holds no gco:Date, or more than
  # one, the parent implementation's result is returned; this also avoids
  # calling #text on a missing node.
  def self.date_range(temporal_node, formatted = false)
    namespaces = IsoNamespaces.namespaces(temporal_node)
    dates = temporal_node.xpath('.//gco:Date', namespaces)
    # Return the parent's answer here; without the return its result would
    # be thrown away and the case below would run regardless.
    return super unless dates.size == 1

    date_str = dates.first.text
    case date_str
    when /^[0-9]{4}$/
      year_to_range(date_str)
    when /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
      single_date_to_range(date_str)
    else
      super
    end
  end

  # Returns a range that starts and ends on +date+.
  def self.single_date_to_range(date)
    { start: date, end: date }
  end

  # Returns a range from January 1st to December 31st of +year+.
  def self.year_to_range(year)
    { start: "#{year}-01-01", end: "#{year}-12-31" }
  end
end
49
- end
50
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/data_one_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
# Field map for the ADC source: one entry per solr field name.
# Each entry lists the xpaths to try in order, optional default_values used
# when no xpath yields data, an optional format callable, and a multivalue flag.
# Entries with xpaths: [''] never take their value from the document, so they
# always resolve to their default_values.
# The outer hash is frozen so the map cannot be altered at runtime; the
# nested hashes and strings are left unfrozen.
ADC = {
  authoritative_id: {
    xpaths: ['.//str[@name="id"]'],
    multivalue: false
  },
  title: {
    xpaths: ['.//str[@name="title"]'],
    multivalue: false
  },
  summary: {
    xpaths: ['.//str[@name="abstract"]'],
    multivalue: false
  },
  data_centers: {
    xpaths: [''],
    default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
    multivalue: false
  },
  authors: {
    xpaths: ['.//str[@name="author"]'],
    multivalue: false
  },
  keywords: {
    xpaths: ['.//arr[@name="keywords"]/str'],
    multivalue: true
  },
  last_revision_date: {
    xpaths: ['.//date[@name="updateDate"]'],
    # The default is computed once, when this hash literal is evaluated, not per record.
    # NOTE(review): DateTime needs 'date', which this file does not require itself - confirm it is loaded elsewhere.
    default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
    multivalue: false,
    format: Helpers::SolrFormat::DATE
  },
  dataset_url: {
    xpaths: ['.//str[@name="dataUrl"]'],
    default_values: [''],
    multivalue: false
  },
  # The entries below select the current node ('.') and hand it to a
  # DataOneFormat method, which derives the value.
  spatial_coverages: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:spatial_display)
  },
  spatial: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:spatial_index)
  },
  spatial_area: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:spatial_area)
  },
  temporal_coverages: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:temporal_coverage)
  },
  temporal_duration: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:temporal_duration)
  },
  temporal: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:temporal_index_string)
  },
  source: {
    xpaths: [''],
    default_values: ['ADE'],
    multivalue: false
  },
  facet_data_center: {
    xpaths: [''],
    default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
    multivalue: false
  },
  facet_spatial_scope: {
    xpaths: ['.'],
    multivalue: false,
    format: Helpers::DataOneFormat.method(:facet_spatial_scope)
  },
  facet_temporal_duration: {
    xpaths: ['.'],
    default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
    format: Helpers::DataOneFormat.method(:facet_temporal_duration),
    multivalue: false
  }
}.freeze
95
- end
96
- end