search_solr_tools 6.1.0 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/bin/search_solr_tools +1 -13
  4. data/lib/search_solr_tools/config/environments.yaml +0 -32
  5. data/lib/search_solr_tools/harvesters/base.rb +0 -1
  6. data/lib/search_solr_tools/helpers/solr_format.rb +0 -15
  7. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +0 -1
  8. data/lib/search_solr_tools/version.rb +1 -1
  9. data/lib/search_solr_tools.rb +1 -2
  10. metadata +2 -44
  11. data/lib/search_solr_tools/harvesters/adc.rb +0 -49
  12. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +0 -46
  13. data/lib/search_solr_tools/harvesters/bcodmo.rb +0 -64
  14. data/lib/search_solr_tools/harvesters/data_one.rb +0 -49
  15. data/lib/search_solr_tools/harvesters/echo.rb +0 -52
  16. data/lib/search_solr_tools/harvesters/eol.rb +0 -51
  17. data/lib/search_solr_tools/harvesters/gtnp.rb +0 -67
  18. data/lib/search_solr_tools/harvesters/ices.rb +0 -58
  19. data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +0 -62
  20. data/lib/search_solr_tools/harvesters/nmi.rb +0 -34
  21. data/lib/search_solr_tools/harvesters/nodc.rb +0 -75
  22. data/lib/search_solr_tools/harvesters/oai.rb +0 -62
  23. data/lib/search_solr_tools/harvesters/pdc.rb +0 -40
  24. data/lib/search_solr_tools/harvesters/r2r.rb +0 -61
  25. data/lib/search_solr_tools/harvesters/rda.rb +0 -35
  26. data/lib/search_solr_tools/harvesters/tdar.rb +0 -71
  27. data/lib/search_solr_tools/harvesters/usgs.rb +0 -76
  28. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +0 -29
  29. data/lib/search_solr_tools/helpers/data_one_format.rb +0 -74
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +0 -97
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +0 -197
  32. data/lib/search_solr_tools/helpers/ncdc_paleo_format.rb +0 -61
  33. data/lib/search_solr_tools/helpers/query_builder.rb +0 -13
  34. data/lib/search_solr_tools/helpers/r2r_format.rb +0 -25
  35. data/lib/search_solr_tools/helpers/selectors.rb +0 -22
  36. data/lib/search_solr_tools/helpers/tdar_format.rb +0 -70
  37. data/lib/search_solr_tools/helpers/usgs_format.rb +0 -50
  38. data/lib/search_solr_tools/selectors/adc.rb +0 -96
  39. data/lib/search_solr_tools/selectors/data_one.rb +0 -96
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +0 -112
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +0 -108
  42. data/lib/search_solr_tools/selectors/ncdc_paleo.rb +0 -90
  43. data/lib/search_solr_tools/selectors/nmi.rb +0 -107
  44. data/lib/search_solr_tools/selectors/nodc_iso.rb +0 -108
  45. data/lib/search_solr_tools/selectors/pdc_iso.rb +0 -109
  46. data/lib/search_solr_tools/selectors/r2r.rb +0 -115
  47. data/lib/search_solr_tools/selectors/rda.rb +0 -107
  48. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +0 -91
  49. data/lib/search_solr_tools/selectors/usgs_iso.rb +0 -107
  50. data/lib/search_solr_tools/translators/bcodmo_json.rb +0 -89
  51. data/lib/search_solr_tools/translators/eol_to_solr.rb +0 -84
  52. data/lib/search_solr_tools/translators/gtnp_json.rb +0 -59
@@ -1,97 +0,0 @@
1
- require_relative 'selectors'
2
- require 'nokogiri'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Translates ISO nokogiri documents into solr nokogiri documents using a hash driver object.
7
- # This class should be constructed passing the selector file hash as a parameter (see selectors.rb).
8
- # After creating an instance we call translate with a nokogiri iso document as a parameter.
9
- class IsoToSolr
10
- def initialize(selector)
11
- @fields = SELECTORS[selector]
12
- @multiple_whitespace = /\s{2,}/ # save the regex so it is not recompiled every time format_field() is called
13
- end
14
-
15
- # this will return a nodeset with all the elements that matched the xpath
16
- def eval_xpath(iso_xml_doc, xpath, multivalue, reduce)
17
- fields = []
18
- begin
19
- iso_xml_doc.xpath(xpath, IsoNamespaces.namespaces(iso_xml_doc)).each do |f|
20
- fields.push(f)
21
- break if multivalue == false && reduce.nil?
22
- end
23
- rescue
24
- fields = []
25
- end
26
- fields
27
- end
28
-
29
- def get_default_values(selector)
30
- selector.key?(:default_values) ? selector[:default_values] : ['']
31
- end
32
-
33
- def format_text(field)
34
- field.respond_to?(:text) ? field.text : field
35
- end
36
-
37
- def format_field(selector, field)
38
- formatted = selector.key?(:format) ? selector[:format].call(field) : format_text(field) rescue format_text(field)
39
- formatted = strip_invalid_utf8_bytes(formatted)
40
- formatted.strip! if formatted.respond_to?(:strip!)
41
- formatted.gsub!(@multiple_whitespace, ' ') if formatted.respond_to?(:gsub!)
42
- formatted
43
- end
44
-
45
- def format_fields(selector, fields, reduce = nil)
46
- formatted = fields.map { |f| format_field(selector, f) }.flatten
47
- formatted = [reduce.call(formatted)] unless reduce.nil?
48
- selector[:unique] ? formatted.uniq : formatted
49
- end
50
-
51
- def create_solr_fields(iso_xml_doc, selector)
52
- selector[:xpaths].each do |xpath|
53
- fields = eval_xpath(iso_xml_doc, xpath, selector[:multivalue], selector[:reduce])
54
-
55
- # stop evaluating xpaths once we find data in one of them
56
- if fields.size > 0 && fields.any? { |f| strip_invalid_utf8_bytes(f.text).strip.length > 0 }
57
- return format_fields(selector, fields, selector[:reduce])
58
- end
59
- end
60
- format_fields(selector, get_default_values(selector))
61
- end
62
-
63
- def translate(iso_xml_doc)
64
- solr_xml_doc = Nokogiri::XML::Builder.new do |xml|
65
- xml.doc_ do
66
- build_fields(xml, iso_xml_doc)
67
- end
68
- end
69
- solr_xml_doc.doc
70
- end
71
-
72
- def build_fields(xml, iso_xml_doc)
73
- @fields.each do |field_name, selector|
74
- create_solr_fields(iso_xml_doc, selector).each do |value|
75
- if value.is_a? Array
76
- value.each do |v|
77
- xml.field_({ name: field_name }, v) unless v.nil? || v.eql?('')
78
- end
79
- else
80
- xml.field_({ name: field_name }, value) unless value.nil? || value.eql?('')
81
- end
82
- end
83
- end
84
- end
85
-
86
- def strip_invalid_utf8_bytes(text)
87
- if text.respond_to?(:encode) && !text.valid_encoding?
88
- text.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
89
- end
90
-
91
- text.delete!("\u00BF") if text.respond_to?(:delete!)
92
-
93
- text
94
- end
95
- end
96
- end
97
- end
@@ -1,197 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
-
6
- module SearchSolrTools
7
- module Helpers
8
- # Methods for generating formatted strings from ISO xml nodes that can be indexed by SOLR
9
- # rubocop:disable ClassLength
10
- class IsoToSolrFormat
11
- KEYWORDS = proc { |keywords| build_keyword_list keywords }
12
-
13
- SPATIAL_DISPLAY = proc { |node| IsoToSolrFormat.spatial_display_str(node) }
14
- SPATIAL_INDEX = proc { |node| IsoToSolrFormat.spatial_index_str(node) }
15
- SPATIAL_AREA = proc { |node| IsoToSolrFormat.spatial_area_str(node) }
16
- MAX_SPATIAL_AREA = proc { |values| IsoToSolrFormat.get_max_spatial_area(values) }
17
-
18
- FACET_SPONSORED_PROGRAM = proc { |node| IsoToSolrFormat.sponsored_program_facet node }
19
- FACET_SPATIAL_COVERAGE = proc { |node| IsoToSolrFormat.get_spatial_facet(node) }
20
- FACET_SPATIAL_SCOPE = proc { |node| IsoToSolrFormat.get_spatial_scope_facet(node) }
21
- FACET_TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration_facet(node) }
22
-
23
- TEMPORAL_DURATION = proc { |node| IsoToSolrFormat.get_temporal_duration(node) }
24
- TEMPORAL_INDEX_STRING = proc { |node| IsoToSolrFormat.temporal_index_str node }
25
- TEMPORAL_DISPLAY_STRING = proc { |node| IsoToSolrFormat.temporal_display_str node }
26
- TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| IsoToSolrFormat.temporal_display_str(node, true) }
27
-
28
- DATASET_URL = proc { |node| IsoToSolrFormat.dataset_url(node) }
29
- ICES_DATASET_URL = proc { |node| IsoToSolrFormat.ices_dataset_url(node) }
30
- EOL_AUTHOR_FORMAT = proc { |node| IsoToSolrFormat.eol_author_format(node) }
31
-
32
- def self.spatial_display_str(box_node)
33
- box = bounding_box(box_node)
34
- "#{box[:south]} #{box[:west]} #{box[:north]} #{box[:east]}"
35
- end
36
-
37
- def self.spatial_index_str(box_node)
38
- box = bounding_box(box_node)
39
- if box[:west] == box[:east] && box[:south] == box[:north]
40
- [box[:west], box[:south]]
41
- else
42
- [box[:west], box[:south], box[:east], box[:north]]
43
- end.join(' ')
44
- end
45
-
46
- def self.spatial_area_str(box_node)
47
- box = bounding_box(box_node)
48
- area = box[:north].to_f - box[:south].to_f
49
- area
50
- end
51
-
52
- def self.get_max_spatial_area(values)
53
- values.map(&:to_f).max
54
- end
55
-
56
- def self.get_spatial_facet(box_node)
57
- box = bounding_box(box_node)
58
-
59
- if BoundingBoxUtil.box_invalid?(box)
60
- facet = nil
61
- elsif BoundingBoxUtil.box_global?(box)
62
- facet = 'Global'
63
- else
64
- facet = 'Non Global'
65
- end
66
- facet
67
- end
68
-
69
- def self.get_spatial_scope_facet(box_node)
70
- box = bounding_box(box_node)
71
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
72
- end
73
-
74
- def self.temporal_display_str(temporal_node, formatted = false)
75
- SolrFormat.temporal_display_str(date_range(temporal_node, formatted))
76
- end
77
-
78
- def self.get_temporal_duration(temporal_node)
79
- dr = date_range(temporal_node)
80
- end_time = dr[:end].to_s.empty? ? Time.now : Time.parse(dr[:end])
81
- SolrFormat.get_temporal_duration(Time.parse(dr[:start]), end_time) unless dr[:start].to_s.empty?
82
- end
83
-
84
- def self.get_temporal_duration_facet(temporal_node)
85
- duration = get_temporal_duration(temporal_node)
86
- SolrFormat.get_temporal_duration_facet(duration)
87
- end
88
-
89
- def self.temporal_index_str(temporal_node)
90
- dr = date_range(temporal_node)
91
- SolrFormat.temporal_index_str(dr)
92
- end
93
-
94
- def self.sponsored_program_facet(node)
95
- long_name = node.xpath('.//gmd:organisationName', IsoNamespaces.namespaces(node)).text.strip
96
- short_name = node.xpath('.//gmd:organisationShortName', IsoNamespaces.namespaces(node)).text.strip
97
-
98
- [long_name, short_name].join(' | ')
99
- end
100
-
101
- def self.build_keyword_list(keywords)
102
- category = keywords.xpath('.//CategoryKeyword').text
103
- topic = keywords.xpath('.//TopicKeyword').text
104
- term = keywords.xpath('.//TermKeyword').text
105
- category << ' > ' << topic << ' > ' << term
106
- end
107
-
108
- def self.date_range(temporal_node, formatted = false)
109
- start_date = get_first_matching_child(
110
- temporal_node,
111
- ['.//gml:beginPosition', './/BeginningDateTime', './/gco:Date', './/dif:Start_Date']
112
- )
113
- start_date = '' unless SolrFormat.date?(start_date)
114
- start_date = SolrFormat.date_str(start_date) if formatted
115
-
116
- end_date = get_first_matching_child(
117
- temporal_node,
118
- ['.//gml:endPosition', './/EndingDateTime', './/gco:Date', './/dif:Stop_Date']
119
- )
120
- end_date = '' unless SolrFormat.date?(end_date)
121
- end_date = SolrFormat.date_str(end_date) if formatted
122
-
123
- {
124
- start: start_date,
125
- end: end_date
126
- }
127
- end
128
-
129
- # Met.no sometimes has bad metadata, such as <gmd:URL>SU-1 (planned activity)</gmd:URL>
130
- def self.dataset_url(url_node)
131
- url_node.text.strip =~ %r{http[s]?://} ? url_node.text.strip : ''
132
- end
133
-
134
- def self.ices_dataset_url(auth_id)
135
- 'http://geo.ices.dk/geonetwork/srv/en/main.home?uuid=' + auth_id
136
- end
137
-
138
- def self.get_first_matching_child(node, paths)
139
- matching_nodes = node.at_xpath(paths.join(' | '), IsoNamespaces.namespaces(node))
140
- matching_nodes.nil? ? '' : matching_nodes.text
141
- end
142
-
143
- def self.bounding_box(box_node)
144
- {
145
- west: get_bound(box_node, :west),
146
- south: get_bound(box_node, :south),
147
- east: get_bound(box_node, :east),
148
- north: get_bound(box_node, :north)
149
- }
150
- end
151
-
152
- def self.axis_label(direction)
153
- {
154
- north: 'Latitude',
155
- south: 'Latitude',
156
- east: 'Longitude',
157
- west: 'Longitude'
158
- }[direction]
159
- end
160
-
161
- def self.coordinate_boundary(lat_lon)
162
- {
163
- 'Latitude' => 90,
164
- 'Longitude' => 180
165
- }[lat_lon]
166
- end
167
-
168
- def self.node_values(box_node, direction, lat_lon)
169
- get_first_matching_child(
170
- box_node,
171
- [
172
- "./gmd:#{direction.to_s.downcase}Bounding#{lat_lon}/gco:Decimal",
173
- "./gmd:#{direction.to_s.downcase}Bound#{lat_lon}/gco:Decimal",
174
- "./#{direction.to_s.capitalize}BoundingCoordinate",
175
- "./dif:#{direction.to_s.capitalize}ernmost_#{lat_lon}"
176
- ]
177
- ).split(' ')
178
- end
179
-
180
- def self.get_bound(box_node, direction)
181
- lat_lon = axis_label(direction)
182
-
183
- vals = node_values(box_node, direction, lat_lon)
184
- val = vals.first
185
-
186
- boundary = coordinate_boundary(lat_lon)
187
- out_of_bounds = boundary < val.to_f.abs
188
-
189
- return '' if vals.empty? || out_of_bounds
190
-
191
- val = -val.to_f if %w(West South).include?(vals.last)
192
-
193
- val.to_f.to_s
194
- end
195
- end
196
- end
197
- end
@@ -1,61 +0,0 @@
1
- require 'date'
2
-
3
- require_relative 'iso_namespaces'
4
- require_relative 'solr_format'
5
- require_relative 'iso_to_solr_format'
6
-
7
- module SearchSolrTools
8
- module Helpers
9
- class NcdcPaleoFormat < IsoToSolrFormat
10
- def self.bounding_box(node)
11
- east, north = node.xpath('./ows:UpperCorner').text.split
12
- west, south = node.xpath('./ows:LowerCorner').text.split
13
- { north: north, south: south, east: east, west: west }
14
- end
15
-
16
- def self.date_range(node, _formatted = false)
17
- if node.text.include?('START YEAR')
18
- if node.text.include?('AD')
19
- format_ad_time(node.text)
20
- elsif node.text.include?('yr BP')
21
- format_cal_yr_bp_time(node.text)
22
- end
23
- end
24
- end
25
-
26
- def self.format_ad_time(node_text)
27
- match = node_text.match(/START YEAR:(?<start>[^*]*)AD\s*\* END YEAR:(?<end>[^*]*)AD/)
28
- {
29
- start: DateTime.strptime(match[:start].strip, '%Y'),
30
- end: DateTime.strptime(match[:end].strip, '%Y')
31
- }
32
- end
33
-
34
- def self.format_cal_yr_bp_time(node_text)
35
- zero_year = 1950
36
- match = node_text.match(/START YEAR:(?<start>[^*]*)... yr BP\s*\* END YEAR:(?<end>[^*]*)... yr BP/)
37
- {
38
- start: DateTime.strptime((-(match[:start].strip.to_i) - zero_year).to_s, '%Y'),
39
- end: DateTime.strptime((-(match[:end].strip.to_i) - zero_year).to_s, '%Y')
40
- }
41
- end
42
-
43
- def self.temporal_index_str(node)
44
- range = date_range(node)
45
- SolrFormat.temporal_index_str(start: range[:start].to_s, end: range[:end].to_s) unless range.nil?
46
- end
47
-
48
- def self.get_temporal_duration(node)
49
- range = date_range(node)
50
- return if range.to_s.empty?
51
- (range[:start] - range[:end]).to_i.abs
52
- end
53
-
54
- def self.author(node)
55
- return node if node == ''
56
- return if node.text.include? ';'
57
- node.text
58
- end
59
- end
60
- end
61
- end
@@ -1,13 +0,0 @@
1
- module SearchSolrTools
2
- module Helpers
3
- # Class to build a query string based on a hash of params
4
- class QueryBuilder
5
- class << self
6
- def build(params)
7
- param_str = params.map { |k, v| "#{k}=#{v}" }.join('&')
8
- "?#{param_str}"
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,25 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- class R2RFormat < IsoToSolrFormat
8
- TEMPORAL_INDEX_STRING = proc { |node| R2RFormat.temporal_index_str(node) }
9
- TEMPORAL_DISPLAY_STRING = proc { |node| R2RFormat.temporal_display_str(node) }
10
- TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration(node) }
11
- FACET_TEMPORAL_DURATION = proc { |node| R2RFormat.get_temporal_duration_facet(node) }
12
-
13
- def self.date_range(temporal_node, _formatted = false)
14
- xpath_start = './/gmd:temporalElement/gmd:EX_SpatialTemporalExtent/gmd:extent/'\
15
- 'gml:TimeInstant[@gml:id="start"]/gml:timePosition'
16
- xpath_end = xpath_start.gsub('start', 'end')
17
-
18
- {
19
- start: temporal_node.xpath(xpath_start).text,
20
- end: temporal_node.xpath(xpath_end).text
21
- }
22
- end
23
- end
24
- end
25
- end
@@ -1,22 +0,0 @@
1
- Dir[File.join(__dir__, '..', 'selectors', '*.rb')].each { |file| require file }
2
-
3
- module SearchSolrTools
4
- module Helpers
5
- # This hash grabs all the selector files inside the selectors directory,
6
- # to add a new source we need to create a selector file and add it to this hash.
7
- SELECTORS = {
8
- adc: Selectors::ADC,
9
- data_one: Selectors::DATA_ONE,
10
- echo: Selectors::ECHO,
11
- ices: Selectors::ICES,
12
- nmi: Selectors::NMI,
13
- ncdc_paleo: Selectors::NCDC_PALEO,
14
- nodc: Selectors::NODC,
15
- pdc: Selectors::PDC,
16
- r2r: Selectors::R2R,
17
- rda: Selectors::RDA,
18
- tdar: Selectors::TDAR,
19
- usgs: Selectors::USGS
20
- }
21
- end
22
- end
@@ -1,70 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
- require_relative 'solr_format'
4
-
5
- module SearchSolrTools
6
- module Helpers
7
- # Special formatter for dealing with temporal metadata issues in the TDAR feed
8
- class TdarFormat < IsoToSolrFormat
9
- SPATIAL_DISPLAY = proc { |node| TdarFormat.spatial_display_str(node) }
10
- SPATIAL_INDEX = proc { |node| TdarFormat.spatial_index_str(node) }
11
- FACET_SPATIAL_SCOPE = proc { |node| TdarFormat.get_spatial_scope_facet(node) }
12
-
13
- TEMPORAL_INDEX_STRING = proc { |node| TdarFormat.temporal_index_str(node) }
14
- TEMPORAL_DISPLAY_STRING = proc { |node| TdarFormat.temporal_display_str(node) }
15
- TEMPORAL_DISPLAY_STRING_FORMATTED = proc { |node| TdarFormat.temporal_display_str(node, true) }
16
- TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration(node) }
17
- FACET_TEMPORAL_DURATION = proc { |node| TdarFormat.get_temporal_duration_facet(node) }
18
-
19
- def self.get_spatial_scope_facet(node)
20
- box = bounding_box(node)
21
- SolrFormat.get_spatial_scope_facet_with_bounding_box(box)
22
- end
23
-
24
- def self.date_range(temporal_node, formatted = false)
25
- xpath = '.'
26
- namespaces = IsoNamespaces.namespaces(temporal_node)
27
-
28
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
29
- date_str = temporal_node.at_xpath(xpath, namespaces).text
30
-
31
- super if temporal_node_count != 1
32
-
33
- case date_str
34
- when /^[0-9]{4}$/
35
- year_to_range(date_str)
36
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/
37
- single_date_to_range(date_str)
38
- else
39
- super
40
- end
41
- end
42
-
43
- def self.single_date_to_range(date)
44
- {
45
- start: date,
46
- end: date
47
- }
48
- end
49
-
50
- def self.year_to_range(year)
51
- {
52
- start: "#{year}-01-01",
53
- end: "#{year}-12-31"
54
- }
55
- end
56
-
57
- # Bounding box is defined by two coordinates to create a point.
58
- # Create a bounding box from this point.
59
- def self.bounding_box(node)
60
- point = node.text.split(' ')
61
- {
62
- west: point[1],
63
- south: point[0],
64
- east: point[3],
65
- north: point[2]
66
- }
67
- end
68
- end
69
- end
70
- end
@@ -1,50 +0,0 @@
1
- require_relative 'iso_namespaces'
2
- require_relative 'iso_to_solr_format'
3
-
4
- module SearchSolrTools
5
- module Helpers
6
- # Special formatter for dealing with temporal metadata issues in the USGS feed
7
- class UsgsFormat < IsoToSolrFormat
8
- TEMPORAL_INDEX_STRING = proc { |node| UsgsFormat.temporal_index_str(node) }
9
- TEMPORAL_DISPLAY_STRING = proc { |node| UsgsFormat.temporal_display_str(node) }
10
- TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration(node) }
11
- FACET_TEMPORAL_DURATION = proc { |node| UsgsFormat.get_temporal_duration_facet(node) }
12
-
13
- # for USGS, a single date entry (i.e., missing either start or end date, and
14
- # the value that is present is not clearly labeled) means the whole year if
15
- # just a year is given, or just a single day if just a single day is given
16
- def self.date_range(temporal_node, formatted = false)
17
- xpath = './/gco:Date'
18
- namespaces = IsoNamespaces.namespaces(temporal_node)
19
-
20
- temporal_node_count = temporal_node.xpath(xpath, namespaces).size
21
- date_str = temporal_node.at_xpath(xpath, namespaces).text
22
-
23
- super if temporal_node_count != 1
24
-
25
- case date_str
26
- when /^[0-9]{4}$/
27
- year_to_range(date_str)
28
- when /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
29
- single_date_to_range(date_str)
30
- else
31
- super
32
- end
33
- end
34
-
35
- def self.single_date_to_range(date)
36
- {
37
- start: date,
38
- end: date
39
- }
40
- end
41
-
42
- def self.year_to_range(year)
43
- {
44
- start: "#{year}-01-01",
45
- end: "#{year}-12-31"
46
- }
47
- end
48
- end
49
- end
50
- end
@@ -1,96 +0,0 @@
1
- require_relative '../helpers/solr_format'
2
- require_relative '../helpers/data_one_format'
3
-
4
- module SearchSolrTools
5
- module Selectors
6
- ADC = {
7
- authoritative_id: {
8
- xpaths: ['.//str[@name="id"]'],
9
- multivalue: false
10
- },
11
- title: {
12
- xpaths: ['.//str[@name="title"]'],
13
- multivalue: false
14
- },
15
- summary: {
16
- xpaths: ['.//str[@name="abstract"]'],
17
- multivalue: false
18
- },
19
- data_centers: {
20
- xpaths: [''],
21
- default_values: [Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]],
22
- multivalue: false
23
- },
24
- authors: {
25
- xpaths: ['.//str[@name="author"]'],
26
- multivalue: false
27
- },
28
- keywords: {
29
- xpaths: ['.//arr[@name="keywords"]/str'],
30
- multivalue: true
31
- },
32
- last_revision_date: {
33
- xpaths: ['.//date[@name="updateDate"]'],
34
- default_values: [Helpers::SolrFormat.date_str(DateTime.now)], # formats the date into ISO8601 as in http://lucene.apache.org/solr/4_4_0/solr-core/org/apache/solr/schema/DateField.html
35
- multivalue: false,
36
- format: Helpers::SolrFormat::DATE
37
- },
38
- dataset_url: {
39
- xpaths: ['.//str[@name="dataUrl"]'],
40
- default_values: [''],
41
- multivalue: false
42
- },
43
- spatial_coverages: {
44
- xpaths: ['.'],
45
- multivalue: false,
46
- format: Helpers::DataOneFormat.method(:spatial_display)
47
- },
48
- spatial: {
49
- xpaths: ['.'],
50
- multivalue: false,
51
- format: Helpers::DataOneFormat.method(:spatial_index)
52
- },
53
- spatial_area: {
54
- xpaths: ['.'],
55
- multivalue: false,
56
- format: Helpers::DataOneFormat.method(:spatial_area)
57
- },
58
- temporal_coverages: {
59
- xpaths: ['.'],
60
- multivalue: false,
61
- format: Helpers::DataOneFormat.method(:temporal_coverage)
62
- },
63
- temporal_duration: {
64
- xpaths: ['.'],
65
- multivalue: false,
66
- format: Helpers::DataOneFormat.method(:temporal_duration)
67
- },
68
- temporal: {
69
- xpaths: ['.'],
70
- multivalue: false,
71
- format: Helpers::DataOneFormat.method(:temporal_index_string)
72
- },
73
- source: {
74
- xpaths: [''],
75
- default_values: ['ADE'],
76
- multivalue: false
77
- },
78
- facet_data_center: {
79
- xpaths: [''],
80
- default_values: ["#{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:long_name]} | #{Helpers::SolrFormat::DATA_CENTER_NAMES[:ADC][:short_name]}"],
81
- multivalue: false
82
- },
83
- facet_spatial_scope: {
84
- xpaths: ['.'],
85
- multivalue: false,
86
- format: Helpers::DataOneFormat.method(:facet_spatial_scope)
87
- },
88
- facet_temporal_duration: {
89
- xpaths: ['.'],
90
- default_values: [Helpers::SolrFormat::NOT_SPECIFIED],
91
- format: Helpers::DataOneFormat.method(:facet_temporal_duration),
92
- multivalue: false
93
- }
94
- }
95
- end
96
- end