geo_combine 0.1.0 → 0.5.0
- checksums.yaml +5 -5
- data/.travis.yml +6 -1
- data/Gemfile +2 -1
- data/README.md +108 -23
- data/geo_combine.gemspec +4 -2
- data/lib/geo_combine.rb +11 -1
- data/lib/geo_combine/bounding_box.rb +71 -0
- data/lib/geo_combine/ckan_metadata.rb +112 -0
- data/lib/geo_combine/esri_open_data.rb +0 -9
- data/lib/geo_combine/exceptions.rb +8 -0
- data/lib/geo_combine/formatting.rb +6 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +203 -0
- data/lib/geo_combine/geoblacklight.rb +80 -12
- data/lib/geo_combine/ogp.rb +229 -0
- data/lib/geo_combine/railtie.rb +7 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/tasks/geo_combine.rake +54 -20
- data/lib/xslt/fgdc2geoBL.xsl +95 -154
- data/lib/xslt/fgdc2html.xsl +105 -157
- data/lib/xslt/iso2geoBL.xsl +62 -84
- data/lib/xslt/iso2html.xsl +1107 -1070
- data/spec/features/iso2html_spec.rb +7 -1
- data/spec/fixtures/docs/basic_geoblacklight.json +5 -7
- data/spec/fixtures/docs/ckan.json +456 -0
- data/spec/fixtures/docs/full_geoblacklight.json +2 -8
- data/spec/fixtures/docs/geoblacklight_pre_v1.json +37 -0
- data/spec/fixtures/docs/ogp_harvard_line.json +28 -0
- data/spec/fixtures/docs/ogp_harvard_raster.json +28 -0
- data/spec/fixtures/docs/ogp_tufts_vector.json +31 -0
- data/spec/fixtures/json_docs.rb +20 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +59 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +114 -0
- data/spec/lib/geo_combine/esri_open_data_spec.rb +1 -14
- data/spec/lib/geo_combine/fgdc_spec.rb +11 -14
- data/spec/lib/geo_combine/formatting_spec.rb +6 -0
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +190 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +137 -11
- data/spec/lib/geo_combine/iso19139_spec.rb +5 -2
- data/spec/lib/geo_combine/ogp_spec.rb +163 -0
- data/spec/spec_helper.rb +1 -0
- metadata +63 -14
- data/lib/schema/geoblacklight-schema.json +0 -169
data/lib/geo_combine/esri_open_data.rb
@@ -26,14 +26,12 @@ module GeoCombine
     # @return [Hash]
     def geoblacklight_terms
       {
-        uuid: @metadata['id'],
         dc_identifier_s: @metadata['id'],
         dc_title_s: @metadata['name'],
         dc_description_s: sanitize_and_remove_lines(@metadata['description']),
         dc_rights_s: 'Public',
         dct_provenance_s: @metadata['owner'],
         dct_references_s: references,
-        georss_box_s: georss_box,
         # layer_id_s is used for describing a layer id for a web serivce (WMS, WFS) but is still a required field
         layer_id_s: '',
         layer_geom_type_s: @metadata['geometry_type'],
@@ -62,13 +60,6 @@ module GeoCombine
       }
     end
 
-    ##
-    # Builds a GeoRSS box
-    # @return [String]
-    def georss_box
-      "#{south} #{west} #{north} #{east}"
-    end
-
     ##
     # Builds a Solr Envelope using CQL syntax
     # @return [String]
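The removed `georss_box` helper emitted a space-delimited `south west north east` string; spatial extents are now carried solely by the Solr/CQL `ENVELOPE` string the remaining `envelope` method builds. A rough comparison of the two formats (coordinates invented for illustration):

```ruby
south, west, north, east = 37.0, -123.0, 38.0, -122.0

# Deprecated GeoRSS box, as the removed #georss_box built it:
"#{south} #{west} #{north} #{east}"
# => "37.0 -123.0 38.0 -122.0"

# Solr/CQL envelope (minX, maxX, maxY, minY), used for solr_geom:
"ENVELOPE(#{west}, #{east}, #{north}, #{south})"
# => "ENVELOPE(-123.0, -122.0, 38.0, 37.0)"
```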
data/lib/geo_combine/formatting.rb
@@ -15,7 +15,7 @@ module GeoCombine
     # @param [String] text
     # @return [String]
     def remove_lines(text)
-      text.
+      text.delete("\n")
     end
 
     ##
@@ -25,5 +25,10 @@ module GeoCombine
     def sanitize_and_remove_lines(text)
       remove_lines(sanitize(text))
     end
+
+    # slugs should be lowercase and only have a-z, A-Z, 0-9, and -
+    def sluggify(slug)
+      slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-').downcase
+    end
   end
 end
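A quick sketch of the new `sluggify` helper in use (the `SlugDemo` class and input string are illustrative only):

```ruby
require 'geo_combine'

# Any class can mix in the formatting helpers
class SlugDemo
  include GeoCombine::Formatting
end

# Runs of characters outside a-z, A-Z, 0-9 collapse to single hyphens, then downcase
SlugDemo.new.sluggify('Stanford_Univ./GIS Layer #1')
# => "stanford-univ-gis-layer-1"
```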
data/lib/geo_combine/geo_blacklight_harvester.rb
@@ -0,0 +1,203 @@
+# frozen_string_literal: true
+
+module GeoCombine
+  ##
+  # A class to harvest and index results from GeoBlacklight sites
+  # You can configure the sites to be harvested via a configure command.
+  # GeoCombine::GeoBlacklightHarvester.configure do
+  #   {
+  #     SITE: { host: 'https://example.com', params: { f: { dct_provenance_s: ['SITE'] } } }
+  #   }
+  # end
+  # The class configuration also allows for various other things to be configured:
+  #   - A debug parameter to print out details of what is being harvested and indexed
+  #   - crawl delays for each page of results (globally or on a per site basis)
+  #   - Solr's commitWithin parameter (defaults to 5000)
+  #   - A document transformer proc to modify a document before indexing (defaults to removing _version_, score, and timestamp)
+  # Example: GeoCombine::GeoBlacklightHarvester.new('SITE').index
+  class GeoBlacklightHarvester
+    require 'active_support/core_ext/object/to_query'
+
+    class << self
+      attr_writer :document_transformer
+
+      def configure(&block)
+        @config = yield block
+      end
+
+      def config
+        @config || {}
+      end
+
+      def document_transformer
+        @document_transformer || ->(document) do
+          document.delete('_version_')
+          document.delete('score')
+          document.delete('timestamp')
+          document
+        end
+      end
+    end
+
+
+    attr_reader :site, :site_key
+    def initialize(site_key)
+      @site_key = site_key
+      @site = self.class.config[site_key]
+
+      raise ArgumentError, "Site key #{@site_key.inspect} is not configured for #{self.class.name}" unless @site
+    end
+
+    def index
+      puts "Fetching page 1 @ #{base_url}&page=1" if self.class.config[:debug]
+      response = JSON.parse(Net::HTTP.get(URI("#{base_url}&page=1")))
+      response_class = BlacklightResponseVersionFactory.call(response)
+
+      response_class.new(response: response, base_url: base_url).documents.each do |docs|
+        docs.map! do |document|
+          self.class.document_transformer.call(document) if self.class.document_transformer
+        end.compact
+
+        puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
+        solr_connection.update params: { commitWithin: commit_within, overwrite: true },
+                               data: docs.to_json,
+                               headers: { 'Content-Type' => 'application/json' }
+
+        sleep(crawl_delay.to_i) if crawl_delay
+      end
+    end
+
+    ##
+    # A "factory" class to determine the blacklight response version to use
+    class BlacklightResponseVersionFactory
+      def self.call(json)
+        keys = json.keys
+        if keys.include?('response')
+          LegacyBlacklightResponse
+        elsif keys.any? && %w[links data].all? { |param| keys.include?(param) }
+          ModernBlacklightResponse
+        else
+          raise NotImplementedError, "The following json response was not able to be parsed by the GeoBlacklightHarvester\n#{json}"
+        end
+      end
+    end
+
+    class LegacyBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while current_page && total_pages && (current_page <= total_pages) do
+          yield response.dig('response', 'docs')
+
+          break if current_page == total_pages
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def url
+        "#{base_url}&page=#{page}"
+      end
+
+      def current_page
+        response.dig('response', 'pages', 'current_page')
+      end
+
+      def total_pages
+        response.dig('response', 'pages', 'total_pages')
+      end
+    end
+
+    ##
+    # Class to return documents from the Blacklight API (v7 and above)
+    class ModernBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while response && response['data'].any?
+          document_urls = response['data'].collect { |data| data.dig('links', 'self') }.compact
+
+          yield documents_from_urls(document_urls)
+
+          url = response.dig('links', 'next')
+          break unless url
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def documents_from_urls(urls)
+        puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+        urls.map do |url|
+          begin
+            JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
+          rescue => e
+            puts "Fetching \"#{url}/raw\" failed with #{e}"
+
+            nil
+          end
+        end.compact
+      end
+    end
+
+    private
+
+    def base_url
+      "#{site[:host]}?#{default_params.to_query}"
+    end
+
+    def solr_connection
+      solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
+
+      RSolr.connect url: solr_url, adapter: :net_http_persistent
+    end
+
+    def commit_within
+      self.class.config[:commit_within] || '5000'
+    end
+
+    def crawl_delay
+      site[:crawl_delay] || self.class.config[:crawl_delay]
+    end
+
+    def default_params
+      {
+        per_page: 100,
+        format: :json
+      }.merge(site[:params])
+    end
+  end
+end
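A minimal sketch of a harvest run wired up end to end. The site key, host, and delay values are illustrative, and this assumes the rsolr gem plus a Solr instance reachable at `ENV['SOLR_URL']` (or the localhost default above):

```ruby
require 'geo_combine/geo_blacklight_harvester'

GeoCombine::GeoBlacklightHarvester.configure do
  {
    SITE: { host: 'https://geo.example.edu', params: { f: { dct_provenance_s: ['SITE'] } } },
    debug: true,   # print progress while harvesting and indexing
    crawl_delay: 1 # seconds to sleep between result pages
  }
end

# Pages through the site's search results and posts each batch of
# documents to Solr, using commitWithin rather than hard commits.
GeoCombine::GeoBlacklightHarvester.new(:SITE).index
```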
data/lib/geo_combine/geoblacklight.rb
@@ -1,3 +1,7 @@
+require 'active_support/core_ext/object/blank'
+require 'active_support/core_ext/hash/except'
+require 'open-uri'
+
 module GeoCombine
   class Geoblacklight
     include GeoCombine::Formats
@@ -6,6 +10,18 @@ module GeoCombine
 
     attr_reader :metadata
 
+    GEOBLACKLIGHT_VERSION = 'v1.1.0'
+    SCHEMA_JSON_URL = "https://raw.githubusercontent.com/geoblacklight/geoblacklight/#{GEOBLACKLIGHT_VERSION}/schema/geoblacklight-schema.json".freeze
+    DEPRECATED_KEYS_V1 = %w[
+      uuid
+      georss_polygon_s
+      georss_point_s
+      georss_box_s
+      dc_relation_sm
+      solr_issued_i
+      solr_bbox
+    ].freeze
+
     ##
     # Initializes a GeoBlacklight object
     # @param [String] metadata be a valid JSON string document in
@@ -13,13 +29,16 @@ module GeoCombine
     # @param [Hash] fields enhancements to metadata that are merged with @metadata
     def initialize(metadata, fields = {})
       @metadata = JSON.parse(metadata).merge(fields)
+      @schema = nil
     end
 
     ##
     # Calls metadata enhancement methods for each key, value pair in the
     # metadata hash
     def enhance_metadata
-
+      upgrade_to_v1 if metadata['geoblacklight_version'].blank?
+
+      metadata.each do |key, value|
         translate_formats(key, value)
         enhance_subjects(key, value)
         format_proper_date(key, value)
@@ -31,16 +50,36 @@ module GeoCombine
     ##
     # Returns a string of JSON from a GeoBlacklight hash
     # @return (String)
-    def to_json
-
+    def to_json(options = {})
+      metadata.to_json(options)
     end
 
     ##
     # Validates a GeoBlacklight-Schema json document
     # @return [Boolean]
     def valid?
-      schema
-      JSON::Validator.validate!(schema, to_json,
+      @schema ||= JSON.parse(open(SCHEMA_JSON_URL).read)
+      JSON::Validator.validate!(@schema, to_json, fragment: '#/properties/layer') &&
+        dct_references_validate! &&
+        spatial_validate!
+    end
+
+    ##
+    # Validate dct_references_s
+    # @return [Boolean]
+    def dct_references_validate!
+      return true unless metadata.key?('dct_references_s') # TODO: shouldn't we require this field?
+      begin
+        ref = JSON.parse(metadata['dct_references_s'])
+        raise GeoCombine::Exceptions::InvalidDCTReferences, 'dct_references must be parsed to a Hash' unless ref.is_a?(Hash)
+        true
+      rescue JSON::ParserError => e
+        raise e, "Invalid JSON in dct_references_s: #{e.message}"
+      end
+    end
+
+    def spatial_validate!
+      GeoCombine::BoundingBox.from_envelope(metadata['solr_geom']).valid?
     end
 
     private
@@ -49,43 +88,72 @@ module GeoCombine
     # Enhances the 'dc_format_s' field by translating a format type to a valid
     # GeoBlacklight-Schema format
     def translate_formats(key, value)
-
+      return unless key == 'dc_format_s' && formats.include?(value)
+      metadata[key] = formats[value]
     end
 
     ##
     # Enhances the 'layer_geom_type_s' field by translating from known types
     def translate_geometry_type(key, value)
-
+      return unless key == 'layer_geom_type_s' && geometry_types.include?(value)
+      metadata[key] = geometry_types[value]
     end
 
     ##
     # Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
     # categories
    def enhance_subjects(key, value)
-
+      return unless key == 'dc_subject_sm'
+      metadata[key] = value.map do |val|
        if subjects.include?(val)
          subjects[val]
        else
          val
        end
-        end
+      end
     end
 
     ##
     # Formats the 'layer_modified_dt' to a valid valid RFC3339 date/time string
     # and ISO8601 (for indexing into Solr)
     def format_proper_date(key, value)
-
+      return unless key == 'layer_modified_dt'
+      metadata[key] = Time.parse(value).utc.iso8601
     end
 
     def fields_should_be_array(key, value)
-
+      return unless should_be_array.include?(key) && !value.is_a?(Array)
+      metadata[key] = [value]
     end
 
     ##
     # GeoBlacklight-Schema fields that should be type Array
     def should_be_array
-      [
+      %w[
+        dc_creator_sm
+        dc_subject_sm
+        dct_spatial_sm
+        dct_temporal_sm
+        dct_isPartOf_sm
+      ].freeze
+    end
+
+    ##
+    # Converts a pre-v1.0 schema into a compliant v1.0 schema
+    def upgrade_to_v1
+      metadata['geoblacklight_version'] = '1.0'
+
+      # ensure required fields
+      metadata['dc_identifier_s'] = metadata['uuid'] if metadata['dc_identifier_s'].blank?
+
+      # normalize to alphanum and - only
+      metadata['layer_slug_s'].gsub!(/[^[[:alnum:]]]+/, '-') if metadata['layer_slug_s'].present?
+
+      # remove deprecated fields
+      metadata.except!(*DEPRECATED_KEYS_V1)
+
+      # ensure we have a proper v1 record
+      valid?
    end
   end
 end
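A small sketch exercising the new `dct_references_s` validation. The field values are illustrative; note that the full `valid?` check (and `enhance_metadata` on a record without a `geoblacklight_version`, which triggers `upgrade_to_v1`) also fetches the published schema over the network:

```ruby
require 'geo_combine'

json = {
  'dc_title_s' => 'Example Layer',
  'dct_references_s' => '{"http://schema.org/url":"https://example.edu/layer"}'
}.to_json

record = GeoCombine::Geoblacklight.new(json)

# Returns true when dct_references_s parses to a JSON object; raises
# GeoCombine::Exceptions::InvalidDCTReferences otherwise.
record.dct_references_validate! # => true

record.to_json # round-trips the metadata hash back to a JSON string
```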
data/lib/geo_combine/ogp.rb
@@ -0,0 +1,229 @@
+require 'active_support/core_ext/object/blank'
+require 'cgi'
+
+module GeoCombine
+  # Data model for OpenGeoPortal metadata
+  class OGP
+    class InvalidMetadata < RuntimeError; end
+    include GeoCombine::Formatting
+    attr_reader :metadata
+
+    ##
+    # Initializes an OGP object for parsing
+    # @param [String] metadata a valid serialized JSON string from OGP instance
+    # @raise [InvalidMetadata]
+    def initialize(metadata)
+      @metadata = JSON.parse(metadata)
+      raise InvalidMetadata unless valid?
+    end
+
+    OGP_REQUIRED_FIELDS = %w[
+      Access
+      Institution
+      LayerDisplayName
+      LayerId
+      MaxX
+      MaxY
+      MinX
+      MinY
+      Name
+    ].freeze
+
+    ##
+    # Runs validity checks on OGP metadata to ensure fields are present
+    def valid?
+      OGP_REQUIRED_FIELDS.all? { |k| metadata[k].present? }
+    end
+
+    ##
+    # Creates and returns a Geoblacklight schema object from this metadata
+    # @return [GeoCombine::Geoblacklight]
+    def to_geoblacklight
+      GeoCombine::Geoblacklight.new(geoblacklight_terms.to_json)
+    end
+
+    ##
+    # Builds a Geoblacklight Schema type hash from Esri Open Data portal
+    # metadata
+    # @return [Hash]
+    def geoblacklight_terms
+      {
+        # Required fields
+        dc_identifier_s: identifier,
+        layer_slug_s: slug,
+        dc_title_s: metadata['LayerDisplayName'],
+        solr_geom: envelope,
+        dct_provenance_s: institution,
+        dc_rights_s: metadata['Access'],
+        geoblacklight_version: '1.0',
+
+        # Recommended fields
+        dc_description_s: metadata['Abstract'],
+        layer_geom_type_s: ogp_geom,
+        dct_references_s: references,
+        layer_id_s: "#{metadata['WorkspaceName']}:#{metadata['Name']}",
+
+        # Optional
+        dct_temporal_sm: [metadata['ContentDate']],
+        dc_format_s: ogp_formats,
+        # dct_issued_dt
+        # dc_language_s
+        dct_spatial_sm: placenames,
+        solr_year_i: year,
+        dc_publisher_s: metadata['Publisher'],
+        dc_subject_sm: subjects,
+        dc_type_s: 'Dataset'
+      }.delete_if { |_k, v| v.nil? }
+    end
+
+    def date
+      begin
+        DateTime.rfc3339(metadata['ContentDate'])
+      rescue
+        nil
+      end
+    end
+
+    def year
+      date.year unless date.nil?
+    end
+
+    ##
+    # Convert "Paper Map" to Raster, assumes all OGP "Paper Maps" have WMS
+    def ogp_geom
+      case metadata['DataType']
+      when 'Paper Map'
+        'Raster'
+      else
+        metadata['DataType']
+      end
+    end
+
+    ##
+    # OGP doesn't ship format types, so we just try and be clever here.
+    def ogp_formats
+      case metadata['DataType']
+      when 'Paper Map', 'Raster'
+        return 'GeoTIFF'
+      when 'Polygon', 'Point', 'Line'
+        return 'Shapefile'
+      else
+        raise ArgumentError, metadata['DataType']
+      end
+    end
+
+    ##
+    # Converts references to json
+    # @return [String]
+    def references
+      references_hash.to_json
+    end
+
+    ##
+    # Builds a Solr Envelope using CQL syntax
+    # @return [String]
+    def envelope
+      raise ArgumentError unless west >= -180 && west <= 180 &&
+                                 east >= -180 && east <= 180 &&
+                                 north >= -90 && north <= 90 &&
+                                 south >= -90 && south <= 90 &&
+                                 west <= east && south <= north
+      "ENVELOPE(#{west}, #{east}, #{north}, #{south})"
+    end
+
+    def subjects
+      fgdc.metadata.xpath('//themekey').map(&:text) if fgdc
+    end
+
+    def placenames
+      fgdc.metadata.xpath('//placekey').map(&:text) if fgdc
+    end
+
+    def fgdc
+      GeoCombine::Fgdc.new(metadata['FgdcText']) if metadata['FgdcText']
+    end
+
+    private
+
+    ##
+    # Builds references used for dct_references
+    # @return [Hash]
+    def references_hash
+      results = {
+        'http://www.opengis.net/def/serviceType/ogc/wfs' => location['wfs'],
+        'http://www.opengis.net/def/serviceType/ogc/wms' => location['wms'],
+        'http://schema.org/url' => location['url'],
+        download_uri => location['download']
+      }
+
+      # Handle null, "", and [""]
+      results.map { |k, v| { k => ([] << v).flatten.first } if v }
+             .flatten
+             .compact
+             .reduce({}, :merge)
+    end
+
+    def download_uri
+      return 'http://schema.org/DownloadAction' if institution == 'Harvard'
+      'http://schema.org/downloadUrl'
+    end
+
+    ##
+    # OGP "Location" field parsed
+    def location
+      JSON.parse(metadata['Location'])
+    end
+
+    def north
+      metadata['MaxY'].to_f
+    end
+
+    def south
+      metadata['MinY'].to_f
+    end
+
+    def east
+      metadata['MaxX'].to_f
+    end
+
+    def west
+      metadata['MinX'].to_f
+    end
+
+    def institution
+      metadata['Institution']
+    end
+
+    def identifier
+      CGI.escape(metadata['LayerId']) # TODO: why are we using CGI.escape?
+    end
+
+    def slug
+      name = metadata['LayerId'] || metadata['Name'] || ''
+      name = [institution, name].join('-') if institution.present? &&
+                                              !name.downcase.start_with?(institution.downcase)
+      sluggify(filter_name(name))
+    end
+
+    SLUG_BLACKLIST = %w[
+      SDE_DATA.
+      SDE.
+      SDE2.
+      GISPORTAL.GISOWNER01.
+      GISDATA.
+      MORIS.
+    ].freeze
+
+    def filter_name(name)
+      # strip out schema and usernames
+      SLUG_BLACKLIST.each do |blacklisted|
+        name.sub!(blacklisted, '')
+      end
+      unless name.size > 1
+        # use first word of title is empty name
+        name = metadata['LayerDisplayName'].split.first
+      end
+      name
+    end
+  end
+end
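A sketch of converting an OGP record with the new class. The document below is a trimmed, invented example carrying just the required fields plus a `Location`; real OGP records are much larger:

```ruby
require 'geo_combine'

ogp_json = {
  'Access' => 'Public',
  'Institution' => 'Tufts',
  'LayerDisplayName' => 'Somerville Parcels',
  'LayerId' => 'Tufts.Somerville_Parcels',
  'Name' => 'SOMERVILLE_PARCELS',
  'DataType' => 'Polygon',
  'MinX' => '-71.13', 'MinY' => '42.37', 'MaxX' => '-71.07', 'MaxY' => '42.42',
  'Location' => '{"wms":["https://geoserver.example.edu/wms"]}'
}.to_json

ogp = GeoCombine::OGP.new(ogp_json)  # raises InvalidMetadata if required fields are missing
ogp.ogp_formats                      # => "Shapefile" (vector DataTypes map to Shapefile)
ogp.geoblacklight_terms[:solr_geom]  # => "ENVELOPE(-71.13, -71.07, 42.42, 42.37)"
gbl = ogp.to_geoblacklight           # a GeoCombine::Geoblacklight instance
```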