geo_combine 0.2.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +4 -3
- data/Gemfile +2 -1
- data/README.md +107 -26
- data/geo_combine.gemspec +4 -2
- data/lib/geo_combine.rb +8 -1
- data/lib/geo_combine/bounding_box.rb +71 -0
- data/lib/geo_combine/ckan_metadata.rb +112 -0
- data/lib/geo_combine/exceptions.rb +2 -0
- data/lib/geo_combine/formatting.rb +6 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +204 -0
- data/lib/geo_combine/geoblacklight.rb +62 -13
- data/lib/geo_combine/ogp.rb +229 -0
- data/lib/geo_combine/railtie.rb +7 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/tasks/geo_combine.rake +54 -20
- data/lib/xslt/fgdc2html.xsl +105 -157
- data/lib/xslt/iso2html.xsl +1107 -1070
- data/spec/features/iso2html_spec.rb +7 -1
- data/spec/fixtures/docs/ckan.json +456 -0
- data/spec/fixtures/docs/geoblacklight_pre_v1.json +37 -0
- data/spec/fixtures/docs/ogp_harvard_line.json +28 -0
- data/spec/fixtures/docs/ogp_harvard_raster.json +28 -0
- data/spec/fixtures/docs/ogp_tufts_vector.json +31 -0
- data/spec/fixtures/json_docs.rb +20 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +59 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +114 -0
- data/spec/lib/geo_combine/formatting_spec.rb +6 -0
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +190 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +38 -7
- data/spec/lib/geo_combine/ogp_spec.rb +163 -0
- data/spec/spec_helper.rb +1 -0
- metadata +65 -15

data/lib/geo_combine/formatting.rb

```diff
@@ -15,7 +15,7 @@ module GeoCombine
     # @param [String] text
     # @return [String]
     def remove_lines(text)
-      text.
+      text.delete("\n")
    end

    ##
```
```diff
@@ -25,5 +25,10 @@ module GeoCombine
     def sanitize_and_remove_lines(text)
       remove_lines(sanitize(text))
     end
+
+    # slugs should be lowercase and only have a-z, A-Z, 0-9, and -
+    def sluggify(slug)
+      slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-').downcase
+    end
  end
end
```
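The new `sluggify` helper normalizes arbitrary names into GeoBlacklight-style slugs: anything outside `a-z`, `A-Z`, `0-9`, and `-` becomes a hyphen, runs of hyphens collapse to one, and the result is downcased. A minimal sketch of both helpers in use, assuming `GeoCombine::Formatting` is mixed in (the `SlugDemo` class below is hypothetical):

```ruby
require 'geo_combine'

# Hypothetical harness: Formatting is a mixin, so include it somewhere
class SlugDemo
  include GeoCombine::Formatting
end

demo = SlugDemo.new
demo.remove_lines("multi\nline")     # => "multiline"
demo.sluggify('SDE2.G1059_W57 1654') # => "sde2-g1059-w57-1654"
```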
data/lib/geo_combine/geo_blacklight_harvester.rb

```diff
@@ -0,0 +1,204 @@
+# frozen_string_literal: true
+
+module GeoCombine
+  ##
+  # A class to harvest and index results from GeoBlacklight sites
+  # You can configure the sites to be harvested via a configure command.
+  # GeoCombine::GeoBlacklightHarvester.configure do
+  #   {
+  #     SITE: { host: 'https://example.com', params: { f: { dct_provenance_s: ['SITE'] } } }
+  #   }
+  # end
+  # The class configuration also allows for various other things to be configured:
+  #   - A debug parameter to print out details of what is being harvested and indexed
+  #   - crawl delays for each page of results (globally or on a per site basis)
+  #   - Solr's commitWithin parameter (defaults to 5000)
+  #   - A document transformer proc to modify a document before indexing (defaults to removing _version_, score, and timestamp)
+  # Example: GeoCombine::GeoBlacklightHarvester.new('SITE').index
+  class GeoBlacklightHarvester
+    require 'active_support/core_ext/object/to_query'
+
+    class << self
+      attr_writer :document_transformer
+
+      def configure(&block)
+        @config = yield block
+      end
+
+      def config
+        @config || {}
+      end
+
+      def document_transformer
+        @document_transformer || ->(document) do
+          document.delete('_version_')
+          document.delete('score')
+          document.delete('timestamp')
+          document
+        end
+      end
+    end
+
+
+    attr_reader :site, :site_key
+    def initialize(site_key)
+      @site_key = site_key
+      @site = self.class.config[site_key]
+
+      raise ArgumentError, "Site key #{@site_key.inspect} is not configured for #{self.class.name}" unless @site
+    end
+
+    def index
+      puts "Fetching page 1 @ #{base_url}&page=1" if self.class.config[:debug]
+      response = JSON.parse(Net::HTTP.get(URI("#{base_url}&page=1")))
+      response_class = BlacklightResponseVersionFactory.call(response)
+
+      response_class.new(response: response, base_url: base_url).documents.each do |docs|
+        docs.map! do |document|
+          self.class.document_transformer.call(document) if self.class.document_transformer
+        end.compact
+
+        puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
+        solr_connection.update params: { commitWithin: commit_within, overwrite: true },
+                               data: docs.to_json,
+                               headers: { 'Content-Type' => 'application/json' }
+
+        sleep(crawl_delay.to_i) if crawl_delay
+      end
+    end
+
+    ##
+    # A "factory" class to determine the blacklight response version to use
+    class BlacklightResponseVersionFactory
+      def self.call(json)
+        keys = json.keys
+        if keys.include?('response')
+          LegacyBlacklightResponse
+        elsif keys.any? && %w[links data].all? { |param| keys.include?(param) }
+          ModernBlacklightResponse
+        else
+          raise NotImplementedError, "The following json response was not able to be parsed by the GeoBlacklightHarvester\n#{json}"
+        end
+      end
+    end
+
+    class LegacyBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while current_page && total_pages && (current_page <= total_pages) do
+          yield response.dig('response', 'docs')
+
+          break if current_page == total_pages
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def url
+        "#{base_url}&page=#{page}"
+      end
+
+      def current_page
+        response.dig('response', 'pages', 'current_page')
+      end
+
+      def total_pages
+        response.dig('response', 'pages', 'total_pages')
+      end
+    end
+
+    ##
+    # Class to return documents from the Blacklight API (v7 and above)
+    class ModernBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while response && response['data'].any?
+          document_urls = response['data'].collect { |data| data.dig('links', 'self') }.compact
+
+          yield documents_from_urls(document_urls)
+
+          url = response.dig('links', 'next')
+          break unless url
+          url = "#{url}&format=json"
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def documents_from_urls(urls)
+        puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+        urls.map do |url|
+          begin
+            JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
+          rescue => e
+            puts "Fetching \"#{url}/raw\" failed with #{e}"
+
+            nil
+          end
+        end.compact
+      end
+    end
+
+    private
+
+    def base_url
+      "#{site[:host]}?#{default_params.to_query}"
+    end
+
+    def solr_connection
+      solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
+
+      RSolr.connect url: solr_url, adapter: :net_http_persistent
+    end
+
+    def commit_within
+      self.class.config[:commit_within] || '5000'
+    end
+
+    def crawl_delay
+      site[:crawl_delay] || self.class.config[:crawl_delay]
+    end
+
+    def default_params
+      {
+        per_page: 100,
+        format: :json
+      }.merge(site[:params])
+    end
+  end
+end
```
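Taken together, the harvester is driven entirely by class-level configuration. A usage sketch based on the documentation comment above (the site key, host, and crawl settings are made up; `index` assumes the `rsolr` gem and a Solr instance reachable via `SOLR_URL`):

```ruby
require 'geo_combine/geo_blacklight_harvester'

# Made-up site key and host, following the documented configure block
GeoCombine::GeoBlacklightHarvester.configure do
  {
    SITE: { host: 'https://geo.example.edu', params: { f: { dct_provenance_s: ['SITE'] } } },
    debug: true,   # log each page fetch and Solr batch
    crawl_delay: 1 # seconds to sleep between result pages
  }
end

# Pages through SITE's search results and posts each batch of documents to Solr
GeoCombine::GeoBlacklightHarvester.new(:SITE).index
```

`BlacklightResponseVersionFactory` inspects the first page of JSON to pick a pager: a top-level `response` key means the legacy Blacklight JSON shape, while `links`/`data` keys mean the Blacklight 7+ JSON:API shape, so the same `index` call works against either generation of site.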
data/lib/geo_combine/geoblacklight.rb

```diff
@@ -1,3 +1,5 @@
+require 'active_support/core_ext/object/blank'
+require 'active_support/core_ext/hash/except'
 require 'open-uri'

 module GeoCombine
```
```diff
@@ -9,6 +11,16 @@ module GeoCombine
     attr_reader :metadata

     GEOBLACKLIGHT_VERSION = 'v1.1.0'
+    SCHEMA_JSON_URL = "https://raw.githubusercontent.com/geoblacklight/geoblacklight/#{GEOBLACKLIGHT_VERSION}/schema/geoblacklight-schema.json".freeze
+    DEPRECATED_KEYS_V1 = %w[
+      uuid
+      georss_polygon_s
+      georss_point_s
+      georss_box_s
+      dc_relation_sm
+      solr_issued_i
+      solr_bbox
+    ].freeze

     ##
     # Initializes a GeoBlacklight object
```
```diff
@@ -24,7 +36,9 @@ module GeoCombine
     # Calls metadata enhancement methods for each key, value pair in the
     # metadata hash
     def enhance_metadata
-      metadata.each do |key, value|
+      upgrade_to_v1 if metadata['geoblacklight_version'].blank?
+
+      metadata.each do |key, value|
         translate_formats(key, value)
         enhance_subjects(key, value)
         format_proper_date(key, value)
```
```diff
@@ -36,23 +50,25 @@ module GeoCombine
     ##
     # Returns a string of JSON from a GeoBlacklight hash
     # @return (String)
-    def to_json
-      metadata.to_json
+    def to_json(options = {})
+      metadata.to_json(options)
     end

     ##
     # Validates a GeoBlacklight-Schema json document
     # @return [Boolean]
     def valid?
-      @schema ||= JSON.parse(open(
-      JSON::Validator.validate!(@schema, to_json, fragment: '#/properties/layer') &&
+      @schema ||= JSON.parse(open(SCHEMA_JSON_URL).read)
+      JSON::Validator.validate!(@schema, to_json, fragment: '#/properties/layer') &&
+        dct_references_validate! &&
+        spatial_validate!
     end

     ##
     # Validate dct_references_s
     # @return [Boolean]
     def dct_references_validate!
-      return true unless metadata.key?('dct_references_s')
+      return true unless metadata.key?('dct_references_s') # TODO: shouldn't we require this field?
       begin
         ref = JSON.parse(metadata['dct_references_s'])
         raise GeoCombine::Exceptions::InvalidDCTReferences, 'dct_references must be parsed to a Hash' unless ref.is_a?(Hash)
```
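`dct_references_validate!` raises rather than returning false when the field is present but malformed. A quick sketch of the failure mode with a made-up record whose `dct_references_s` is valid JSON but parses to an Array, not a Hash:

```ruby
require 'geo_combine'

record = GeoCombine::Geoblacklight.new({ 'dct_references_s' => '["not", "a", "hash"]' }.to_json)
record.dct_references_validate!
# raises GeoCombine::Exceptions::InvalidDCTReferences:
#   dct_references must be parsed to a Hash
```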
```diff
@@ -62,49 +78,82 @@ module GeoCombine
       end
     end

+    def spatial_validate!
+      GeoCombine::BoundingBox.from_envelope(metadata['solr_geom']).valid?
+    end
+
     private

     ##
     # Enhances the 'dc_format_s' field by translating a format type to a valid
     # GeoBlacklight-Schema format
     def translate_formats(key, value)
-      metadata[key] = formats[value] if key == 'dc_format_s' && formats.include?(value)
+      return unless key == 'dc_format_s' && formats.include?(value)
+      metadata[key] = formats[value]
     end

     ##
     # Enhances the 'layer_geom_type_s' field by translating from known types
     def translate_geometry_type(key, value)
-      metadata[key] = geometry_types[value] if key == 'layer_geom_type_s' && geometry_types.include?(value)
+      return unless key == 'layer_geom_type_s' && geometry_types.include?(value)
+      metadata[key] = geometry_types[value]
     end

     ##
     # Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
     # categories
     def enhance_subjects(key, value)
-      metadata[key] = value.map do |val|
+      return unless key == 'dc_subject_sm'
+      metadata[key] = value.map do |val|
         if subjects.include?(val)
           subjects[val]
         else
           val
         end
-      end if key == 'dc_subject_sm'
+      end
     end

     ##
     # Formats the 'layer_modified_dt' to a valid RFC3339 date/time string
     # and ISO8601 (for indexing into Solr)
     def format_proper_date(key, value)
-      metadata[key] = Time.parse(value).utc.iso8601 if key == 'layer_modified_dt'
+      return unless key == 'layer_modified_dt'
+      metadata[key] = Time.parse(value).utc.iso8601
     end

     def fields_should_be_array(key, value)
-      metadata[key] = [value] if should_be_array.include?(key) && !value.is_a?(Array)
+      return unless should_be_array.include?(key) && !value.is_a?(Array)
+      metadata[key] = [value]
     end

     ##
     # GeoBlacklight-Schema fields that should be type Array
     def should_be_array
-      ['dc_creator_sm', 'dc_subject_sm', 'dct_spatial_sm', 'dct_temporal_sm', 'dct_isPartOf_sm']
+      %w[
+        dc_creator_sm
+        dc_subject_sm
+        dct_spatial_sm
+        dct_temporal_sm
+        dct_isPartOf_sm
+      ].freeze
+    end
+
+    ##
+    # Converts a pre-v1.0 schema into a compliant v1.0 schema
+    def upgrade_to_v1
+      metadata['geoblacklight_version'] = '1.0'
+
+      # ensure required fields
+      metadata['dc_identifier_s'] = metadata['uuid'] if metadata['dc_identifier_s'].blank?
+
+      # normalize to alphanum and - only
+      metadata['layer_slug_s'].gsub!(/[^[[:alnum:]]]+/, '-') if metadata['layer_slug_s'].present?
+
+      # remove deprecated fields
+      metadata.except!(*DEPRECATED_KEYS_V1)
+
+      # ensure we have a proper v1 record
+      valid?
     end
   end
 end
```
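The upgrade path in practice: any record arriving without a `geoblacklight_version` is migrated in place before the per-field enhancements run. A sketch with a made-up pre-v1 document; note that `upgrade_to_v1` finishes by calling `valid?`, which fetches the schema from GitHub, so this needs network access and a record complete enough to satisfy the v1 schema:

```ruby
require 'geo_combine'

# Made-up pre-v1 record: carries the deprecated uuid key, no geoblacklight_version
doc = {
  'uuid' => 'urn:geodata.example.edu:example.layer',
  'dc_title_s' => 'Example Layer',
  'dc_rights_s' => 'Public',
  'dct_provenance_s' => 'Example',
  'layer_slug_s' => 'example layer',
  'solr_geom' => 'ENVELOPE(-120.0, -119.0, 40.0, 39.0)'
}

record = GeoCombine::Geoblacklight.new(doc.to_json)
record.enhance_metadata                  # blank version triggers upgrade_to_v1
record.metadata['geoblacklight_version'] # => "1.0"
record.metadata['dc_identifier_s']       # => filled from the old uuid field
record.metadata.key?('uuid')             # => false; deprecated keys are stripped
```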
data/lib/geo_combine/ogp.rb

```diff
@@ -0,0 +1,229 @@
+require 'active_support/core_ext/object/blank'
+require 'cgi'
+
+module GeoCombine
+  # Data model for OpenGeoPortal metadata
+  class OGP
+    class InvalidMetadata < RuntimeError; end
+    include GeoCombine::Formatting
+    attr_reader :metadata
+
+    ##
+    # Initializes an OGP object for parsing
+    # @param [String] metadata a valid serialized JSON string from OGP instance
+    # @raise [InvalidMetadata]
+    def initialize(metadata)
+      @metadata = JSON.parse(metadata)
+      raise InvalidMetadata unless valid?
+    end
+
+    OGP_REQUIRED_FIELDS = %w[
+      Access
+      Institution
+      LayerDisplayName
+      LayerId
+      MaxX
+      MaxY
+      MinX
+      MinY
+      Name
+    ].freeze
+
+    ##
+    # Runs validity checks on OGP metadata to ensure fields are present
+    def valid?
+      OGP_REQUIRED_FIELDS.all? { |k| metadata[k].present? }
+    end
+
+    ##
+    # Creates and returns a Geoblacklight schema object from this metadata
+    # @return [GeoCombine::Geoblacklight]
+    def to_geoblacklight
+      GeoCombine::Geoblacklight.new(geoblacklight_terms.to_json)
+    end
+
+    ##
+    # Builds a Geoblacklight Schema type hash from Esri Open Data portal
+    # metadata
+    # @return [Hash]
+    def geoblacklight_terms
+      {
+        # Required fields
+        dc_identifier_s: identifier,
+        layer_slug_s: slug,
+        dc_title_s: metadata['LayerDisplayName'],
+        solr_geom: envelope,
+        dct_provenance_s: institution,
+        dc_rights_s: metadata['Access'],
+        geoblacklight_version: '1.0',
+
+        # Recommended fields
+        dc_description_s: metadata['Abstract'],
+        layer_geom_type_s: ogp_geom,
+        dct_references_s: references,
+        layer_id_s: "#{metadata['WorkspaceName']}:#{metadata['Name']}",
+
+        # Optional
+        dct_temporal_sm: [metadata['ContentDate']],
+        dc_format_s: ogp_formats,
+        # dct_issued_dt
+        # dc_language_s
+        dct_spatial_sm: placenames,
+        solr_year_i: year,
+        dc_publisher_s: metadata['Publisher'],
+        dc_subject_sm: subjects,
+        dc_type_s: 'Dataset'
+      }.delete_if { |_k, v| v.nil? }
+    end
+
+    def date
+      begin
+        DateTime.rfc3339(metadata['ContentDate'])
+      rescue
+        nil
+      end
+    end
+
+    def year
+      date.year unless date.nil?
+    end
+
+    ##
+    # Convert "Paper Map" to Raster, assumes all OGP "Paper Maps" have WMS
+    def ogp_geom
+      case metadata['DataType']
+      when 'Paper Map'
+        'Raster'
+      else
+        metadata['DataType']
+      end
+    end
+
+    ##
+    # OGP doesn't ship format types, so we just try and be clever here.
+    def ogp_formats
+      case metadata['DataType']
+      when 'Paper Map', 'Raster'
+        return 'GeoTIFF'
+      when 'Polygon', 'Point', 'Line'
+        return 'Shapefile'
+      else
+        raise ArgumentError, metadata['DataType']
+      end
+    end
+
+    ##
+    # Converts references to json
+    # @return [String]
+    def references
+      references_hash.to_json
+    end
+
+    ##
+    # Builds a Solr Envelope using CQL syntax
+    # @return [String]
+    def envelope
+      raise ArgumentError unless west >= -180 && west <= 180 &&
+                                 east >= -180 && east <= 180 &&
+                                 north >= -90 && north <= 90 &&
+                                 south >= -90 && south <= 90 &&
+                                 west <= east && south <= north
+      "ENVELOPE(#{west}, #{east}, #{north}, #{south})"
+    end
+
+    def subjects
+      fgdc.metadata.xpath('//themekey').map(&:text) if fgdc
+    end
+
+    def placenames
+      fgdc.metadata.xpath('//placekey').map(&:text) if fgdc
+    end
+
+    def fgdc
+      GeoCombine::Fgdc.new(metadata['FgdcText']) if metadata['FgdcText']
+    end
+
+    private
+
+    ##
+    # Builds references used for dct_references
+    # @return [Hash]
+    def references_hash
+      results = {
+        'http://www.opengis.net/def/serviceType/ogc/wfs' => location['wfs'],
+        'http://www.opengis.net/def/serviceType/ogc/wms' => location['wms'],
+        'http://schema.org/url' => location['url'],
+        download_uri => location['download']
+      }
+
+      # Handle null, "", and [""]
+      results.map { |k, v| { k => ([] << v).flatten.first } if v }
+             .flatten
+             .compact
+             .reduce({}, :merge)
+    end
+
+    def download_uri
+      return 'http://schema.org/DownloadAction' if institution == 'Harvard'
+      'http://schema.org/downloadUrl'
+    end
+
+    ##
+    # OGP "Location" field parsed
+    def location
+      JSON.parse(metadata['Location'])
+    end
+
+    def north
+      metadata['MaxY'].to_f
+    end
+
+    def south
+      metadata['MinY'].to_f
+    end
+
+    def east
+      metadata['MaxX'].to_f
+    end
+
+    def west
+      metadata['MinX'].to_f
+    end
+
+    def institution
+      metadata['Institution']
+    end
+
+    def identifier
+      CGI.escape(metadata['LayerId']) # TODO: why are we using CGI.escape?
+    end
+
+    def slug
+      name = metadata['LayerId'] || metadata['Name'] || ''
+      name = [institution, name].join('-') if institution.present? &&
+                                              !name.downcase.start_with?(institution.downcase)
+      sluggify(filter_name(name))
+    end
+
+    SLUG_BLACKLIST = %w[
+      SDE_DATA.
+      SDE.
+      SDE2.
+      GISPORTAL.GISOWNER01.
+      GISDATA.
+      MORIS.
+    ].freeze
+
+    def filter_name(name)
+      # strip out schema and usernames
+      SLUG_BLACKLIST.each do |blacklisted|
+        name.sub!(blacklisted, '')
+      end
+      unless name.size > 1
+        # use first word of title if name is empty
+        name = metadata['LayerDisplayName'].split.first
+      end
+      name
+    end
+  end
+end
```
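End to end, the OGP reader takes a serialized OGP Solr document and emits a GeoBlacklight record. A sketch using a minimal made-up OGP document containing the required fields plus the ones the conversion reads; no network access is needed for this path:

```ruby
require 'geo_combine'

# Minimal made-up OGP record; every OGP_REQUIRED_FIELDS entry must be present
ogp_json = {
  'Access' => 'Public',
  'Institution' => 'Tufts',
  'LayerDisplayName' => 'Some Layer',
  'LayerId' => 'Tufts.SomeLayer',
  'MinX' => '-71.3', 'MinY' => '42.2', 'MaxX' => '-70.8', 'MaxY' => '42.5',
  'Name' => 'SOMELAYER',
  'DataType' => 'Polygon',
  'Location' => '{"wms": "https://geoserver.example.edu/wms"}'
}.to_json

ogp = GeoCombine::OGP.new(ogp_json)
ogp.ogp_formats # => "Shapefile", inferred from DataType
ogp.envelope    # => "ENVELOPE(-71.3, -70.8, 42.5, 42.2)"
gbl = ogp.to_geoblacklight
gbl.to_json     # GeoBlacklight JSON ready for enhancement or indexing
```

`OGP.new` raises `InvalidMetadata` if any required field is missing or blank, so bad records surface at parse time rather than at indexing.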