geo_combine 0.2.0 → 0.5.1
- checksums.yaml +5 -5
- data/.travis.yml +4 -3
- data/Gemfile +2 -1
- data/README.md +107 -26
- data/geo_combine.gemspec +4 -2
- data/lib/geo_combine.rb +8 -1
- data/lib/geo_combine/bounding_box.rb +71 -0
- data/lib/geo_combine/ckan_metadata.rb +112 -0
- data/lib/geo_combine/exceptions.rb +2 -0
- data/lib/geo_combine/formatting.rb +6 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +204 -0
- data/lib/geo_combine/geoblacklight.rb +62 -13
- data/lib/geo_combine/ogp.rb +229 -0
- data/lib/geo_combine/railtie.rb +7 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/tasks/geo_combine.rake +54 -20
- data/lib/xslt/fgdc2html.xsl +105 -157
- data/lib/xslt/iso2html.xsl +1107 -1070
- data/spec/features/iso2html_spec.rb +7 -1
- data/spec/fixtures/docs/ckan.json +456 -0
- data/spec/fixtures/docs/geoblacklight_pre_v1.json +37 -0
- data/spec/fixtures/docs/ogp_harvard_line.json +28 -0
- data/spec/fixtures/docs/ogp_harvard_raster.json +28 -0
- data/spec/fixtures/docs/ogp_tufts_vector.json +31 -0
- data/spec/fixtures/json_docs.rb +20 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +59 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +114 -0
- data/spec/lib/geo_combine/formatting_spec.rb +6 -0
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +190 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +38 -7
- data/spec/lib/geo_combine/ogp_spec.rb +163 -0
- data/spec/spec_helper.rb +1 -0
- metadata +65 -15
data/lib/geo_combine/formatting.rb
@@ -15,7 +15,7 @@ module GeoCombine
     # @param [String] text
     # @return [String]
     def remove_lines(text)
-      text.
+      text.delete("\n")
     end

     ##
@@ -25,5 +25,10 @@ module GeoCombine
     def sanitize_and_remove_lines(text)
       remove_lines(sanitize(text))
     end
+
+    # slugs should be lowercase and only have a-z, A-Z, 0-9, and -
+    def sluggify(slug)
+      slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-').downcase
+    end
   end
 end
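For context, the new sluggify helper normalizes arbitrary layer names into GeoBlacklight-style slugs. A minimal sketch of its behavior once the GeoCombine::Formatting mixin is included (the SlugDemo host class is hypothetical):

    require 'geo_combine'

    # Hypothetical host class; sluggify is a mixin instance method
    class SlugDemo
      include GeoCombine::Formatting
    end

    SlugDemo.new.sluggify('HARVARD.SDE2.G1059_W57_1654_PF_SH1')
    # => "harvard-sde2-g1059-w57-1654-pf-sh1"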
data/lib/geo_combine/geo_blacklight_harvester.rb
@@ -0,0 +1,204 @@
+# frozen_string_literal: true
+
+module GeoCombine
+  ##
+  # A class to harvest and index results from GeoBlacklight sites
+  # You can configure the sites to be harvested via a configure command.
+  # GeoCombine::GeoBlacklightHarvester.configure do
+  #   {
+  #     SITE: { host: 'https://example.com', params: { f: { dct_provenance_s: ['SITE'] } } }
+  #   }
+  # end
+  # The class configuration also allows for various other things to be configured:
+  #   - A debug parameter to print out details of what is being harvested and indexed
+  #   - crawl delays for each page of results (globally or on a per site basis)
+  #   - Solr's commitWithin parameter (defaults to 5000)
+  #   - A document transformer proc to modify a document before indexing (defaults to removing _version_, score, and timestamp)
+  # Example: GeoCombine::GeoBlacklightHarvester.new('SITE').index
+  class GeoBlacklightHarvester
+    require 'active_support/core_ext/object/to_query'
+
+    class << self
+      attr_writer :document_transformer
+
+      def configure(&block)
+        @config = yield block
+      end
+
+      def config
+        @config || {}
+      end
+
+      def document_transformer
+        @document_transformer || ->(document) do
+          document.delete('_version_')
+          document.delete('score')
+          document.delete('timestamp')
+          document
+        end
+      end
+    end
+
+
+    attr_reader :site, :site_key
+    def initialize(site_key)
+      @site_key = site_key
+      @site = self.class.config[site_key]
+
+      raise ArgumentError, "Site key #{@site_key.inspect} is not configured for #{self.class.name}" unless @site
+    end
+
+    def index
+      puts "Fetching page 1 @ #{base_url}&page=1" if self.class.config[:debug]
+      response = JSON.parse(Net::HTTP.get(URI("#{base_url}&page=1")))
+      response_class = BlacklightResponseVersionFactory.call(response)
+
+      response_class.new(response: response, base_url: base_url).documents.each do |docs|
+        docs.map! do |document|
+          self.class.document_transformer.call(document) if self.class.document_transformer
+        end.compact
+
+        puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
+        solr_connection.update params: { commitWithin: commit_within, overwrite: true },
+                               data: docs.to_json,
+                               headers: { 'Content-Type' => 'application/json' }
+
+        sleep(crawl_delay.to_i) if crawl_delay
+      end
+    end
+
+    ##
+    # A "factory" class to determine the blacklight response version to use
+    class BlacklightResponseVersionFactory
+      def self.call(json)
+        keys = json.keys
+        if keys.include?('response')
+          LegacyBlacklightResponse
+        elsif keys.any? && %w[links data].all? { |param| keys.include?(param) }
+          ModernBlacklightResponse
+        else
+          raise NotImplementedError, "The following json response was not able to be parsed by the GeoBlacklightHarvester\n#{json}"
+        end
+      end
+    end
+
+    class LegacyBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while current_page && total_pages && (current_page <= total_pages) do
+          yield response.dig('response', 'docs')
+
+          break if current_page == total_pages
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def url
+        "#{base_url}&page=#{page}"
+      end
+
+      def current_page
+        response.dig('response', 'pages', 'current_page')
+      end
+
+      def total_pages
+        response.dig('response', 'pages', 'total_pages')
+      end
+    end
+
+    ##
+    # Class to return documents from the Blacklight API (v7 and above)
+    class ModernBlacklightResponse
+      attr_reader :base_url
+      attr_accessor :response, :page
+      def initialize(response:, base_url:)
+        @base_url = base_url
+        @response = response
+        @page = 1
+      end
+
+      def documents
+        return enum_for(:documents) unless block_given?
+
+        while response && response['data'].any?
+          document_urls = response['data'].collect { |data| data.dig('links', 'self') }.compact
+
+          yield documents_from_urls(document_urls)
+
+          url = response.dig('links', 'next')
+          break unless url
+          url = "#{url}&format=json"
+          self.page += 1
+          puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+          begin
+            self.response = JSON.parse(Net::HTTP.get(URI(url)))
+          rescue => e
+            puts "Request for #{url} failed with #{e}"
+            self.response = nil
+          end
+        end
+      end
+
+      private
+
+      def documents_from_urls(urls)
+        puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
+        urls.map do |url|
+          begin
+            JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
+          rescue => e
+            puts "Fetching \"#{url}/raw\" failed with #{e}"
+
+            nil
+          end
+        end.compact
+      end
+    end
+
+    private
+
+    def base_url
+      "#{site[:host]}?#{default_params.to_query}"
+    end
+
+    def solr_connection
+      solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
+
+      RSolr.connect url: solr_url, adapter: :net_http_persistent
+    end
+
+    def commit_within
+      self.class.config[:commit_within] || '5000'
+    end
+
+    def crawl_delay
+      site[:crawl_delay] || self.class.config[:crawl_delay]
+    end
+
+    def default_params
+      {
+        per_page: 100,
+        format: :json
+      }.merge(site[:params])
+    end
+  end
+end
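The class comment above describes the harvester's configuration hooks. A sketch of a plausible configure-and-run session (the :EXAMPLE site key, host, and facet params are illustrative; Solr is located via the SOLR_URL environment variable or the localhost default in solr_connection):

    require 'geo_combine'
    require 'geo_combine/geo_blacklight_harvester'

    GeoCombine::GeoBlacklightHarvester.configure do
      {
        debug: true,            # print each page and document count while indexing
        crawl_delay: 1,         # global seconds to sleep between result pages
        commit_within: '10000', # passed through to Solr's commitWithin
        EXAMPLE: {              # illustrative site key and host
          host: 'https://geoblacklight.example.edu',
          params: { f: { dct_provenance_s: ['Example'] } }
        }
      }
    end

    GeoCombine::GeoBlacklightHarvester.new(:EXAMPLE).index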
data/lib/geo_combine/geoblacklight.rb
@@ -1,3 +1,5 @@
+require 'active_support/core_ext/object/blank'
+require 'active_support/core_ext/hash/except'
 require 'open-uri'

 module GeoCombine
@@ -9,6 +11,16 @@ module GeoCombine
     attr_reader :metadata

     GEOBLACKLIGHT_VERSION = 'v1.1.0'
+    SCHEMA_JSON_URL = "https://raw.githubusercontent.com/geoblacklight/geoblacklight/#{GEOBLACKLIGHT_VERSION}/schema/geoblacklight-schema.json".freeze
+    DEPRECATED_KEYS_V1 = %w[
+      uuid
+      georss_polygon_s
+      georss_point_s
+      georss_box_s
+      dc_relation_sm
+      solr_issued_i
+      solr_bbox
+    ].freeze

     ##
     # Initializes a GeoBlacklight object
@@ -24,7 +36,9 @@ module GeoCombine
     # Calls metadata enhancement methods for each key, value pair in the
     # metadata hash
     def enhance_metadata
-      metadata.each do |key, value|
+      upgrade_to_v1 if metadata['geoblacklight_version'].blank?
+
+      metadata.each do |key, value|
         translate_formats(key, value)
         enhance_subjects(key, value)
         format_proper_date(key, value)
@@ -36,23 +50,25 @@ module GeoCombine
     ##
     # Returns a string of JSON from a GeoBlacklight hash
     # @return (String)
-    def to_json
-      metadata.to_json
+    def to_json(options = {})
+      metadata.to_json(options)
     end

     ##
     # Validates a GeoBlacklight-Schema json document
     # @return [Boolean]
     def valid?
-      @schema ||= JSON.parse(open(
-      JSON::Validator.validate!(@schema, to_json, fragment: '#/properties/layer')
+      @schema ||= JSON.parse(open(SCHEMA_JSON_URL).read)
+      JSON::Validator.validate!(@schema, to_json, fragment: '#/properties/layer') &&
+        dct_references_validate! &&
+        spatial_validate!
     end

     ##
     # Validate dct_references_s
     # @return [Boolean]
     def dct_references_validate!
-      return true unless metadata.key?('dct_references_s')
+      return true unless metadata.key?('dct_references_s') # TODO: shouldn't we require this field?
       begin
         ref = JSON.parse(metadata['dct_references_s'])
         raise GeoCombine::Exceptions::InvalidDCTReferences, 'dct_references must be parsed to a Hash' unless ref.is_a?(Hash)
@@ -62,49 +78,82 @@ module GeoCombine
       end
     end

+    def spatial_validate!
+      GeoCombine::BoundingBox.from_envelope(metadata['solr_geom']).valid?
+    end
+
     private

     ##
     # Enhances the 'dc_format_s' field by translating a format type to a valid
     # GeoBlacklight-Schema format
     def translate_formats(key, value)
-      metadata[key] = formats[value] if key == 'dc_format_s' && formats.include?(value)
+      return unless key == 'dc_format_s' && formats.include?(value)
+      metadata[key] = formats[value]
     end

     ##
     # Enhances the 'layer_geom_type_s' field by translating from known types
     def translate_geometry_type(key, value)
-      metadata[key] = geometry_types[value] if key == 'layer_geom_type_s' && geometry_types.include?(value)
+      return unless key == 'layer_geom_type_s' && geometry_types.include?(value)
+      metadata[key] = geometry_types[value]
     end

     ##
     # Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
     # categories
     def enhance_subjects(key, value)
-      metadata[key] = value.map do |val|
+      return unless key == 'dc_subject_sm'
+      metadata[key] = value.map do |val|
         if subjects.include?(val)
           subjects[val]
         else
           val
         end
-      end
+      end
     end

     ##
     # Formats the 'layer_modified_dt' to a valid valid RFC3339 date/time string
     # and ISO8601 (for indexing into Solr)
     def format_proper_date(key, value)
-      metadata[key] = Time.parse(value).utc.iso8601 if key == 'layer_modified_dt'
+      return unless key == 'layer_modified_dt'
+      metadata[key] = Time.parse(value).utc.iso8601
     end

     def fields_should_be_array(key, value)
-      metadata[key] = [value] if should_be_array.include?(key) && !value.is_a?(Array)
+      return unless should_be_array.include?(key) && !value.is_a?(Array)
+      metadata[key] = [value]
     end

     ##
     # GeoBlacklight-Schema fields that should be type Array
     def should_be_array
-      [
+      %w[
+        dc_creator_sm
+        dc_subject_sm
+        dct_spatial_sm
+        dct_temporal_sm
+        dct_isPartOf_sm
+      ].freeze
+    end
+
+    ##
+    # Converts a pre-v1.0 schema into a compliant v1.0 schema
+    def upgrade_to_v1
+      metadata['geoblacklight_version'] = '1.0'
+
+      # ensure required fields
+      metadata['dc_identifier_s'] = metadata['uuid'] if metadata['dc_identifier_s'].blank?
+
+      # normalize to alphanum and - only
+      metadata['layer_slug_s'].gsub!(/[^[[:alnum:]]]+/, '-') if metadata['layer_slug_s'].present?
+
+      # remove deprecated fields
+      metadata.except!(*DEPRECATED_KEYS_V1)
+
+      # ensure we have a proper v1 record
+      valid?
     end
   end
 end
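Taken together, the new upgrade path means a pre-v1.0 document is migrated the first time it is enhanced. A sketch against the pre-v1 fixture added in this release (note that valid?, and therefore the upgrade, fetches SCHEMA_JSON_URL over HTTP):

    require 'geo_combine'

    record = GeoCombine::Geoblacklight.new(
      File.read('spec/fixtures/docs/geoblacklight_pre_v1.json')
    )
    record.enhance_metadata                   # runs upgrade_to_v1 (no geoblacklight_version yet)
    record.metadata['geoblacklight_version']  # => "1.0"
    record.metadata.key?('uuid')              # => false, deprecated keys are stripped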
data/lib/geo_combine/ogp.rb
@@ -0,0 +1,229 @@
+require 'active_support/core_ext/object/blank'
+require 'cgi'
+
+module GeoCombine
+  # Data model for OpenGeoPortal metadata
+  class OGP
+    class InvalidMetadata < RuntimeError; end
+    include GeoCombine::Formatting
+    attr_reader :metadata
+
+    ##
+    # Initializes an OGP object for parsing
+    # @param [String] metadata a valid serialized JSON string from OGP instance
+    # @raise [InvalidMetadata]
+    def initialize(metadata)
+      @metadata = JSON.parse(metadata)
+      raise InvalidMetadata unless valid?
+    end
+
+    OGP_REQUIRED_FIELDS = %w[
+      Access
+      Institution
+      LayerDisplayName
+      LayerId
+      MaxX
+      MaxY
+      MinX
+      MinY
+      Name
+    ].freeze
+
+    ##
+    # Runs validity checks on OGP metadata to ensure fields are present
+    def valid?
+      OGP_REQUIRED_FIELDS.all? { |k| metadata[k].present? }
+    end
+
+    ##
+    # Creates and returns a Geoblacklight schema object from this metadata
+    # @return [GeoCombine::Geoblacklight]
+    def to_geoblacklight
+      GeoCombine::Geoblacklight.new(geoblacklight_terms.to_json)
+    end
+
+    ##
+    # Builds a Geoblacklight Schema type hash from Esri Open Data portal
+    # metadata
+    # @return [Hash]
+    def geoblacklight_terms
+      {
+        # Required fields
+        dc_identifier_s: identifier,
+        layer_slug_s: slug,
+        dc_title_s: metadata['LayerDisplayName'],
+        solr_geom: envelope,
+        dct_provenance_s: institution,
+        dc_rights_s: metadata['Access'],
+        geoblacklight_version: '1.0',
+
+        # Recommended fields
+        dc_description_s: metadata['Abstract'],
+        layer_geom_type_s: ogp_geom,
+        dct_references_s: references,
+        layer_id_s: "#{metadata['WorkspaceName']}:#{metadata['Name']}",
+
+        # Optional
+        dct_temporal_sm: [metadata['ContentDate']],
+        dc_format_s: ogp_formats,
+        # dct_issued_dt
+        # dc_language_s
+        dct_spatial_sm: placenames,
+        solr_year_i: year,
+        dc_publisher_s: metadata['Publisher'],
+        dc_subject_sm: subjects,
+        dc_type_s: 'Dataset'
+      }.delete_if { |_k, v| v.nil? }
+    end
+
+    def date
+      begin
+        DateTime.rfc3339(metadata['ContentDate'])
+      rescue
+        nil
+      end
+    end
+
+    def year
+      date.year unless date.nil?
+    end
+
+    ##
+    # Convert "Paper Map" to Raster, assumes all OGP "Paper Maps" have WMS
+    def ogp_geom
+      case metadata['DataType']
+      when 'Paper Map'
+        'Raster'
+      else
+        metadata['DataType']
+      end
+    end
+
+    ##
+    # OGP doesn't ship format types, so we just try and be clever here.
+    def ogp_formats
+      case metadata['DataType']
+      when 'Paper Map', 'Raster'
+        return 'GeoTIFF'
+      when 'Polygon', 'Point', 'Line'
+        return 'Shapefile'
+      else
+        raise ArgumentError, metadata['DataType']
+      end
+    end
+
+    ##
+    # Converts references to json
+    # @return [String]
+    def references
+      references_hash.to_json
+    end
+
+    ##
+    # Builds a Solr Envelope using CQL syntax
+    # @return [String]
+    def envelope
+      raise ArgumentError unless west >= -180 && west <= 180 &&
+                                 east >= -180 && east <= 180 &&
+                                 north >= -90 && north <= 90 &&
+                                 south >= -90 && south <= 90 &&
+                                 west <= east && south <= north
+      "ENVELOPE(#{west}, #{east}, #{north}, #{south})"
+    end
+
+    def subjects
+      fgdc.metadata.xpath('//themekey').map(&:text) if fgdc
+    end
+
+    def placenames
+      fgdc.metadata.xpath('//placekey').map(&:text) if fgdc
+    end
+
+    def fgdc
+      GeoCombine::Fgdc.new(metadata['FgdcText']) if metadata['FgdcText']
+    end
+
+    private
+
+    ##
+    # Builds references used for dct_references
+    # @return [Hash]
+    def references_hash
+      results = {
+        'http://www.opengis.net/def/serviceType/ogc/wfs' => location['wfs'],
+        'http://www.opengis.net/def/serviceType/ogc/wms' => location['wms'],
+        'http://schema.org/url' => location['url'],
+        download_uri => location['download']
+      }
+
+      # Handle null, "", and [""]
+      results.map { |k, v| { k => ([] << v).flatten.first } if v }
+             .flatten
+             .compact
+             .reduce({}, :merge)
+    end
+
+    def download_uri
+      return 'http://schema.org/DownloadAction' if institution == 'Harvard'
+      'http://schema.org/downloadUrl'
+    end
+
+    ##
+    # OGP "Location" field parsed
+    def location
+      JSON.parse(metadata['Location'])
+    end
+
+    def north
+      metadata['MaxY'].to_f
+    end
+
+    def south
+      metadata['MinY'].to_f
+    end
+
+    def east
+      metadata['MaxX'].to_f
+    end
+
+    def west
+      metadata['MinX'].to_f
+    end
+
+    def institution
+      metadata['Institution']
+    end
+
+    def identifier
+      CGI.escape(metadata['LayerId']) # TODO: why are we using CGI.escape?
+    end
+
+    def slug
+      name = metadata['LayerId'] || metadata['Name'] || ''
+      name = [institution, name].join('-') if institution.present? &&
+                                              !name.downcase.start_with?(institution.downcase)
+      sluggify(filter_name(name))
+    end
+
+    SLUG_BLACKLIST = %w[
+      SDE_DATA.
+      SDE.
+      SDE2.
+      GISPORTAL.GISOWNER01.
+      GISDATA.
+      MORIS.
+    ].freeze
+
+    def filter_name(name)
+      # strip out schema and usernames
+      SLUG_BLACKLIST.each do |blacklisted|
+        name.sub!(blacklisted, '')
+      end
+      unless name.size > 1
+        # use first word of title is empty name
+        name = metadata['LayerDisplayName'].split.first
+      end
+      name
+    end
+  end
+end
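End to end, the OGP class turns an OpenGeoPortal Solr record into a GeoBlacklight 1.0 document. A sketch using one of the OGP fixtures added in this release:

    require 'geo_combine'

    ogp = GeoCombine::OGP.new(File.read('spec/fixtures/docs/ogp_tufts_vector.json'))
    ogp.valid?     # => true, all OGP_REQUIRED_FIELDS are present
    record = ogp.to_geoblacklight
    record.to_json # GeoBlacklight 1.0 JSON with slug, ENVELOPE geometry, and dct_references_s built above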