geo_combine 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -0
- data/README.md +24 -4
- data/bin/geocombine +5 -0
- data/geo_combine.gemspec +4 -0
- data/lib/geo_combine/cli.rb +25 -0
- data/lib/geo_combine/esri_open_data.rb +97 -0
- data/lib/geo_combine/formats.rb +11 -0
- data/lib/geo_combine/formatting.rb +29 -0
- data/lib/geo_combine/geoblacklight.rb +79 -13
- data/lib/geo_combine/geometry_types.rb +11 -0
- data/lib/geo_combine/subjects.rb +29 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +21 -3
- data/lib/schema/geoblacklight-schema.json +169 -0
- data/lib/tasks/geo_combine.rake +16 -13
- data/lib/xslt/fgdc2geoBL.xsl +361 -418
- data/lib/xslt/fgdc2html.xsl +1917 -1025
- data/lib/xslt/iso2geoBL.xsl +173 -306
- data/lib/xslt/iso2html.xsl +1059 -1734
- data/spec/features/fgdc2html_spec.rb +42 -0
- data/spec/features/iso2html_spec.rb +50 -0
- data/spec/fixtures/docs/basic_geoblacklight.json +29 -0
- data/spec/fixtures/docs/esri_open_data.json +53 -0
- data/spec/fixtures/docs/full_geoblacklight.json +39 -0
- data/spec/fixtures/json_docs.rb +21 -0
- data/spec/fixtures/xml_docs.rb +0 -44
- data/spec/helpers.rb +4 -0
- data/spec/lib/geo_combine/esri_open_data_spec.rb +96 -0
- data/spec/lib/geo_combine/fgdc_spec.rb +77 -2
- data/spec/lib/geo_combine/formatting_spec.rb +22 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +48 -5
- data/spec/lib/geo_combine/iso19139_spec.rb +14 -1
- data/spec/spec_helper.rb +3 -0
- metadata +85 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8627401efabab03d06f210ee160e7e09c7223398
|
4
|
+
data.tar.gz: dcdc93c201e689dc2c9b8245ac2076617af34e39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9c957f3b783684f18c18c3265349c7efa4db3aa12094f573e9d4d9e5a2366f3464e390add7a12734213ca1801c5ef3d0d820a8139fdce47c07ba99e037f71e5f
|
7
|
+
data.tar.gz: e6bedee395328db1deed473721231e7a5d678e0bfa2a9b038e3f7de596e99622cc7dff7b4dc2d6db14d2d5734339d01fa9d5a462bfbe4363872afe87c98ecb48
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
|
|
29
29
|
|
30
30
|
```ruby
|
31
31
|
# Create a new ISO19139 object
|
32
|
-
> iso_metadata = GeoCombine::Iso19139.new('./tmp/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
|
32
|
+
> iso_metadata = GeoCombine::Iso19139.new('./tmp/opengeometadata/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
|
33
33
|
|
34
34
|
# Convert it to GeoBlacklight
|
35
35
|
> iso_metadata.to_geoblacklight
|
@@ -41,13 +41,25 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
|
|
41
41
|
> iso_metadata.to_html
|
42
42
|
```
|
43
43
|
|
44
|
+
## Command line ##
|
45
|
+
|
46
|
+
GeoCombine's tasks can be run either as rake tasks or as standalone executables.
|
47
|
+
|
44
48
|
### Clone all OpenGeoMetadata repositories
|
45
49
|
|
46
50
|
```sh
|
47
51
|
$ rake geocombine:clone
|
48
52
|
```
|
49
53
|
|
50
|
-
|
54
|
+
```sh
|
55
|
+
$ bundle exec geocombine clone
|
56
|
+
```
|
57
|
+
|
58
|
+
Will clone all edu.* OpenGeoMetadata repositories into `./tmp/opengeometadata`. Location of the OpenGeoMetadata repositories can be configured using the `OGM_PATH` environment variable.
|
59
|
+
|
60
|
+
```sh
|
61
|
+
$ OGM_PATH='my/custom/location' rake geocombine:clone
|
62
|
+
```
|
51
63
|
|
52
64
|
### Pull all OpenGeoMetadata repositories
|
53
65
|
|
@@ -55,7 +67,11 @@ Will clone all edu.* OpenGeoMetadata repositories into `./tmp`
|
|
55
67
|
$ rake geocombine:pull
|
56
68
|
```
|
57
69
|
|
58
|
-
|
70
|
+
```sh
|
71
|
+
$ bundle exec geocombine pull
|
72
|
+
```
|
73
|
+
|
74
|
+
Runs `git pull origin master` on all cloned repositories in `./tmp/opengeometadata` (or custom path with configured environment variable `OGM_PATH`)
|
59
75
|
|
60
76
|
### Index all of the GeoBlacklight documents
|
61
77
|
|
@@ -63,7 +79,11 @@ Runs `git pull origin master` on all cloned repositories in `./tmp`
|
|
63
79
|
$ rake geocombine:index
|
64
80
|
```
|
65
81
|
|
66
|
-
|
82
|
+
```sh
|
83
|
+
$ bundle exec geocombine index
|
84
|
+
```
|
85
|
+
|
86
|
+
Indexes all of the `geoblacklight.json` files in cloned repositories to a Solr index running at http://127.0.0.1:8983/solr
|
67
87
|
|
68
88
|
## Contributing
|
69
89
|
|
data/bin/geocombine
ADDED
data/geo_combine.gemspec
CHANGED
@@ -20,8 +20,12 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'rsolr'
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
|
+
spec.add_dependency 'json-schema'
|
24
|
+
spec.add_dependency 'sanitize'
|
25
|
+
spec.add_dependency 'thor'
|
23
26
|
|
24
27
|
spec.add_development_dependency "bundler", "~> 1.7"
|
25
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
26
29
|
spec.add_development_dependency 'rspec'
|
30
|
+
spec.add_development_dependency 'rspec-html-matchers'
|
27
31
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
root = Gem::Specification.find_by_name('geo_combine').gem_dir
|
5
|
+
tasks = File.join(root, 'lib/tasks/*.rake')
|
6
|
+
Dir.glob(tasks).each { |r| load r }
|
7
|
+
|
8
|
+
module GeoCombine
|
9
|
+
class CLI < Thor
|
10
|
+
desc 'clone', 'Clone all OpenGeoMetadata repositories'
|
11
|
+
def clone
|
12
|
+
Rake::Task['geocombine:clone'].invoke
|
13
|
+
end
|
14
|
+
|
15
|
+
desc 'pull', '"git pull" OpenGeoMetadata repositories'
|
16
|
+
def pull
|
17
|
+
Rake::Task['geocombine:pull'].invoke
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "index", "Index all of the GeoBlacklight documents"
|
21
|
+
def index
|
22
|
+
Rake::Task['geocombine:index'].invoke
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module GeoCombine
|
2
|
+
# Data model for ESRI's open data portal metadata
|
3
|
+
class EsriOpenData
|
4
|
+
include GeoCombine::Formatting
|
5
|
+
attr_reader :metadata
|
6
|
+
|
7
|
+
##
|
8
|
+
# Initializes an EsriOpenData object for parsing
|
9
|
+
# @param [String] metadata a valid serialized JSON string from an ESRI Open
|
10
|
+
# Data portal
|
11
|
+
def initialize(metadata)
|
12
|
+
@metadata = JSON.parse(metadata)
|
13
|
+
@geometry = @metadata['extent']['coordinates']
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Creates and returns a Geoblacklight schema object from this metadata
|
18
|
+
# @return [GeoCombine::Geoblacklight]
|
19
|
+
def to_geoblacklight
|
20
|
+
GeoCombine::Geoblacklight.new(geoblacklight_terms.to_json)
|
21
|
+
end
|
22
|
+
|
23
|
+
##
|
24
|
+
# Builds a Geoblacklight Schema type hash from Esri Open Data portal
|
25
|
+
# metadata
|
26
|
+
# @return [Hash]
|
27
|
+
def geoblacklight_terms
|
28
|
+
{
|
29
|
+
uuid: @metadata['id'],
|
30
|
+
dc_identifier_s: @metadata['id'],
|
31
|
+
dc_title_s: @metadata['name'],
|
32
|
+
dc_description_s: sanitize_and_remove_lines(@metadata['description']),
|
33
|
+
dc_rights_s: 'Public',
|
34
|
+
dct_provenance_s: @metadata['owner'],
|
35
|
+
dct_references_s: references,
|
36
|
+
georss_box_s: georss_box,
|
37
|
+
# layer_id_s is used for describing a layer id for a web service (WMS, WFS) but is still a required field
|
38
|
+
layer_id_s: '',
|
39
|
+
layer_geom_type_s: @metadata['geometry_type'],
|
40
|
+
layer_modified_dt: @metadata['updated_at'],
|
41
|
+
layer_slug_s: @metadata['id'],
|
42
|
+
solr_geom: envelope,
|
43
|
+
# solr_year_i: '', No equivalent in Esri Open Data metadata
|
44
|
+
dc_subject_sm: @metadata['tags']
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# Converts references to json
|
50
|
+
# @return [String]
|
51
|
+
def references
|
52
|
+
references_hash.to_json
|
53
|
+
end
|
54
|
+
|
55
|
+
##
|
56
|
+
# Builds references used for dct_references
|
57
|
+
# @return [Hash]
|
58
|
+
def references_hash
|
59
|
+
{
|
60
|
+
'http://schema.org/url' => @metadata['landing_page'],
|
61
|
+
'http://resources.arcgis.com/en/help/arcgis-rest-api' => @metadata['url']
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# Builds a GeoRSS box
|
67
|
+
# @return [String]
|
68
|
+
def georss_box
|
69
|
+
"#{south} #{west} #{north} #{east}"
|
70
|
+
end
|
71
|
+
|
72
|
+
##
|
73
|
+
# Builds a Solr Envelope using CQL syntax
|
74
|
+
# @return [String]
|
75
|
+
def envelope
|
76
|
+
"ENVELOPE(#{west}, #{east}, #{north}, #{south})"
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def north
|
82
|
+
@geometry[1][1]
|
83
|
+
end
|
84
|
+
|
85
|
+
def south
|
86
|
+
@geometry[0][1]
|
87
|
+
end
|
88
|
+
|
89
|
+
def east
|
90
|
+
@geometry[1][0]
|
91
|
+
end
|
92
|
+
|
93
|
+
def west
|
94
|
+
@geometry[0][0]
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GeoCombine
|
2
|
+
##
|
3
|
+
# Mixin used for formatting metadata fields
|
4
|
+
module Formatting
|
5
|
+
##
|
6
|
+
# Sanitizes html from a text input
|
7
|
+
# @param [String] text
|
8
|
+
# @return [String]
|
9
|
+
def sanitize(text)
|
10
|
+
Sanitize.fragment(text)
|
11
|
+
end
|
12
|
+
|
13
|
+
##
|
14
|
+
# Removes line breaks from a text input
|
15
|
+
# @param [String] text
|
16
|
+
# @return [String]
|
17
|
+
def remove_lines(text)
|
18
|
+
text.gsub(/\n/, '')
|
19
|
+
end
|
20
|
+
|
21
|
+
##
|
22
|
+
# Sanitizes and removes lines from a text block
|
23
|
+
# @param [String] text
|
24
|
+
# @return [String]
|
25
|
+
def sanitize_and_remove_lines(text)
|
26
|
+
remove_lines(sanitize(text))
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,25 +1,91 @@
|
|
1
1
|
module GeoCombine
|
2
|
-
class Geoblacklight
|
2
|
+
class Geoblacklight
|
3
|
+
include GeoCombine::Formats
|
4
|
+
include GeoCombine::Subjects
|
5
|
+
include GeoCombine::GeometryTypes
|
6
|
+
|
7
|
+
attr_reader :metadata
|
8
|
+
|
9
|
+
##
|
10
|
+
# Initializes a GeoBlacklight object
|
11
|
+
# @param [String] metadata a valid JSON string document in
|
12
|
+
# GeoBlacklight-Schema
|
13
|
+
# @param [Hash] fields enhancements to metadata that are merged with @metadata
|
14
|
+
def initialize(metadata, fields = {})
|
15
|
+
@metadata = JSON.parse(metadata).merge(fields)
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Calls metadata enhancement methods for each key, value pair in the
|
20
|
+
# metadata hash
|
21
|
+
def enhance_metadata
|
22
|
+
@metadata.each do |key, value|
|
23
|
+
translate_formats(key, value)
|
24
|
+
enhance_subjects(key, value)
|
25
|
+
format_proper_date(key, value)
|
26
|
+
fields_should_be_array(key, value)
|
27
|
+
translate_geometry_type(key, value)
|
28
|
+
end
|
29
|
+
end
|
3
30
|
|
4
31
|
##
|
5
32
|
# Returns a string of JSON from a GeoBlacklight hash
|
6
33
|
# @return (String)
|
7
34
|
def to_json
|
8
|
-
|
35
|
+
@metadata.to_json
|
9
36
|
end
|
10
37
|
|
11
38
|
##
|
12
|
-
#
|
13
|
-
# @return
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
39
|
+
# Validates a GeoBlacklight-Schema json document
|
40
|
+
# @return [Boolean]
|
41
|
+
def valid?
|
42
|
+
schema = JSON.parse(File.read(File.join(File.dirname(__FILE__), '../schema/geoblacklight-schema.json')))
|
43
|
+
JSON::Validator.validate!(schema, to_json, validate_schema: true)
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
##
|
49
|
+
# Enhances the 'dc_format_s' field by translating a format type to a valid
|
50
|
+
# GeoBlacklight-Schema format
|
51
|
+
def translate_formats(key, value)
|
52
|
+
@metadata[key] = formats[value] if key == 'dc_format_s' && formats.include?(value)
|
53
|
+
end
|
54
|
+
|
55
|
+
##
|
56
|
+
# Enhances the 'layer_geom_type_s' field by translating from known types
|
57
|
+
def translate_geometry_type(key, value)
|
58
|
+
@metadata[key] = geometry_types[value] if key == 'layer_geom_type_s' && geometry_types.include?(value)
|
59
|
+
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
|
63
|
+
# categories
|
64
|
+
def enhance_subjects(key, value)
|
65
|
+
@metadata[key] = value.map do |val|
|
66
|
+
if subjects.include?(val)
|
67
|
+
subjects[val]
|
68
|
+
else
|
69
|
+
val
|
70
|
+
end
|
71
|
+
end if key == 'dc_subject_sm'
|
72
|
+
end
|
73
|
+
|
74
|
+
##
|
75
|
+
# Formats the 'layer_modified_dt' to a valid RFC3339 date/time string
|
76
|
+
# and ISO8601 (for indexing into Solr)
|
77
|
+
def format_proper_date(key, value)
|
78
|
+
@metadata[key] = Time.parse(value).utc.iso8601 if key == 'layer_modified_dt'
|
79
|
+
end
|
80
|
+
|
81
|
+
def fields_should_be_array(key, value)
|
82
|
+
@metadata[key] = [value] if should_be_array.include?(key) && !value.kind_of?(Array)
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# GeoBlacklight-Schema fields that should be type Array
|
87
|
+
def should_be_array
|
88
|
+
['dc_creator_sm', 'dc_subject_sm', 'dct_spatial_sm', 'dct_temporal_sm', 'dct_isPartOf_sm']
|
23
89
|
end
|
24
90
|
end
|
25
91
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GeoCombine
|
2
|
+
##
|
3
|
+
# Translation dictionary to ISO topics
|
4
|
+
module Subjects
|
5
|
+
def subjects
|
6
|
+
{
|
7
|
+
'farming' => 'Farming',
|
8
|
+
'biota' => 'Biology and Ecology',
|
9
|
+
'climatologyMeteorologyAtmosphere' => 'Climatology, Meteorology and Atmosphere',
|
10
|
+
'boundaries' => 'Boundaries',
|
11
|
+
'elevation' => 'Elevation',
|
12
|
+
'environment' => 'Environment',
|
13
|
+
'geoscientificInformation' => 'Geoscientific Information',
|
14
|
+
'health' => 'Health',
|
15
|
+
'imageryBaseMapsEarthCover' => 'Imagery and Base Maps',
|
16
|
+
'intelligenceMilitary' => 'Military',
|
17
|
+
'inlandWaters' => 'Inland Waters',
|
18
|
+
'location' => 'Location',
|
19
|
+
'oceans' => 'Oceans',
|
20
|
+
'planningCadastre' => 'Planning and Cadastral',
|
21
|
+
'structure' => 'Structure',
|
22
|
+
'transportation' => 'Transportation',
|
23
|
+
'utilitiesCommunication' => 'Utilities and Communication',
|
24
|
+
'society' => 'Society',
|
25
|
+
'economy' => 'Economy'
|
26
|
+
}
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/geo_combine/version.rb
CHANGED
data/lib/geo_combine.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'json'
|
3
|
+
require 'json-schema'
|
4
|
+
require 'sanitize'
|
2
5
|
|
3
6
|
module GeoCombine
|
4
7
|
|
@@ -28,9 +31,12 @@ module GeoCombine
|
|
28
31
|
|
29
32
|
##
|
30
33
|
# Perform an XSLT transformation on metadata using an object's XSL
|
31
|
-
# @return
|
32
|
-
|
33
|
-
|
34
|
+
# @param [Hash] fields additional GeoBlacklight fields to be passed to
|
35
|
+
# GeoCombine::Geoblacklight on its instantiation
|
36
|
+
# @return [GeoCombine::Geoblacklight] the data transformed into
|
37
|
+
# geoblacklight schema, returned as a GeoCombine::Geoblacklight
|
38
|
+
def to_geoblacklight fields = {}
|
39
|
+
GeoCombine::Geoblacklight.new(xsl_geoblacklight.apply_to(@metadata), fields)
|
34
40
|
end
|
35
41
|
|
36
42
|
##
|
@@ -42,7 +48,19 @@ module GeoCombine
|
|
42
48
|
end
|
43
49
|
end
|
44
50
|
|
51
|
+
# Require translation mixins
|
52
|
+
require 'geo_combine/formats'
|
53
|
+
require 'geo_combine/subjects'
|
54
|
+
require 'geo_combine/geometry_types'
|
55
|
+
|
56
|
+
# Require helper mixins
|
57
|
+
require 'geo_combine/formatting'
|
58
|
+
|
59
|
+
# Require additional classes
|
45
60
|
require 'geo_combine/fgdc'
|
46
61
|
require 'geo_combine/geoblacklight'
|
47
62
|
require 'geo_combine/iso19139'
|
63
|
+
require 'geo_combine/esri_open_data'
|
64
|
+
|
65
|
+
# Require gem files
|
48
66
|
require 'geo_combine/version'
|
@@ -0,0 +1,169 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "Schema for GeoBlacklight as implemented for Solr 4.10+. See http://journal.code4lib.org/articles/9710 for more details. Note that the Solr schema uses dynamic typing based on the suffix of the field name. For example, _s denotes a string where _sm denotes a multi-valued string (array of strings).",
|
4
|
+
"id": "http://geoblacklight.org/schema",
|
5
|
+
"title": "GeoBlacklight Schema",
|
6
|
+
"required": [
|
7
|
+
"uuid",
|
8
|
+
"dc_identifier_s",
|
9
|
+
"dc_title_s",
|
10
|
+
"dc_description_s",
|
11
|
+
"dc_rights_s",
|
12
|
+
"dct_provenance_s",
|
13
|
+
"dct_references_s",
|
14
|
+
"georss_box_s",
|
15
|
+
"layer_id_s",
|
16
|
+
"layer_geom_type_s",
|
17
|
+
"layer_modified_dt",
|
18
|
+
"layer_slug_s",
|
19
|
+
"solr_geom",
|
20
|
+
"solr_year_i"
|
21
|
+
],
|
22
|
+
"type": "object",
|
23
|
+
"properties": {
|
24
|
+
"uuid": {
|
25
|
+
"type": "string",
|
26
|
+
"description": "Unique identifier for layer that is globally unique."
|
27
|
+
|
28
|
+
},
|
29
|
+
"dc_identifier_s": {
|
30
|
+
"type": "string",
|
31
|
+
"description": "Unique identifier for layer. May be same as UUID but may be an alternate identifier."
|
32
|
+
},
|
33
|
+
"dc_title_s": {
|
34
|
+
"type": "string",
|
35
|
+
"description": "Title for the layer."
|
36
|
+
|
37
|
+
},
|
38
|
+
"dc_description_s": {
|
39
|
+
"type": "string",
|
40
|
+
"description": "Description for the layer."
|
41
|
+
|
42
|
+
},
|
43
|
+
"dc_rights_s": {
|
44
|
+
"type": "string",
|
45
|
+
"enum": ["Public", "Restricted"],
|
46
|
+
"description": "Access rights for the layer."
|
47
|
+
|
48
|
+
},
|
49
|
+
"dct_provenance_s": {
|
50
|
+
"type": "string",
|
51
|
+
"description": "Institution who holds the layer."
|
52
|
+
|
53
|
+
},
|
54
|
+
"dct_references_s": {
|
55
|
+
"type": "string",
|
56
|
+
"description": "JSON hash for external resources, where each key is a URI for the protocol or format and the value is the URL to the resource."
|
57
|
+
|
58
|
+
},
|
59
|
+
"georss_box_s": {
|
60
|
+
"type": "string",
|
61
|
+
"description": "Bounding box as maximum values for S W N E. Example: 12.6 -119.4 19.9 84.8."
|
62
|
+
|
63
|
+
},
|
64
|
+
"layer_id_s": {
|
65
|
+
"type": "string",
|
66
|
+
"description": "The complete identifier for the layer via WMS/WFS/WCS protocol. Example: druid:vr593vj7147."
|
67
|
+
|
68
|
+
},
|
69
|
+
"layer_geom_type_s": {
|
70
|
+
"type": "string",
|
71
|
+
"enum": ["Point", "Line", "Polygon", "Raster", "Scanned Map", "Mixed"],
|
72
|
+
"description": "Geometry type for layer data, using controlled vocabulary."
|
73
|
+
},
|
74
|
+
"layer_modified_dt": {
|
75
|
+
"type": "string",
|
76
|
+
"format": "date-time",
|
77
|
+
"description": "Last modification date for the metadata record, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
|
78
|
+
},
|
79
|
+
"layer_slug_s": {
|
80
|
+
"type": "string",
|
81
|
+
"description": "Unique identifier visible to the user, used for Permalinks. Example: stanford-vr593vj7147."
|
82
|
+
|
83
|
+
},
|
84
|
+
"solr_geom": {
|
85
|
+
"type": "string",
|
86
|
+
"pattern": "ENVELOPE(.*,.*,.*,.*)",
|
87
|
+
"description": "Derived from georss_polygon_s or georss_box_s. Shape of the layer as a ENVELOPE WKT using W E N S. Example: ENVELOPE(76.76, 84.76, 19.91, 12.62). Note that this field is indexed as a Solr spatial (RPT) field."
|
88
|
+
|
89
|
+
},
|
90
|
+
"solr_year_i": {
|
91
|
+
"type": "integer",
|
92
|
+
"description": "Derived from dct_temporal_sm. Year for which layer is valid and only a single value. Example: 1989. Note that this field is indexed as a Solr numeric field."
|
93
|
+
|
94
|
+
},
|
95
|
+
"dc_creator_sm": {
|
96
|
+
"type": "array",
|
97
|
+
"items": {
|
98
|
+
"type": "string"
|
99
|
+
},
|
100
|
+
"description": "Author(s). Example: George Washington, Thomas Jefferson."
|
101
|
+
|
102
|
+
},
|
103
|
+
"dc_format_s": {
|
104
|
+
"type": "string",
|
105
|
+
"enum": ["Shapefile", "GeoTIFF", "ArcGRID"],
|
106
|
+
"description": "File format for the layer, using a controlled vocabulary."
|
107
|
+
|
108
|
+
},
|
109
|
+
"dc_language_s": {
|
110
|
+
"type": "string",
|
111
|
+
"description": "Language for the layer. Example: English."
|
112
|
+
|
113
|
+
},
|
114
|
+
"dc_publisher_s": {
|
115
|
+
"type": "string",
|
116
|
+
"description": "Publisher. Example: ML InfoMap."
|
117
|
+
|
118
|
+
},
|
119
|
+
"dc_subject_sm": {
|
120
|
+
"type": "array",
|
121
|
+
"items": {
|
122
|
+
"type": "string"
|
123
|
+
},
|
124
|
+
"description": "Subjects, preferably in a controlled vocabulary. Examples: Census, Human settlements."
|
125
|
+
|
126
|
+
},
|
127
|
+
"dc_type_s": {
|
128
|
+
"type": "string",
|
129
|
+
"enum": ["Dataset", "Image", "PhysicalObject"],
|
130
|
+
"description": "Resource type, using DCMI Type Vocabulary."
|
131
|
+
|
132
|
+
},
|
133
|
+
"dct_spatial_sm": {
|
134
|
+
"type": "array",
|
135
|
+
"items": {
|
136
|
+
"type": "string"
|
137
|
+
},
|
138
|
+
"description": "Spatial coverage and place names, preferably in a controlled vocabulary. Example: 'Paris, France'."
|
139
|
+
|
140
|
+
},
|
141
|
+
"dct_temporal_sm": {
|
142
|
+
"type": "array",
|
143
|
+
"items": {
|
144
|
+
"type": "string"
|
145
|
+
},
|
146
|
+
"description": "Temporal coverage, typically years or dates. Example: 1989, circa 2010, 2007-2009. Note that this field is not in a specific date format."
|
147
|
+
|
148
|
+
},
|
149
|
+
"dct_issued_dt": {
|
150
|
+
"type": "string",
|
151
|
+
"format": "date-time",
|
152
|
+
"description": "Issued date for the layer, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
|
153
|
+
|
154
|
+
},
|
155
|
+
"dct_isPartOf_sm": {
|
156
|
+
"type": "array",
|
157
|
+
"items": {
|
158
|
+
"type": "string"
|
159
|
+
},
|
160
|
+
"description": "Holding dataset for the layer, such as the name of a collection. Example: Village Maps of India."
|
161
|
+
|
162
|
+
},
|
163
|
+
"georss_point_s": {
|
164
|
+
"type": "string",
|
165
|
+
"description": "Point representation for layer as y, x - i.e., centroid. Example: 12.6 -119.4."
|
166
|
+
|
167
|
+
}
|
168
|
+
}
|
169
|
+
}
|
data/lib/tasks/geo_combine.rake
CHANGED
@@ -4,32 +4,35 @@ require 'json'
|
|
4
4
|
require 'rsolr'
|
5
5
|
|
6
6
|
namespace :geocombine do
|
7
|
+
ogm_path = ENV['OGM_PATH'] || 'tmp/opengeometadata'
|
8
|
+
solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
|
7
9
|
desc 'Clone all OpenGeoMetadata repositories'
|
8
10
|
task :clone do
|
9
11
|
ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
|
10
12
|
ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map{ |repo| repo['git_url']}
|
11
13
|
ogm_repos.each do |repo|
|
12
|
-
if repo =~ /^git:\/\/github.com\/OpenGeoMetadata\/edu
|
13
|
-
system "cd
|
14
|
+
if repo =~ /^git:\/\/github.com\/OpenGeoMetadata\/(edu|org|uk)\..*/
|
15
|
+
system "mkdir -p #{ogm_path} && cd #{ogm_path} && git clone #{repo}"
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
17
19
|
desc '"git pull" OpenGeoMetadata repositories'
|
18
20
|
task :pull do
|
19
|
-
Dir.glob(
|
21
|
+
Dir.glob("#{ogm_path}/*").map{ |dir| system "cd #{dir} && git pull origin master" if dir =~ /.*(edu|org|uk)\..*./ }
|
20
22
|
end
|
21
23
|
desc 'Index all of the GeoBlacklight documents'
|
22
24
|
task :index do
|
23
|
-
solr = RSolr.connect :url =>
|
24
|
-
Find.find(
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
solr = RSolr.connect :url => solr_url
|
26
|
+
Find.find(ogm_path) do |path|
|
27
|
+
next unless path =~ /.*geoblacklight.json$/
|
28
|
+
doc = JSON.parse(File.read(path))
|
29
|
+
begin
|
30
|
+
solr.update params: { commitWithin: 500, overwrite: true },
|
31
|
+
data: [doc].to_json,
|
32
|
+
headers: { 'Content-Type' => 'application/json' }
|
33
|
+
|
34
|
+
rescue RSolr::Error::Http => error
|
35
|
+
puts error
|
33
36
|
end
|
34
37
|
end
|
35
38
|
end
|