geo_combine 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -0
- data/README.md +24 -4
- data/bin/geocombine +5 -0
- data/geo_combine.gemspec +4 -0
- data/lib/geo_combine/cli.rb +25 -0
- data/lib/geo_combine/esri_open_data.rb +97 -0
- data/lib/geo_combine/formats.rb +11 -0
- data/lib/geo_combine/formatting.rb +29 -0
- data/lib/geo_combine/geoblacklight.rb +79 -13
- data/lib/geo_combine/geometry_types.rb +11 -0
- data/lib/geo_combine/subjects.rb +29 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +21 -3
- data/lib/schema/geoblacklight-schema.json +169 -0
- data/lib/tasks/geo_combine.rake +16 -13
- data/lib/xslt/fgdc2geoBL.xsl +361 -418
- data/lib/xslt/fgdc2html.xsl +1917 -1025
- data/lib/xslt/iso2geoBL.xsl +173 -306
- data/lib/xslt/iso2html.xsl +1059 -1734
- data/spec/features/fgdc2html_spec.rb +42 -0
- data/spec/features/iso2html_spec.rb +50 -0
- data/spec/fixtures/docs/basic_geoblacklight.json +29 -0
- data/spec/fixtures/docs/esri_open_data.json +53 -0
- data/spec/fixtures/docs/full_geoblacklight.json +39 -0
- data/spec/fixtures/json_docs.rb +21 -0
- data/spec/fixtures/xml_docs.rb +0 -44
- data/spec/helpers.rb +4 -0
- data/spec/lib/geo_combine/esri_open_data_spec.rb +96 -0
- data/spec/lib/geo_combine/fgdc_spec.rb +77 -2
- data/spec/lib/geo_combine/formatting_spec.rb +22 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +48 -5
- data/spec/lib/geo_combine/iso19139_spec.rb +14 -1
- data/spec/spec_helper.rb +3 -0
- metadata +85 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8627401efabab03d06f210ee160e7e09c7223398
|
4
|
+
data.tar.gz: dcdc93c201e689dc2c9b8245ac2076617af34e39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9c957f3b783684f18c18c3265349c7efa4db3aa12094f573e9d4d9e5a2366f3464e390add7a12734213ca1801c5ef3d0d820a8139fdce47c07ba99e037f71e5f
|
7
|
+
data.tar.gz: e6bedee395328db1deed473721231e7a5d678e0bfa2a9b038e3f7de596e99622cc7dff7b4dc2d6db14d2d5734339d01fa9d5a462bfbe4363872afe87c98ecb48
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
|
|
29
29
|
|
30
30
|
```ruby
|
31
31
|
# Create a new ISO19139 object
|
32
|
-
> iso_metadata = GeoCombine::Iso19139.new('./tmp/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
|
32
|
+
> iso_metadata = GeoCombine::Iso19139.new('./tmp/opengeometadata/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
|
33
33
|
|
34
34
|
# Convert it to GeoBlacklight
|
35
35
|
> iso_metadata.to_geoblacklight
|
@@ -41,13 +41,25 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
|
|
41
41
|
> iso_metadata.to_html
|
42
42
|
```
|
43
43
|
|
44
|
+
## Command line ##
|
45
|
+
|
46
|
+
GeoCombine's tasks can be run either as rake tasks or as standalone executables.
|
47
|
+
|
44
48
|
### Clone all OpenGeoMetadata repositories
|
45
49
|
|
46
50
|
```sh
|
47
51
|
$ rake geocombine:clone
|
48
52
|
```
|
49
53
|
|
50
|
-
|
54
|
+
```sh
|
55
|
+
$ bundle exec geocombine clone
|
56
|
+
```
|
57
|
+
|
58
|
+
Will clone all `edu.*`, `org.*`, and `uk.*` OpenGeoMetadata repositories into `./tmp/opengeometadata`. The location of the cloned repositories can be configured using the `OGM_PATH` environment variable.
|
59
|
+
|
60
|
+
```sh
|
61
|
+
$ OGM_PATH='my/custom/location' rake geocombine:clone
|
62
|
+
```
|
51
63
|
|
52
64
|
### Pull all OpenGeoMetadata repositories
|
53
65
|
|
@@ -55,7 +67,11 @@ Will clone all edu.* OpenGeoMetadata repositories into `./tmp`
|
|
55
67
|
$ rake geocombine:pull
|
56
68
|
```
|
57
69
|
|
58
|
-
|
70
|
+
```sh
|
71
|
+
$ bundle exec geocombine pull
|
72
|
+
```
|
73
|
+
|
74
|
+
Runs `git pull origin master` on all cloned repositories in `./tmp/opengeometadata` (or custom path with configured environment variable `OGM_PATH`)
|
59
75
|
|
60
76
|
### Index all of the GeoBlacklight documents
|
61
77
|
|
@@ -63,7 +79,11 @@ Runs `git pull origin master` on all cloned repositories in `./tmp`
|
|
63
79
|
$ rake geocombine:index
|
64
80
|
```
|
65
81
|
|
66
|
-
|
82
|
+
```sh
|
83
|
+
$ bundle exec geocombine index
|
84
|
+
```
|
85
|
+
|
86
|
+
Indexes all of the `geoblacklight.json` files in cloned repositories to a Solr index running at http://127.0.0.1:8983/solr/blacklight-core (configurable with the `SOLR_URL` environment variable)
|
67
87
|
|
68
88
|
## Contributing
|
69
89
|
|
data/bin/geocombine
ADDED
data/geo_combine.gemspec
CHANGED
@@ -20,8 +20,12 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'rsolr'
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
|
+
spec.add_dependency 'json-schema'
|
24
|
+
spec.add_dependency 'sanitize'
|
25
|
+
spec.add_dependency 'thor'
|
23
26
|
|
24
27
|
spec.add_development_dependency "bundler", "~> 1.7"
|
25
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
26
29
|
spec.add_development_dependency 'rspec'
|
30
|
+
spec.add_development_dependency 'rspec-html-matchers'
|
27
31
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Load every rake task shipped with the installed geo_combine gem so the
# Thor commands below can delegate to them.
gem_root = Gem::Specification.find_by_name('geo_combine').gem_dir
rake_files = Dir.glob(File.join(gem_root, 'lib/tasks/*.rake'))
rake_files.each { |rake_file| load rake_file }

module GeoCombine
  ##
  # Thor command-line interface; each command invokes the rake task of the
  # same name in the `geocombine` namespace.
  class CLI < Thor
    desc 'clone', 'Clone all OpenGeoMetadata repositories'
    def clone
      invoke_geocombine_task('clone')
    end

    desc 'pull', '"git pull" OpenGeoMetadata repositories'
    def pull
      invoke_geocombine_task('pull')
    end

    desc 'index', 'Index all of the GeoBlacklight documents'
    def index
      invoke_geocombine_task('index')
    end

    private

    ##
    # Invokes the named task from the `geocombine` rake namespace
    # @param [String] task_name task name without the namespace prefix
    def invoke_geocombine_task(task_name)
      Rake::Task["geocombine:#{task_name}"].invoke
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module GeoCombine
  ##
  # Data model for metadata records from an ESRI Open Data portal
  class EsriOpenData
    include GeoCombine::Formatting
    attr_reader :metadata

    ##
    # Parses the record and keeps its extent coordinates for the bounding-box
    # helpers below
    # @param [String] metadata a valid serialized JSON string from an ESRI Open
    # Data portal
    def initialize(metadata)
      @metadata = JSON.parse(metadata)
      @geometry = @metadata['extent']['coordinates']
    end

    ##
    # Wraps this record's GeoBlacklight terms in a GeoBlacklight object
    # @return [GeoCombine::Geoblacklight]
    def to_geoblacklight
      serialized_terms = geoblacklight_terms.to_json
      GeoCombine::Geoblacklight.new(serialized_terms)
    end

    ##
    # Maps Esri Open Data portal metadata onto GeoBlacklight-Schema field names
    # @return [Hash]
    def geoblacklight_terms
      record_id = @metadata['id']
      {
        uuid: record_id,
        dc_identifier_s: record_id,
        dc_title_s: @metadata['name'],
        dc_description_s: sanitize_and_remove_lines(@metadata['description']),
        dc_rights_s: 'Public',
        dct_provenance_s: @metadata['owner'],
        dct_references_s: references,
        georss_box_s: georss_box,
        # layer_id_s describes a layer id for a web service (WMS, WFS); it is
        # left blank here but is still a required field
        layer_id_s: '',
        layer_geom_type_s: @metadata['geometry_type'],
        layer_modified_dt: @metadata['updated_at'],
        layer_slug_s: record_id,
        solr_geom: envelope,
        # solr_year_i: '', No equivalent in Esri Open Data metadata
        dc_subject_sm: @metadata['tags']
      }
    end

    ##
    # Serializes the references hash for dct_references_s
    # @return [String]
    def references
      references_hash.to_json
    end

    ##
    # Collects the external links used for dct_references
    # @return [Hash]
    def references_hash
      {
        'http://schema.org/url' => @metadata['landing_page'],
        'http://resources.arcgis.com/en/help/arcgis-rest-api' => @metadata['url']
      }
    end

    ##
    # Formats the extent as a GeoRSS box (south west north east)
    # @return [String]
    def georss_box
      format('%s %s %s %s', south, west, north, east)
    end

    ##
    # Formats the extent as a Solr ENVELOPE (west, east, north, south)
    # @return [String]
    def envelope
      format('ENVELOPE(%s, %s, %s, %s)', west, east, north, south)
    end

    private

    # First coordinate pair of the extent: [west, south]
    def lower_left
      @geometry[0]
    end

    # Second coordinate pair of the extent: [east, north]
    def upper_right
      @geometry[1]
    end

    def north
      upper_right[1]
    end

    def south
      lower_left[1]
    end

    def east
      upper_right[0]
    end

    def west
      lower_left[0]
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GeoCombine
  ##
  # Mixin used for formatting metadata fields
  module Formatting
    ##
    # Sanitizes html from a text input by delegating to Sanitize.fragment
    # @param [String] text
    # @return [String]
    def sanitize(text)
      Sanitize.fragment(text)
    end

    ##
    # Removes line breaks from a text input. Both line feeds and carriage
    # returns are dropped, so Windows-style (CRLF) input does not leave stray
    # "\r" characters behind. A nil input is treated as an empty string.
    # @param [String, nil] text
    # @return [String]
    def remove_lines(text)
      text.to_s.delete("\r\n")
    end

    ##
    # Sanitizes and removes lines from a text block
    # @param [String] text
    # @return [String]
    def sanitize_and_remove_lines(text)
      remove_lines(sanitize(text))
    end
  end
end
|
@@ -1,25 +1,91 @@
|
|
1
1
|
# Time.parse and Time#iso8601 (used by format_proper_date) are provided by the
# standard library's 'time' extension.
require 'time'

module GeoCombine
  ##
  # Wraps a GeoBlacklight-Schema document held as a Hash and offers
  # enhancement, serialization and schema validation helpers
  class Geoblacklight
    include GeoCombine::Formats
    include GeoCombine::Subjects
    include GeoCombine::GeometryTypes

    attr_reader :metadata

    ##
    # Initializes a GeoBlacklight object
    # @param [String] metadata be a valid JSON string document in
    # GeoBlacklight-Schema
    # @param [Hash] fields enhancements to metadata that are merged with @metadata
    def initialize(metadata, fields = {})
      @metadata = JSON.parse(metadata).merge(fields)
    end

    ##
    # Calls metadata enhancement methods for each key in the metadata hash.
    # Array coercion runs first and every step re-reads the current value, so
    # a scalar 'dc_subject_sm' is wrapped before its subjects are translated
    # and no step overwrites the result of an earlier one.
    # @return [Hash] the enhanced metadata
    def enhance_metadata
      @metadata.keys.each do |key|
        fields_should_be_array(key, @metadata[key])
        translate_formats(key, @metadata[key])
        enhance_subjects(key, @metadata[key])
        format_proper_date(key, @metadata[key])
        translate_geometry_type(key, @metadata[key])
      end
      @metadata
    end

    ##
    # Returns a string of JSON from a GeoBlacklight hash. Optional generator
    # arguments are accepted and forwarded so the object can also be
    # serialized when nested inside another structure.
    # @return (String)
    def to_json(*args)
      @metadata.to_json(*args)
    end

    ##
    # Validates a GeoBlacklight-Schema json document against the bundled
    # geoblacklight-schema.json
    # @return [Boolean]
    # NOTE(review): JSON::Validator.validate! is the raising variant, so an
    # invalid document is expected to raise rather than return false - confirm
    # against the json-schema gem documentation.
    def valid?
      schema = JSON.parse(File.read(File.join(File.dirname(__FILE__), '../schema/geoblacklight-schema.json')))
      JSON::Validator.validate!(schema, to_json, validate_schema: true)
    end

    private

    ##
    # Enhances the 'dc_format_s' field by translating a format type to a valid
    # GeoBlacklight-Schema format
    def translate_formats(key, value)
      @metadata[key] = formats[value] if key == 'dc_format_s' && formats.include?(value)
    end

    ##
    # Enhances the 'layer_geom_type_s' field by translating from known types
    def translate_geometry_type(key, value)
      @metadata[key] = geometry_types[value] if key == 'layer_geom_type_s' && geometry_types.include?(value)
    end

    ##
    # Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
    # categories; subjects without a translation are kept as they are
    def enhance_subjects(key, value)
      return unless key == 'dc_subject_sm'

      # Array() tolerates a scalar or nil value if this is called on its own.
      @metadata[key] = Array(value).map do |val|
        subjects.include?(val) ? subjects[val] : val
      end
    end

    ##
    # Formats the 'layer_modified_dt' to a valid RFC3339 date/time string
    # and ISO8601 (for indexing into Solr)
    def format_proper_date(key, value)
      @metadata[key] = Time.parse(value).utc.iso8601 if key == 'layer_modified_dt'
    end

    ##
    # Wraps the value in an Array when the field is one that should be
    # multi-valued and is not already an Array
    def fields_should_be_array(key, value)
      @metadata[key] = [value] if should_be_array.include?(key) && !value.kind_of?(Array)
    end

    ##
    # GeoBlacklight-Schema fields that should be type Array
    def should_be_array
      ['dc_creator_sm', 'dc_subject_sm', 'dct_spatial_sm', 'dct_temporal_sm', 'dct_isPartOf_sm']
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GeoCombine
  ##
  # Translation dictionary to ISO topics
  module Subjects
    # ISO topic category keyword => display label. Built once and frozen so
    # repeated lookups do not allocate a new Hash on every call.
    ISO_TOPIC_LABELS = {
      'farming' => 'Farming',
      'biota' => 'Biology and Ecology',
      'climatologyMeteorologyAtmosphere' => 'Climatology, Meteorology and Atmosphere',
      'boundaries' => 'Boundaries',
      'elevation' => 'Elevation',
      'environment' => 'Environment',
      'geoscientificInformation' => 'Geoscientific Information',
      'health' => 'Health',
      'imageryBaseMapsEarthCover' => 'Imagery and Base Maps',
      'intelligenceMilitary' => 'Military',
      'inlandWaters' => 'Inland Waters',
      'location' => 'Location',
      'oceans' => 'Oceans',
      'planningCadastre' => 'Planning and Cadastral',
      'structure' => 'Structure',
      'transportation' => 'Transportation',
      'utilitiesCommunication' => 'Utilities and Communication',
      'society' => 'Society',
      'economy' => 'Economy'
    }.freeze

    ##
    # Returns the subject translation table
    # @return [Hash] frozen mapping of ISO topic keyword to display label
    def subjects
      ISO_TOPIC_LABELS
    end
  end
end
|
data/lib/geo_combine/version.rb
CHANGED
data/lib/geo_combine.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'json'
|
3
|
+
require 'json-schema'
|
4
|
+
require 'sanitize'
|
2
5
|
|
3
6
|
module GeoCombine
|
4
7
|
|
@@ -28,9 +31,12 @@ module GeoCombine
|
|
28
31
|
|
29
32
|
##
|
30
33
|
# Perform an XSLT transformation on metadata using an object's XSL
|
31
|
-
# @return
|
32
|
-
|
33
|
-
|
34
|
+
# @param [Hash] fields additional GeoBlacklight fields to be passed to
|
35
|
+
# GeoCombine::Geoblacklight on its instantiation
|
36
|
+
# @return [GeoCombine::Geoblacklight] the data transformed into
|
37
|
+
# geoblacklight schema, returned as a GeoCombine::Geoblacklight
|
38
|
+
def to_geoblacklight(fields = {})
  # Run this object's GeoBlacklight XSL over the metadata, then wrap the
  # result together with any caller-supplied fields.
  transformed = xsl_geoblacklight.apply_to(@metadata)
  GeoCombine::Geoblacklight.new(transformed, fields)
end
|
35
41
|
|
36
42
|
##
|
@@ -42,7 +48,19 @@ module GeoCombine
|
|
42
48
|
end
|
43
49
|
end
|
44
50
|
|
51
|
+
# Require translation mixins
|
52
|
+
require 'geo_combine/formats'
|
53
|
+
require 'geo_combine/subjects'
|
54
|
+
require 'geo_combine/geometry_types'
|
55
|
+
|
56
|
+
# Require helper mixins
|
57
|
+
require 'geo_combine/formatting'
|
58
|
+
|
59
|
+
# Require additional classes
|
45
60
|
require 'geo_combine/fgdc'
|
46
61
|
require 'geo_combine/geoblacklight'
|
47
62
|
require 'geo_combine/iso19139'
|
63
|
+
require 'geo_combine/esri_open_data'
|
64
|
+
|
65
|
+
# Require gem files
|
48
66
|
require 'geo_combine/version'
|
@@ -0,0 +1,169 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "Schema for GeoBlacklight as implemented for Solr 4.10+. See http://journal.code4lib.org/articles/9710 for more details. Note that the Solr schema uses dynamic typing based on the suffix of the field name. For example, _s denotes a string where _sm denotes a multi-valued string (array of strings).",
|
4
|
+
"id": "http://geoblacklight.org/schema",
|
5
|
+
"title": "GeoBlacklight Schema",
|
6
|
+
"required": [
|
7
|
+
"uuid",
|
8
|
+
"dc_identifier_s",
|
9
|
+
"dc_title_s",
|
10
|
+
"dc_description_s",
|
11
|
+
"dc_rights_s",
|
12
|
+
"dct_provenance_s",
|
13
|
+
"dct_references_s",
|
14
|
+
"georss_box_s",
|
15
|
+
"layer_id_s",
|
16
|
+
"layer_geom_type_s",
|
17
|
+
"layer_modified_dt",
|
18
|
+
"layer_slug_s",
|
19
|
+
"solr_geom",
|
20
|
+
"solr_year_i"
|
21
|
+
],
|
22
|
+
"type": "object",
|
23
|
+
"properties": {
|
24
|
+
"uuid": {
|
25
|
+
"type": "string",
|
26
|
+
"description": "Unique identifier for layer that is globally unique."
|
27
|
+
|
28
|
+
},
|
29
|
+
"dc_identifier_s": {
|
30
|
+
"type": "string",
|
31
|
+
"description": "Unique identifier for layer. May be same as UUID but may be an alternate identifier."
|
32
|
+
},
|
33
|
+
"dc_title_s": {
|
34
|
+
"type": "string",
|
35
|
+
"description": "Title for the layer."
|
36
|
+
|
37
|
+
},
|
38
|
+
"dc_description_s": {
|
39
|
+
"type": "string",
|
40
|
+
"description": "Description for the layer."
|
41
|
+
|
42
|
+
},
|
43
|
+
"dc_rights_s": {
|
44
|
+
"type": "string",
|
45
|
+
"enum": ["Public", "Restricted"],
|
46
|
+
"description": "Access rights for the layer."
|
47
|
+
|
48
|
+
},
|
49
|
+
"dct_provenance_s": {
|
50
|
+
"type": "string",
|
51
|
+
"description": "Institution who holds the layer."
|
52
|
+
|
53
|
+
},
|
54
|
+
"dct_references_s": {
|
55
|
+
"type": "string",
|
56
|
+
"description": "JSON hash for external resources, where each key is a URI for the protocol or format and the value is the URL to the resource."
|
57
|
+
|
58
|
+
},
|
59
|
+
"georss_box_s": {
|
60
|
+
"type": "string",
|
61
|
+
"description": "Bounding box as maximum values for S W N E. Example: 12.6 -119.4 19.9 84.8."
|
62
|
+
|
63
|
+
},
|
64
|
+
"layer_id_s": {
|
65
|
+
"type": "string",
|
66
|
+
"description": "The complete identifier for the layer via WMS/WFS/WCS protocol. Example: druid:vr593vj7147."
|
67
|
+
|
68
|
+
},
|
69
|
+
"layer_geom_type_s": {
|
70
|
+
"type": "string",
|
71
|
+
"enum": ["Point", "Line", "Polygon", "Raster", "Scanned Map", "Mixed"],
|
72
|
+
"description": "Geometry type for layer data, using controlled vocabulary."
|
73
|
+
},
|
74
|
+
"layer_modified_dt": {
|
75
|
+
"type": "string",
|
76
|
+
"format": "date-time",
|
77
|
+
"description": "Last modification date for the metadata record, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
|
78
|
+
},
|
79
|
+
"layer_slug_s": {
|
80
|
+
"type": "string",
|
81
|
+
"description": "Unique identifier visible to the user, used for Permalinks. Example: stanford-vr593vj7147."
|
82
|
+
|
83
|
+
},
|
84
|
+
"solr_geom": {
|
85
|
+
"type": "string",
|
86
|
+
"pattern": "ENVELOPE(.*,.*,.*,.*)",
|
87
|
+
"description": "Derived from georss_polygon_s or georss_box_s. Shape of the layer as a ENVELOPE WKT using W E N S. Example: ENVELOPE(76.76, 84.76, 19.91, 12.62). Note that this field is indexed as a Solr spatial (RPT) field."
|
88
|
+
|
89
|
+
},
|
90
|
+
"solr_year_i": {
|
91
|
+
"type": "integer",
|
92
|
+
"description": "Derived from dct_temporal_sm. Year for which layer is valid and only a single value. Example: 1989. Note that this field is indexed as a Solr numeric field."
|
93
|
+
|
94
|
+
},
|
95
|
+
"dc_creator_sm": {
|
96
|
+
"type": "array",
|
97
|
+
"items": {
|
98
|
+
"type": "string"
|
99
|
+
},
|
100
|
+
"description": "Author(s). Example: George Washington, Thomas Jefferson."
|
101
|
+
|
102
|
+
},
|
103
|
+
"dc_format_s": {
|
104
|
+
"type": "string",
|
105
|
+
"enum": ["Shapefile", "GeoTIFF", "ArcGRID"],
|
106
|
+
"description": "File format for the layer, using a controlled vocabulary."
|
107
|
+
|
108
|
+
},
|
109
|
+
"dc_language_s": {
|
110
|
+
"type": "string",
|
111
|
+
"description": "Language for the layer. Example: English."
|
112
|
+
|
113
|
+
},
|
114
|
+
"dc_publisher_s": {
|
115
|
+
"type": "string",
|
116
|
+
"description": "Publisher. Example: ML InfoMap."
|
117
|
+
|
118
|
+
},
|
119
|
+
"dc_subject_sm": {
|
120
|
+
"type": "array",
|
121
|
+
"items": {
|
122
|
+
"type": "string"
|
123
|
+
},
|
124
|
+
"description": "Subjects, preferably in a controlled vocabulary. Examples: Census, Human settlements."
|
125
|
+
|
126
|
+
},
|
127
|
+
"dc_type_s": {
|
128
|
+
"type": "string",
|
129
|
+
"enum": ["Dataset", "Image", "PhysicalObject"],
|
130
|
+
"description": "Resource type, using DCMI Type Vocabulary."
|
131
|
+
|
132
|
+
},
|
133
|
+
"dct_spatial_sm": {
|
134
|
+
"type": "array",
|
135
|
+
"items": {
|
136
|
+
"type": "string"
|
137
|
+
},
|
138
|
+
"description": "Spatial coverage and place names, preferably in a controlled vocabulary. Example: 'Paris, France'."
|
139
|
+
|
140
|
+
},
|
141
|
+
"dct_temporal_sm": {
|
142
|
+
"type": "array",
|
143
|
+
"items": {
|
144
|
+
"type": "string"
|
145
|
+
},
|
146
|
+
"description": "Temporal coverage, typically years or dates. Example: 1989, circa 2010, 2007-2009. Note that this field is not in a specific date format."
|
147
|
+
|
148
|
+
},
|
149
|
+
"dct_issued_dt": {
|
150
|
+
"type": "string",
|
151
|
+
"format": "date-time",
|
152
|
+
"description": "Issued date for the layer, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
|
153
|
+
|
154
|
+
},
|
155
|
+
"dct_isPartOf_sm": {
|
156
|
+
"type": "array",
|
157
|
+
"items": {
|
158
|
+
"type": "string"
|
159
|
+
},
|
160
|
+
"description": "Holding dataset for the layer, such as the name of a collection. Example: Village Maps of India."
|
161
|
+
|
162
|
+
},
|
163
|
+
"georss_point_s": {
|
164
|
+
"type": "string",
|
165
|
+
"description": "Point representation for layer as y, x - i.e., centroid. Example: 12.6 -119.4."
|
166
|
+
|
167
|
+
}
|
168
|
+
}
|
169
|
+
}
|
data/lib/tasks/geo_combine.rake
CHANGED
@@ -4,32 +4,35 @@ require 'json'
|
|
4
4
|
require 'rsolr'
|
5
5
|
|
6
6
|
namespace :geocombine do
|
7
|
+
ogm_path = ENV['OGM_PATH'] || 'tmp/opengeometadata'
|
8
|
+
solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
|
7
9
|
desc 'Clone all OpenGeoMetadata repositories'
|
8
10
|
task :clone do
  # FileUtils is normally already loaded by rake; required here so the task
  # does not depend on that.
  require 'fileutils'

  # Ask the GitHub API for the organization's repositories and keep their
  # git URLs.
  ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
  ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map { |repo| repo['git_url'] }
  ogm_repos.each do |repo|
    # Only repositories whose name starts with edu., org. or uk. are cloned.
    next unless repo =~ /^git:\/\/github.com\/OpenGeoMetadata\/(edu|org|uk)\..*/

    FileUtils.mkdir_p(ogm_path)
    # Argument-vector form: the target path and repository URL are never
    # interpreted by a shell.
    system('git', 'clone', repo, chdir: ogm_path)
  end
end
|
17
19
|
desc '"git pull" OpenGeoMetadata repositories'
|
18
20
|
task :pull do
  # Visit every entry directly under ogm_path and update those whose path
  # matches the edu./org./uk. pattern.
  Dir.glob("#{ogm_path}/*").each do |dir|
    next unless dir =~ /.*(edu|org|uk)\..*./

    # Argument-vector form with :chdir, so the directory name is never
    # interpreted by a shell.
    system('git', 'pull', 'origin', 'master', chdir: dir)
  end
end
|
21
23
|
desc 'Index all of the GeoBlacklight documents'
|
22
24
|
task :index do
  solr = RSolr.connect :url => solr_url
  # Walk the whole ogm_path tree and send every geoblacklight.json to Solr.
  Find.find(ogm_path) do |path|
    next unless path =~ /.*geoblacklight.json$/

    begin
      doc = JSON.parse(File.read(path))
      solr.update params: { commitWithin: 500, overwrite: true },
                  data: [doc].to_json,
                  headers: { 'Content-Type' => 'application/json' }
    rescue JSON::ParserError => error
      # A malformed document is reported and skipped, the same way a Solr
      # failure is, instead of aborting the whole run.
      puts "Skipping #{path}: #{error}"
    rescue RSolr::Error::Http => error
      puts error
    end
  end
end
|