geo_combine 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23a7b7ef3140404f61f2c0143e25556163178bb1
4
- data.tar.gz: f776389c9604fd18cbdf958c2f68b2efd4181fd2
3
+ metadata.gz: 8627401efabab03d06f210ee160e7e09c7223398
4
+ data.tar.gz: dcdc93c201e689dc2c9b8245ac2076617af34e39
5
5
  SHA512:
6
- metadata.gz: 1b9ecf8ae11b1e211665e5d90921b32a5f53c7d7e07bfbb5b7d7ab46c6cf27e3e759230b94ff6c8197ba4c38b8a1eb11e299dc3e5b879c15907db01f9982e728
7
- data.tar.gz: 12e7c094382bc5ea0fe803c2fe7b1aef653102d3082ceb4b9f1068e3d4a1124a571b3b3189030ac5378bcdc3f84cbdf86e3159085460adf565931a2b4b68d419
6
+ metadata.gz: 9c957f3b783684f18c18c3265349c7efa4db3aa12094f573e9d4d9e5a2366f3464e390add7a12734213ca1801c5ef3d0d820a8139fdce47c07ba99e037f71e5f
7
+ data.tar.gz: e6bedee395328db1deed473721231e7a5d678e0bfa2a9b038e3f7de596e99622cc7dff7b4dc2d6db14d2d5734339d01fa9d5a462bfbe4363872afe87c98ecb48
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: travis-ci
data/README.md CHANGED
@@ -29,7 +29,7 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
29
29
 
30
30
  ```ruby
31
31
  # Create a new ISO19139 object
32
- > iso_metadata = GeoCombine::Iso19139.new('./tmp/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
32
+ > iso_metadata = GeoCombine::Iso19139.new('./tmp/opengeometadata/edu.stanford.purl/bb/338/jh/0716/iso19139.xml')
33
33
 
34
34
  # Convert it to GeoBlacklight
35
35
  > iso_metadata.to_geoblacklight
@@ -41,13 +41,25 @@ GeoCombine can be used as a set of rake tasks for cloning, updating, and indexin
41
41
  > iso_metadata.to_html
42
42
  ```
43
43
 
44
+ ## Command line ##
45
+
46
+ GeoCombine's tasks can be run either as rake tasks or as standalone executables.
47
+
44
48
  ### Clone all OpenGeoMetadata repositories
45
49
 
46
50
  ```sh
47
51
  $ rake geocombine:clone
48
52
  ```
49
53
 
50
- Will clone all edu.* OpenGeoMetadata repositories into `./tmp`
54
+ ```sh
55
+ $ bundle exec geocombine clone
56
+ ```
57
+
58
+ Clones all `edu.*`, `org.*`, and `uk.*` OpenGeoMetadata repositories into `./tmp/opengeometadata`. The location of the OpenGeoMetadata repositories can be configured using the `OGM_PATH` environment variable.
59
+
60
+ ```sh
61
+ $ OGM_PATH='my/custom/location' rake geocombine:clone
62
+ ```
51
63
 
52
64
  ### Pull all OpenGeoMetadata repositories
53
65
 
@@ -55,7 +67,11 @@ Will clone all edu.* OpenGeoMetadata repositories into `./tmp`
55
67
  $ rake geocombine:pull
56
68
  ```
57
69
 
58
- Runs `git pull origin master` on all cloned repositories in `./tmp`
70
+ ```sh
71
+ $ bundle exec geocombine pull
72
+ ```
73
+
74
+ Runs `git pull origin master` on all cloned repositories in `./tmp/opengeometadata` (or custom path with configured environment variable `OGM_PATH`)
59
75
 
60
76
  ### Index all of the GeoBlacklight documents
61
77
 
@@ -63,7 +79,11 @@ Runs `git pull origin master` on all cloned repositories in `./tmp`
63
79
  $ rake geocombine:index
64
80
  ```
65
81
 
66
- Indexs all of the `geoblacklight.xml` files in cloned repositories to a Solr index running at http://127.0.0.1:8983/solr
82
+ ```sh
83
+ $ bundle exec geocombine index
84
+ ```
85
+
86
+ Indexes all of the `geoblacklight.json` files in cloned repositories to a Solr index running at http://127.0.0.1:8983/solr/blacklight-core (or at a custom URL configured with the `SOLR_URL` environment variable)
67
87
 
68
88
  ## Contributing
69
89
 
data/bin/geocombine ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Executable entry point for the `geocombine` command: loads the Thor based
# CLI and lets it dispatch on the arguments given on the command line.
require 'geo_combine/cli'

GeoCombine::CLI.start(ARGV)
data/geo_combine.gemspec CHANGED
@@ -20,8 +20,12 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_dependency 'rsolr'
22
22
  spec.add_dependency 'nokogiri'
23
+ spec.add_dependency 'json-schema'
24
+ spec.add_dependency 'sanitize'
25
+ spec.add_dependency 'thor'
23
26
 
24
27
  spec.add_development_dependency "bundler", "~> 1.7"
25
28
  spec.add_development_dependency "rake", "~> 10.0"
26
29
  spec.add_development_dependency 'rspec'
30
+ spec.add_development_dependency 'rspec-html-matchers'
27
31
  end
@@ -0,0 +1,25 @@
1
+ require 'thor'
2
+ require 'rake'
3
+
4
# Load every *.rake file found under lib/tasks of the installed geo_combine
# gem so that the tasks can be looked up through Rake::Task.
gem_root = Gem::Specification.find_by_name('geo_combine').gem_dir
Dir.glob(File.join(gem_root, 'lib/tasks/*.rake')).each { |rake_file| load rake_file }
7
+
8
module GeoCombine
  ##
  # Thor command-line interface exposing the geocombine rake tasks as
  # subcommands
  class CLI < Thor
    desc 'clone', 'Clone all OpenGeoMetadata repositories'
    def clone
      invoke_rake_task('geocombine:clone')
    end

    desc 'pull', '"git pull" OpenGeoMetadata repositories'
    def pull
      invoke_rake_task('geocombine:pull')
    end

    desc 'index', 'Index all of the GeoBlacklight documents'
    def index
      invoke_rake_task('geocombine:index')
    end

    private

    ##
    # Looks up a rake task by its full name and invokes it
    # @param [String] task_name namespaced task name, e.g. 'geocombine:pull'
    def invoke_rake_task(task_name)
      Rake::Task[task_name].invoke
    end
  end
end
@@ -0,0 +1,97 @@
1
module GeoCombine
  ##
  # Wraps a metadata record from an Esri Open Data portal and converts it
  # into GeoBlacklight-Schema terms
  class EsriOpenData
    include GeoCombine::Formatting
    attr_reader :metadata

    ##
    # Parses the record and keeps a reference to its extent coordinates
    # @param [String] metadata a valid serialized JSON string from an ESRI Open
    # Data portal
    def initialize(metadata)
      @metadata = JSON.parse(metadata)
      @geometry = @metadata['extent']['coordinates']
    end

    ##
    # Wraps the translated terms in a GeoBlacklight schema object
    # @return [GeoCombine::Geoblacklight]
    def to_geoblacklight
      terms_as_json = geoblacklight_terms.to_json
      GeoCombine::Geoblacklight.new(terms_as_json)
    end

    ##
    # Maps the Esri Open Data portal fields onto a GeoBlacklight Schema type
    # hash
    # @return [Hash]
    def geoblacklight_terms
      record_id = @metadata['id']
      {
        uuid: record_id,
        dc_identifier_s: record_id,
        dc_title_s: @metadata['name'],
        dc_description_s: sanitize_and_remove_lines(@metadata['description']),
        dc_rights_s: 'Public',
        dct_provenance_s: @metadata['owner'],
        dct_references_s: references,
        georss_box_s: georss_box,
        # layer_id_s describes a layer id for a web service (WMS, WFS); it is
        # left blank here but is still a required field
        layer_id_s: '',
        layer_geom_type_s: @metadata['geometry_type'],
        layer_modified_dt: @metadata['updated_at'],
        layer_slug_s: record_id,
        solr_geom: envelope,
        # solr_year_i: '', No equivalent in Esri Open Data metadata
        dc_subject_sm: @metadata['tags']
      }
    end

    ##
    # Serializes the references hash for dct_references_s
    # @return [String] JSON
    def references
      references_hash.to_json
    end

    ##
    # Collects the URLs used for dct_references, keyed by reference URI
    # @return [Hash]
    def references_hash
      {
        'http://schema.org/url' => @metadata['landing_page'],
        'http://resources.arcgis.com/en/help/arcgis-rest-api' => @metadata['url']
      }
    end

    ##
    # Builds a GeoRSS box, ordered south west north east
    # @return [String]
    def georss_box
      format('%s %s %s %s', south, west, north, east)
    end

    ##
    # Builds a Solr Envelope using CQL syntax, ordered west east north south
    # @return [String]
    def envelope
      format('ENVELOPE(%s, %s, %s, %s)', west, east, north, south)
    end

    private

    # The bounding values are read from the two coordinate pairs of the
    # extent: the first pair supplies west/south, the second east/north.

    def first_pair
      @geometry[0]
    end

    def second_pair
      @geometry[1]
    end

    def north
      second_pair[1]
    end

    def south
      first_pair[1]
    end

    def east
      second_pair[0]
    end

    def west
      first_pair[0]
    end
  end
end
@@ -0,0 +1,11 @@
1
module GeoCombine
  ##
  # Lookup table translating mime-types into format names that are valid in
  # the GeoBlacklight-Schema
  module Formats
    ##
    # @return [Hash] mime-type => GeoBlacklight-Schema format name
    def formats
      { 'application/x-esri-shapefile' => 'Shapefile' }
    end
  end
end
@@ -0,0 +1,29 @@
1
module GeoCombine
  ##
  # Mixin used for formatting metadata fields
  module Formatting
    ##
    # Sanitizes html from a text input
    # @param [String] text
    # @return [String] the result of Sanitize.fragment for the text
    def sanitize(text)
      Sanitize.fragment(text)
    end

    ##
    # Removes line breaks from a text input. Carriage returns are removed
    # together with line feeds so that CRLF terminated input does not leave
    # stray "\r" characters behind.
    # @param [String] text
    # @return [String]
    def remove_lines(text)
      # String#delete treats its argument as a character set: every "\r" and
      # every "\n" is dropped.
      text.delete("\r\n")
    end

    ##
    # Sanitizes and removes lines from a text block
    # @param [String] text
    # @return [String]
    def sanitize_and_remove_lines(text)
      remove_lines(sanitize(text))
    end
  end
end
@@ -1,25 +1,91 @@
1
1
# Time.parse and Time#iso8601 are provided by the 'time' standard library
require 'time'

module GeoCombine
  ##
  # Data model for a document in GeoBlacklight-Schema, backed by a Hash
  class Geoblacklight
    include GeoCombine::Formats
    include GeoCombine::Subjects
    include GeoCombine::GeometryTypes

    attr_reader :metadata

    ##
    # Initializes a GeoBlacklight object
    # @param [String] metadata a valid JSON string document in
    # GeoBlacklight-Schema
    # @param [Hash] fields enhancements to metadata that are merged with @metadata
    def initialize(metadata, fields = {})
      @metadata = JSON.parse(metadata).merge(fields)
    end

    ##
    # Calls the metadata enhancement methods for each key in the metadata
    # hash. The current value is re-read before every step so a step always
    # sees the result of the previous one, and array wrapping runs first so
    # that a single String subject can be translated like a list of subjects.
    # @return [Hash] the enhanced metadata
    def enhance_metadata
      # Only existing keys are reassigned, which is allowed while iterating.
      @metadata.each_key do |key|
        fields_should_be_array(key, @metadata[key])
        translate_formats(key, @metadata[key])
        enhance_subjects(key, @metadata[key])
        format_proper_date(key, @metadata[key])
        translate_geometry_type(key, @metadata[key])
      end
    end

    ##
    # Returns a string of JSON from a GeoBlacklight hash
    # @param args optional generator arguments, forwarded to Hash#to_json so
    #   the object can also be serialized as part of a larger structure
    # @return (String)
    def to_json(*args)
      @metadata.to_json(*args)
    end

    ##
    # Validates a GeoBlacklight-Schema json document
    # @return [Boolean] true when the document conforms to the schema
    # @raise when the document does not conform: JSON::Validator.validate!
    #   raises instead of returning false
    def valid?
      schema_path = File.join(File.dirname(__FILE__), '../schema/geoblacklight-schema.json')
      schema = JSON.parse(File.read(schema_path))
      JSON::Validator.validate!(schema, to_json, validate_schema: true)
    end

    private

    ##
    # Enhances the 'dc_format_s' field by translating a format type to a valid
    # GeoBlacklight-Schema format
    def translate_formats(key, value)
      return unless key == 'dc_format_s'
      known_formats = formats
      @metadata[key] = known_formats[value] if known_formats.key?(value)
    end

    ##
    # Enhances the 'layer_geom_type_s' field by translating from known types
    def translate_geometry_type(key, value)
      return unless key == 'layer_geom_type_s'
      known_types = geometry_types
      @metadata[key] = known_types[value] if known_types.key?(value)
    end

    ##
    # Enhances the 'dc_subject_sm' field by translating subjects to ISO topic
    # categories; subjects without a translation are kept as they are
    def enhance_subjects(key, value)
      return unless key == 'dc_subject_sm'
      known_subjects = subjects
      @metadata[key] = value.map { |val| known_subjects.fetch(val, val) }
    end

    ##
    # Formats the 'layer_modified_dt' to a valid RFC3339 date/time string
    # and ISO8601 (for indexing into Solr)
    def format_proper_date(key, value)
      @metadata[key] = Time.parse(value).utc.iso8601 if key == 'layer_modified_dt'
    end

    ##
    # Wraps the value of a multi-valued field in an Array when it is not one
    # already
    def fields_should_be_array(key, value)
      @metadata[key] = [value] if should_be_array.include?(key) && !value.is_a?(Array)
    end

    ##
    # GeoBlacklight-Schema fields that should be type Array
    def should_be_array
      ['dc_creator_sm', 'dc_subject_sm', 'dct_spatial_sm', 'dct_temporal_sm', 'dct_isPartOf_sm']
    end
  end
end
@@ -0,0 +1,11 @@
1
module GeoCombine
  ##
  # Lookup table translating Esri geometry type names into the geometry
  # type names used for layer_geom_type_s
  module GeometryTypes
    ##
    # @return [Hash] Esri geometry type => translated geometry type
    def geometry_types
      {
        'esriGeometryPoint'    => 'Point',
        'esriGeometryPolygon'  => 'Polygon',
        'esriGeometryPolyline' => 'Line'
      }
    end
  end
end
@@ -0,0 +1,29 @@
1
module GeoCombine
  ##
  # Lookup table translating subject keywords into ISO topic labels
  module Subjects
    ##
    # @return [Hash] subject keyword => ISO topic label
    def subjects
      {
        'farming'                          => 'Farming',
        'biota'                            => 'Biology and Ecology',
        'climatologyMeteorologyAtmosphere' => 'Climatology, Meteorology and Atmosphere',
        'boundaries'                       => 'Boundaries',
        'elevation'                        => 'Elevation',
        'environment'                      => 'Environment',
        'geoscientificInformation'         => 'Geoscientific Information',
        'health'                           => 'Health',
        'imageryBaseMapsEarthCover'        => 'Imagery and Base Maps',
        'intelligenceMilitary'             => 'Military',
        'inlandWaters'                     => 'Inland Waters',
        'location'                         => 'Location',
        'oceans'                           => 'Oceans',
        'planningCadastre'                 => 'Planning and Cadastral',
        'structure'                        => 'Structure',
        'transportation'                   => 'Transportation',
        'utilitiesCommunication'           => 'Utilities and Communication',
        'society'                          => 'Society',
        'economy'                          => 'Economy'
      }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
module GeoCombine
  # Version string of the geo_combine gem
  VERSION = '0.1.0'
end
data/lib/geo_combine.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  require 'nokogiri'
2
+ require 'json'
3
+ require 'json-schema'
4
+ require 'sanitize'
2
5
 
3
6
  module GeoCombine
4
7
 
@@ -28,9 +31,12 @@ module GeoCombine
28
31
 
29
32
  ##
30
33
  # Perform an XSLT transformation on metadata using an object's XSL
31
- # @return [GeoCombine::Geoblacklight] the data transformed into geoblacklight schema, returned as a GeoCombine::Geoblacklight
32
- def to_geoblacklight
33
- GeoCombine::Geoblacklight.new(xsl_geoblacklight.transform(@metadata))
34
# @param [Hash] fields additional GeoBlacklight fields to be passed to
# GeoCombine::Geoblacklight on its instantiation
# @return [GeoCombine::Geoblacklight] the data transformed into
# geoblacklight schema, returned as a GeoCombine::Geoblacklight
def to_geoblacklight(fields = {})
  GeoCombine::Geoblacklight.new(xsl_geoblacklight.apply_to(@metadata), fields)
end
35
41
 
36
42
  ##
@@ -42,7 +48,19 @@ module GeoCombine
42
48
  end
43
49
  end
44
50
 
51
+ # Require translation mixins
52
+ require 'geo_combine/formats'
53
+ require 'geo_combine/subjects'
54
+ require 'geo_combine/geometry_types'
55
+
56
+ # Require helper mixins
57
+ require 'geo_combine/formatting'
58
+
59
+ # Require additional classes
45
60
  require 'geo_combine/fgdc'
46
61
  require 'geo_combine/geoblacklight'
47
62
  require 'geo_combine/iso19139'
63
+ require 'geo_combine/esri_open_data'
64
+
65
+ # Require gem files
48
66
  require 'geo_combine/version'
@@ -0,0 +1,169 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "Schema for GeoBlacklight as implemented for Solr 4.10+. See http://journal.code4lib.org/articles/9710 for more details. Note that the Solr schema uses dynamic typing based on the suffix of the field name. For example, _s denotes a string where _sm denotes a multi-valued string (array of strings).",
4
+ "id": "http://geoblacklight.org/schema",
5
+ "title": "GeoBlacklight Schema",
6
+ "required": [
7
+ "uuid",
8
+ "dc_identifier_s",
9
+ "dc_title_s",
10
+ "dc_description_s",
11
+ "dc_rights_s",
12
+ "dct_provenance_s",
13
+ "dct_references_s",
14
+ "georss_box_s",
15
+ "layer_id_s",
16
+ "layer_geom_type_s",
17
+ "layer_modified_dt",
18
+ "layer_slug_s",
19
+ "solr_geom",
20
+ "solr_year_i"
21
+ ],
22
+ "type": "object",
23
+ "properties": {
24
+ "uuid": {
25
+ "type": "string",
26
+ "description": "Unique identifier for layer that is globally unique."
27
+
28
+ },
29
+ "dc_identifier_s": {
30
+ "type": "string",
31
+ "description": "Unique identifier for layer. May be same as UUID but may be an alternate identifier."
32
+ },
33
+ "dc_title_s": {
34
+ "type": "string",
35
+ "description": "Title for the layer."
36
+
37
+ },
38
+ "dc_description_s": {
39
+ "type": "string",
40
+ "description": "Description for the layer."
41
+
42
+ },
43
+ "dc_rights_s": {
44
+ "type": "string",
45
+ "enum": ["Public", "Restricted"],
46
+ "description": "Access rights for the layer."
47
+
48
+ },
49
+ "dct_provenance_s": {
50
+ "type": "string",
51
+ "description": "Institution who holds the layer."
52
+
53
+ },
54
+ "dct_references_s": {
55
+ "type": "string",
56
+ "description": "JSON hash for external resources, where each key is a URI for the protocol or format and the value is the URL to the resource."
57
+
58
+ },
59
+ "georss_box_s": {
60
+ "type": "string",
61
+ "description": "Bounding box as maximum values for S W N E. Example: 12.6 -119.4 19.9 84.8."
62
+
63
+ },
64
+ "layer_id_s": {
65
+ "type": "string",
66
+ "description": "The complete identifier for the layer via WMS/WFS/WCS protocol. Example: druid:vr593vj7147."
67
+
68
+ },
69
+ "layer_geom_type_s": {
70
+ "type": "string",
71
+ "enum": ["Point", "Line", "Polygon", "Raster", "Scanned Map", "Mixed"],
72
+ "description": "Geometry type for layer data, using controlled vocabulary."
73
+ },
74
+ "layer_modified_dt": {
75
+ "type": "string",
76
+ "format": "date-time",
77
+ "description": "Last modification date for the metadata record, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
78
+ },
79
+ "layer_slug_s": {
80
+ "type": "string",
81
+ "description": "Unique identifier visible to the user, used for Permalinks. Example: stanford-vr593vj7147."
82
+
83
+ },
84
+ "solr_geom": {
85
+ "type": "string",
86
+ "pattern": "ENVELOPE(.*,.*,.*,.*)",
87
+ "description": "Derived from georss_polygon_s or georss_box_s. Shape of the layer as a ENVELOPE WKT using W E N S. Example: ENVELOPE(76.76, 84.76, 19.91, 12.62). Note that this field is indexed as a Solr spatial (RPT) field."
88
+
89
+ },
90
+ "solr_year_i": {
91
+ "type": "integer",
92
+ "description": "Derived from dct_temporal_sm. Year for which layer is valid and only a single value. Example: 1989. Note that this field is indexed as a Solr numeric field."
93
+
94
+ },
95
+ "dc_creator_sm": {
96
+ "type": "array",
97
+ "items": {
98
+ "type": "string"
99
+ },
100
+ "description": "Author(s). Example: George Washington, Thomas Jefferson."
101
+
102
+ },
103
+ "dc_format_s": {
104
+ "type": "string",
105
+ "enum": ["Shapefile", "GeoTIFF", "ArcGRID"],
106
+ "description": "File format for the layer, using a controlled vocabulary."
107
+
108
+ },
109
+ "dc_language_s": {
110
+ "type": "string",
111
+ "description": "Language for the layer. Example: English."
112
+
113
+ },
114
+ "dc_publisher_s": {
115
+ "type": "string",
116
+ "description": "Publisher. Example: ML InfoMap."
117
+
118
+ },
119
+ "dc_subject_sm": {
120
+ "type": "array",
121
+ "items": {
122
+ "type": "string"
123
+ },
124
+ "description": "Subjects, preferably in a controlled vocabulary. Examples: Census, Human settlements."
125
+
126
+ },
127
+ "dc_type_s": {
128
+ "type": "string",
129
+ "enum": ["Dataset", "Image", "PhysicalObject"],
130
+ "description": "Resource type, using DCMI Type Vocabulary."
131
+
132
+ },
133
+ "dct_spatial_sm": {
134
+ "type": "array",
135
+ "items": {
136
+ "type": "string"
137
+ },
138
+ "description": "Spatial coverage and place names, preferably in a controlled vocabulary. Example: 'Paris, France'."
139
+
140
+ },
141
+ "dct_temporal_sm": {
142
+ "type": "array",
143
+ "items": {
144
+ "type": "string"
145
+ },
146
+ "description": "Temporal coverage, typically years or dates. Example: 1989, circa 2010, 2007-2009. Note that this field is not in a specific date format."
147
+
148
+ },
149
+ "dct_issued_dt": {
150
+ "type": "string",
151
+ "format": "date-time",
152
+ "description": "Issued date for the layer, using XML Schema dateTime format (YYYY-MM-DDThh:mm:ssZ)."
153
+
154
+ },
155
+ "dct_isPartOf_sm": {
156
+ "type": "array",
157
+ "items": {
158
+ "type": "string"
159
+ },
160
+ "description": "Holding dataset for the layer, such as the name of a collection. Example: Village Maps of India."
161
+
162
+ },
163
+ "georss_point_s": {
164
+ "type": "string",
165
+ "description": "Point representation for layer as y, x - i.e., centroid. Example: 12.6 -119.4."
166
+
167
+ }
168
+ }
169
+ }
@@ -4,32 +4,35 @@ require 'json'
4
4
  require 'rsolr'
5
5
 
6
6
# FileUtils is used below to create the checkout directory without a shell
require 'fileutils'

namespace :geocombine do
  # Checkout directory and Solr endpoint; both can be overridden through the
  # OGM_PATH and SOLR_URL environment variables.
  ogm_path = ENV['OGM_PATH'] || 'tmp/opengeometadata'
  solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'

  desc 'Clone all OpenGeoMetadata repositories'
  task :clone do
    ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
    ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map { |repo| repo['git_url'] }
    ogm_repos.each do |repo|
      # Only repositories whose name starts with "edu.", "org." or "uk."
      next unless repo =~ %r{\Agit://github\.com/OpenGeoMetadata/(edu|org|uk)\.}
      # The directory is created and git is run without going through a
      # shell, so the path and the URL are never interpreted as shell syntax.
      FileUtils.mkdir_p(ogm_path)
      system('git', 'clone', repo, chdir: ogm_path)
    end
  end

  desc '"git pull" OpenGeoMetadata repositories'
  task :pull do
    Dir.glob("#{ogm_path}/*").each do |dir|
      # Same selection as the clone task: "edu.", "org." or "uk." followed by
      # at least one more character; plain files are skipped.
      next unless File.directory?(dir) && dir =~ /(edu|org|uk)\../
      system('git', 'pull', 'origin', 'master', chdir: dir)
    end
  end

  desc 'Index all of the GeoBlacklight documents'
  task :index do
    solr = RSolr.connect url: solr_url
    Find.find(ogm_path) do |path|
      next unless path =~ /geoblacklight\.json\z/
      doc = JSON.parse(File.read(path))
      begin
        solr.update params: { commitWithin: 500, overwrite: true },
                    data: [doc].to_json,
                    headers: { 'Content-Type' => 'application/json' }
      rescue RSolr::Error::Http => error
        # A document rejected by Solr is reported and indexing continues
        puts error
      end
    end
  end
end
  end