gis_scraper 0.1.2.pre → 0.1.3.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +17 -0
- data/README.md +103 -7
- data/gis_scraper.gemspec +2 -0
- data/lib/gis_scraper/layer.rb +76 -8
- data/lib/gis_scraper/version.rb +1 -1
- data/lib/gis_scraper.rb +4 -1
- metadata +18 -6
- data/exe/gisget +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d095719f299da91d96069b76373e675a37a81842
|
4
|
+
data.tar.gz: 4d35174846e8bb601151e5a862e4ad39f40ee7e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69eb9967e11ff58f9de54feb8032c6097a6e7855e817acf62451b08c3df378c1c7a7c7e579f790c35fb0dacf4a1db82d13eb6d3b1c7221a3b1cdb013be705e34
|
7
|
+
data.tar.gz: b114a1f53b9fa97a0960e819f2ac26ae010dad9d417782451687f72f7884a4f950e0562f76ac27de09108809cda00d5c83cbe05d272d980fd393a09317869776
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,5 +1,22 @@
|
|
1
1
|
language: ruby
|
2
2
|
|
3
|
+
addons:
|
4
|
+
postgresql: "9.4"
|
5
|
+
|
6
|
+
services:
|
7
|
+
- postgresql
|
8
|
+
|
9
|
+
before_script:
|
10
|
+
- psql -c 'create database travis_ci_test;' -U postgres
|
11
|
+
- psql -U postgres -c 'create extension postgis;'
|
12
|
+
|
13
|
+
before_install:
|
14
|
+
- gem update bundler
|
15
|
+
# http://askubuntu.com/questions/206593/how-to-install-rgdal-on-ubuntu-12-10
|
16
|
+
- sudo apt-get update -qq
|
17
|
+
- sudo apt-get install -y aptitude
|
18
|
+
- sudo aptitude install -y libgdal-dev libproj-dev
|
19
|
+
|
3
20
|
rvm:
|
4
21
|
- 2.0.0
|
5
22
|
- 2.1.6
|
data/README.md
CHANGED
@@ -2,21 +2,117 @@
|
|
2
2
|
[![Gem Version](https://badge.fury.io/rb/gis_scraper.svg)](http://badge.fury.io/rb/gis_scraper)
|
3
3
|
[![Build status](https://secure.travis-ci.org/MatzFan/gis_scraper.svg)](http://travis-ci.org/MatzFan/gis_scraper)
|
4
4
|
|
5
|
-
Utility to recursively scrape ArcGIS MapServer data using REST API.
|
5
|
+
Utility to recursively scrape ArcGIS MapServer data using the ArcGIS REST API.
|
6
6
|
|
7
|
-
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer is extracted.
|
7
|
+
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be written as JSON files, or the data may be written directly to Postgres database tables in PostGIS format. GIS clients - e.g. QGIS - can be configured to use vector layer data from PostGIS sources.
|
8
8
|
|
9
|
-
|
9
|
+
## Requirements
|
10
10
|
|
11
|
-
|
11
|
+
Ruby 2.0 or above - see Travis badge for tested Ruby versions.
|
12
12
|
|
13
|
+
A Postgres database with the PostGIS extension enabled for database export.
|
14
|
+
|
15
|
+
For data import to a database [GDAL](http://gdal.org) must be installed and specifically the [ogr2ogr](http://www.gdal.org/ogr2ogr.html) executable must be available in your path.
|
16
|
+
|
17
|
+
## Known Limitations
|
18
|
+
|
19
|
+
*NIX systems only - Linux/Mac OS X. ArcGIS MapServer data is readable directly by ArcGIS Windows clients.
|
20
|
+
|
21
|
+
The following esri geometry types are supported:
|
22
|
+
|
23
|
+
- esriGeometryPoint, esriGeometryMultipoint, esriGeometryLine, esriGeometryPolyline, esriGeometryPolygon
|
24
|
+
|
25
|
+
## Installation
|
26
|
+
|
27
|
+
Add this line to your application's Gemfile:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
gem 'gis_scraper'
|
31
|
+
```
|
32
|
+
|
33
|
+
And then execute:
|
34
|
+
|
35
|
+
$ bundle
|
36
|
+
|
37
|
+
Or install it yourself as:
|
38
|
+
|
39
|
+
$ gem install gis_scraper
|
40
|
+
|
41
|
+
## Configuration
|
42
|
+
|
43
|
+
Configuration options may be set via a hash or specified in a Yaml file. The following options are available:
|
44
|
+
|
45
|
+
- ```:threads``` Scraping is multi-threaded. The number of threads to use may be set with this option (default: 8)
|
46
|
+
- ```:output_path``` For JSON output, the path used to write files to (default: '~/Desktop')
|
47
|
+
|
48
|
+
The following options are used to connect to a database:
|
49
|
+
|
50
|
+
- ```:host``` (default: 'localhost')
|
51
|
+
- ```:port``` (default: 5432)
|
52
|
+
- ```:dbname``` (default: 'postgres')
|
53
|
+
- ```:user``` (default: 'postgres')
|
54
|
+
- ```:password``` (default: nil)
|
55
|
+
|
56
|
+
These additional options are available when using output to a database and are applied to the ```ogr2ogr``` command:
|
57
|
+
|
58
|
+
- ```:srs``` Used to override the source spatial reference system. Currently only EPSG string format is valid - e.g. 'EPSG:3109' (default: no override)
|
59
|
+
|
60
|
+
**To set via a hash**
|
61
|
+
|
62
|
+
```Ruby
|
63
|
+
GisScraper.configure(:threads => 16)
|
64
|
+
```
|
65
|
+
|
66
|
+
**Using a Yaml configuration file**
|
67
|
+
|
68
|
+
```Ruby
|
69
|
+
GisScraper.configure_with 'path-to-Yaml-file'
|
70
|
+
```
|
71
|
+
|
72
|
+
```Ruby
|
73
|
+
GisScraper.config # returns the hash of configuration values
|
74
|
+
```
|
75
|
+
|
76
|
+
## Usage
|
77
|
+
|
78
|
+
A Layer object must be instantiated with one required arg - a MapServer/Layer URL (ending in an integer representing the layer number). Example:
|
79
|
+
|
80
|
+
```
|
81
|
+
layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0')
|
82
|
+
```
|
83
|
+
|
84
|
+
An optional second argument for the output path for JSON files may be specified. If so, this overrides the configuration option. Example:
|
85
|
+
|
86
|
+
```
|
87
|
+
layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0', '~/Desktop')
|
13
88
|
```
|
14
|
-
|
89
|
+
|
90
|
+
**JSON output**
|
91
|
+
|
92
|
+
```
|
93
|
+
layer.output_json
|
15
94
|
```
|
16
95
|
|
17
96
|
If the layer is type 'Feature Layer', a single file of JSON data will be saved (named the same as the layer). If the layer is type 'Group Layer', the sub-group structure is traversed recursively thus: Directories for each sub-group layer are created and JSON data files for each constituent feature layer written to them.
|
18
97
|
|
19
|
-
**
|
98
|
+
**Output to a database**
|
99
|
+
|
100
|
+
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data.
|
101
|
+
|
102
|
+
```
|
103
|
+
layer.output_to_db
|
104
|
+
```
|
105
|
+
|
106
|
+
## Specification and Tests
|
107
|
+
|
108
|
+
For the full specification clone this repo and run:
|
109
|
+
|
110
|
+
`rake spec`
|
111
|
+
|
112
|
+
## Contributing
|
113
|
+
|
114
|
+
Bug reports, pull requests (and feature requests) are welcome on GitHub at https://github.com/MatzFan/gis_scraper.
|
20
115
|
|
21
|
-
|
116
|
+
## License
|
22
117
|
|
118
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses)
|
data/gis_scraper.gemspec
CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
s.summary = %q{Scrapes ArcGIS data from MapServer REST API}
|
14
14
|
s.description = %q{Scrapes ArcGIS data from MapServer REST API}
|
15
|
+
s.required_ruby_version = '>= 2.0'
|
15
16
|
s.license = "MIT"
|
16
17
|
|
17
18
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
|
@@ -25,4 +26,5 @@ Gem::Specification.new do |s|
|
|
25
26
|
|
26
27
|
s.add_runtime_dependency 'mechanize', '~> 2.7'
|
27
28
|
s.add_runtime_dependency 'parallel', '~> 1.6'
|
29
|
+
s.add_development_dependency 'pg', '~> 0.18'
|
28
30
|
end
|
data/lib/gis_scraper/layer.rb
CHANGED
@@ -12,6 +12,8 @@ class Layer
|
|
12
12
|
end
|
13
13
|
|
14
14
|
class UnknownLayerType < StandardError; end
|
15
|
+
class NoDatabase < StandardError; end
|
16
|
+
class OgrMissing < StandardError; end
|
15
17
|
|
16
18
|
attr_reader :type, :id, :name
|
17
19
|
|
@@ -21,8 +23,21 @@ class Layer
|
|
21
23
|
'Annotation SubLayer']
|
22
24
|
QUERYABLE = ['Feature Layer', 'Annotation Layer']
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
+
CONN = [:host, :port, :dbname, :user, :password] # PG connection options
|
27
|
+
|
28
|
+
GEOM_TYPES = {esriGeometryPoint: 'POINT',
|
29
|
+
esriGeometryMultipoint: 'MULTIPOINT',
|
30
|
+
esriGeometryLine: 'LINESTRING',
|
31
|
+
esriGeometryPolyline: 'MULTILINESTRING',
|
32
|
+
esriGeometryPolygon: 'MULTIPOLYGON'}
|
33
|
+
|
34
|
+
|
35
|
+
OGR2OGR = 'ogr2ogr -f "PostgreSQL" PG:'
|
36
|
+
|
37
|
+
def initialize(url, output_path = nil)
|
38
|
+
@conn_hash = CONN.zip(CONN.map { |key| GisScraper.config[key] }).to_h
|
39
|
+
@url = url
|
40
|
+
@output_path = output_path || config_path
|
26
41
|
@ms_url = ms_url # map server url ending '../MapServer'
|
27
42
|
@id = id
|
28
43
|
@agent = Mechanize.new
|
@@ -33,12 +48,32 @@ class Layer
|
|
33
48
|
@name = name
|
34
49
|
end
|
35
50
|
|
36
|
-
def
|
51
|
+
def output_json
|
37
52
|
QUERYABLE.any? { |l| @type == l } ? write_json_files : process_sub_layers
|
38
53
|
end
|
39
54
|
|
55
|
+
def output_to_db
|
56
|
+
raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' if !ogr2ogr?
|
57
|
+
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !db?
|
58
|
+
@output_path = 'tmp' # write all files to the Gem's tmp dir
|
59
|
+
output_json
|
60
|
+
write_json_files_to_db_tables
|
61
|
+
end
|
62
|
+
|
40
63
|
private
|
41
64
|
|
65
|
+
def db?
|
66
|
+
PG.connect(@conn_hash) rescue nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def ogr2ogr?
|
70
|
+
`ogr2ogr --version` rescue nil
|
71
|
+
end
|
72
|
+
|
73
|
+
def config_path
|
74
|
+
File.expand_path GisScraper.config[:output_path]
|
75
|
+
end
|
76
|
+
|
42
77
|
def ms_url
|
43
78
|
@url.split('/')[0..-2].join('/')
|
44
79
|
end
|
@@ -78,20 +113,53 @@ class Layer
|
|
78
113
|
end
|
79
114
|
|
80
115
|
def write_json_files
|
81
|
-
File.write "#{@
|
116
|
+
File.write "#{@output_path}/#{@name}.json", json_data("#{@ms_url}/#{@id}")
|
117
|
+
end
|
118
|
+
|
119
|
+
def write_json_files_to_db_tables
|
120
|
+
files.each do |f|
|
121
|
+
`#{OGR2OGR}"#{conn}" "#{f}" -nln #{base(f)} #{srs} -nlt #{geom(f)}`
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def geom(file)
|
126
|
+
esri = esri_geom(file)
|
127
|
+
GEOM_TYPES[esri.to_sym] || raise("Unknown geometry type: '#{esri}'")
|
128
|
+
end
|
129
|
+
|
130
|
+
def esri_geom(file)
|
131
|
+
JSON.parse(File.read(file))['geometryType']
|
132
|
+
end
|
133
|
+
|
134
|
+
def srs
|
135
|
+
return '' unless GisScraper.config[:srs]
|
136
|
+
"-a_srs #{GisScraper.config[:srs]}" || ''
|
137
|
+
end
|
138
|
+
|
139
|
+
def base(full_file_name)
|
140
|
+
full_file_name.split('/').last[0..-6].downcase
|
141
|
+
end
|
142
|
+
|
143
|
+
def files
|
144
|
+
Dir.glob('tmp/**/*.json')
|
145
|
+
end
|
146
|
+
|
147
|
+
def conn
|
148
|
+
host, port, db, user, pwd = *@conn_hash.values
|
149
|
+
"host=#{host} port=#{port} dbname=#{db} user=#{user} password=#{pwd}"
|
82
150
|
end
|
83
151
|
|
84
152
|
def process_sub_layers
|
85
153
|
sub_layer_id_names.each do |hash|
|
86
154
|
name, id = hash['name'], hash['id']
|
87
|
-
path = "#{@
|
88
|
-
|
155
|
+
path = "#{@output_path}/#{name}"
|
156
|
+
recurse_json sub_layer(id, path), path
|
89
157
|
end
|
90
158
|
end
|
91
159
|
|
92
|
-
def
|
160
|
+
def recurse_json(layer, dir)
|
93
161
|
FileUtils.mkdir dir
|
94
|
-
layer.
|
162
|
+
layer.output_json
|
95
163
|
end
|
96
164
|
|
97
165
|
def sub_layer(id, path)
|
data/lib/gis_scraper/version.rb
CHANGED
data/lib/gis_scraper.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'mechanize'
|
3
3
|
require 'parallel'
|
4
|
+
require 'pg'
|
4
5
|
|
5
6
|
require 'gis_scraper/version'
|
6
7
|
require 'gis_scraper/feature_scraper'
|
@@ -9,7 +10,9 @@ require 'gis_scraper/layer'
|
|
9
10
|
# stackoverflow.com/questions/6233124/where-to-place-access-config-file-in-gem
|
10
11
|
module GisScraper
|
11
12
|
|
12
|
-
@config = {threads: 8
|
13
|
+
@config = {threads: 8, output_path: '~/Desktop',
|
14
|
+
host: 'localhost', port: 5432, dbname: 'postgres', user: 'postgres', password: nil,
|
15
|
+
srs: nil}
|
13
16
|
@valid_keys = @config.keys
|
14
17
|
|
15
18
|
def self.configure(opts = {})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gis_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bruce Steedman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,11 +80,24 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '1.6'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pg
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.18'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.18'
|
83
97
|
description: Scrapes ArcGIS data from MapServer REST API
|
84
98
|
email:
|
85
99
|
- bruce.steedman@gmail.com
|
86
|
-
executables:
|
87
|
-
- gisget
|
100
|
+
executables: []
|
88
101
|
extensions: []
|
89
102
|
extra_rdoc_files: []
|
90
103
|
files:
|
@@ -97,7 +110,6 @@ files:
|
|
97
110
|
- Rakefile
|
98
111
|
- bin/console
|
99
112
|
- bin/setup
|
100
|
-
- exe/gisget
|
101
113
|
- gis_scraper.gemspec
|
102
114
|
- lib/gis_scraper.rb
|
103
115
|
- lib/gis_scraper/feature_scraper.rb
|
@@ -115,7 +127,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
115
127
|
requirements:
|
116
128
|
- - ">="
|
117
129
|
- !ruby/object:Gem::Version
|
118
|
-
version: '0'
|
130
|
+
version: '2.0'
|
119
131
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
132
|
requirements:
|
121
133
|
- - ">"
|