gis_scraper 0.1.2.pre → 0.1.3.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +17 -0
- data/README.md +103 -7
- data/gis_scraper.gemspec +2 -0
- data/lib/gis_scraper/layer.rb +76 -8
- data/lib/gis_scraper/version.rb +1 -1
- data/lib/gis_scraper.rb +4 -1
- metadata +18 -6
- data/exe/gisget +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d095719f299da91d96069b76373e675a37a81842
|
4
|
+
data.tar.gz: 4d35174846e8bb601151e5a862e4ad39f40ee7e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69eb9967e11ff58f9de54feb8032c6097a6e7855e817acf62451b08c3df378c1c7a7c7e579f790c35fb0dacf4a1db82d13eb6d3b1c7221a3b1cdb013be705e34
|
7
|
+
data.tar.gz: b114a1f53b9fa97a0960e819f2ac26ae010dad9d417782451687f72f7884a4f950e0562f76ac27de09108809cda00d5c83cbe05d272d980fd393a09317869776
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,5 +1,22 @@
|
|
1
1
|
language: ruby
|
2
2
|
|
3
|
+
addons:
|
4
|
+
postgresql: "9.4"
|
5
|
+
|
6
|
+
services:
|
7
|
+
- postgresql
|
8
|
+
|
9
|
+
before_script:
|
10
|
+
- psql -c 'create database travis_ci_test;' -U postgres
|
11
|
+
- psql -U postgres -c 'create extension postgis;'
|
12
|
+
|
13
|
+
before_install:
|
14
|
+
- gem update bundler
|
15
|
+
# http://askubuntu.com/questions/206593/how-to-install-rgdal-on-ubuntu-12-10
|
16
|
+
- sudo apt-get update -qq
|
17
|
+
- sudo apt-get install -y aptitude
|
18
|
+
- sudo aptitude install -y libgdal-dev libproj-dev
|
19
|
+
|
3
20
|
rvm:
|
4
21
|
- 2.0.0
|
5
22
|
- 2.1.6
|
data/README.md
CHANGED
@@ -2,21 +2,117 @@
|
|
2
2
|
[](http://badge.fury.io/rb/gis_scraper)
|
3
3
|
[](http://travis-ci.org/MatzFan/gis_scraper)
|
4
4
|
|
5
|
-
Utility to recursively scrape ArcGIS MapServer data using REST API.
|
5
|
+
Utility to recursively scrape ArcGIS MapServer data using the ArcGIS REST API.
|
6
6
|
|
7
|
-
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer is extracted.
|
7
|
+
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be JSON file format or data may be written directly to Postgres database tables in PostGIS format. GIS clients - e.g. QGIS - can be configured to use vector layer data from PostGIS sources.
|
8
8
|
|
9
|
-
|
9
|
+
## Requirements
|
10
10
|
|
11
|
-
|
11
|
+
Ruby 2.0 or above - see Travis badge for tested Ruby versions.
|
12
12
|
|
13
|
+
A Postgres database with the PostGIS extension enabled for database export.
|
14
|
+
|
15
|
+
For data import to a database [GDAL](http://gdal.org) must be installed and specifically the [ogr2ogr](http://www.gdal.org/ogr2ogr.html) executable must be available in your path.
|
16
|
+
|
17
|
+
## Known Limitations
|
18
|
+
|
19
|
+
*NIX systems only - Linux/Mac OS X. ArcGIS MapServer data is readable directly by ArcGIS Windows clients.
|
20
|
+
|
21
|
+
The following esri geometry types are supported:
|
22
|
+
|
23
|
+
- esriGeometryPoint, esriGeometryMultipoint, esriGeometryLine, esriGeometryPolyline, esriGeometryPolygon
|
24
|
+
|
25
|
+
## Installation
|
26
|
+
|
27
|
+
Add this line to your application's Gemfile:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
gem 'gis_scraper'
|
31
|
+
```
|
32
|
+
|
33
|
+
And then execute:
|
34
|
+
|
35
|
+
$ bundle
|
36
|
+
|
37
|
+
Or install it yourself as:
|
38
|
+
|
39
|
+
$ gem install gis_scraper
|
40
|
+
|
41
|
+
## Configuration
|
42
|
+
|
43
|
+
Configuration options may be set via a hash or specified in a Yaml file. The following options are available:
|
44
|
+
|
45
|
+
- ```:threads``` Scraping is multi-threaded. The number of threads to use may be set with this option (default: 8)
|
46
|
+
- ```:output_path``` For JSON output, the path used to write files to (default: '~/Desktop')
|
47
|
+
|
48
|
+
The following options are used to connect to a database:
|
49
|
+
|
50
|
+
- ```:host``` (default: 'localhost')
|
51
|
+
- ```:port``` (default: 5432)
|
52
|
+
- ```:dbname``` (default: 'postgres')
|
53
|
+
- ```:user``` (default: 'postgres')
|
54
|
+
- ```:password``` (default: nil)
|
55
|
+
|
56
|
+
These additional options are available when using output to a database and are applied to the ```ogr2ogr``` command:
|
57
|
+
|
58
|
+
- ```:srs``` Used to override the source spatial reference system. Currently only EPSG string format is valid - e.g. 'EPSG:3109' (default: no override)
|
59
|
+
|
60
|
+
**To set via a hash**
|
61
|
+
|
62
|
+
```Ruby
|
63
|
+
GisScraper.configure(:threads => 16)
|
64
|
+
```
|
65
|
+
|
66
|
+
**Using a Yaml configuration file**
|
67
|
+
|
68
|
+
```Ruby
|
69
|
+
GisScraper.configure_with 'path-to-Yaml-file'
|
70
|
+
```
|
71
|
+
|
72
|
+
```Ruby
|
73
|
+
GisScraper.config # returns the hash of configuration values
|
74
|
+
```
|
75
|
+
|
76
|
+
## Usage
|
77
|
+
|
78
|
+
A Layer object must be instantiated with one required arg - a MapServer/Layer URL (ending in an integer representing the layer number). Example:
|
79
|
+
|
80
|
+
```
|
81
|
+
layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0')
|
82
|
+
```
|
83
|
+
|
84
|
+
An optional second argument for the output path for JSON files may be specified. If so, this overrides the configuration option. Example:
|
85
|
+
|
86
|
+
```
|
87
|
+
layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0', '~/Desktop')
|
13
88
|
```
|
14
|
-
|
89
|
+
|
90
|
+
**JSON output**
|
91
|
+
|
92
|
+
```
|
93
|
+
layer.output_json
|
15
94
|
```
|
16
95
|
|
17
96
|
If the layer is type 'Feature Layer', a single file of JSON data will be saved (named the same as the layer). If the layer is type 'Group Layer', the sub-group structure is traversed recursively thus: Directories for each sub-group layer are created and JSON data files for each constituent feature layer written to them.
|
18
97
|
|
19
|
-
**
|
98
|
+
**Output to a database**
|
99
|
+
|
100
|
+
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data.
|
101
|
+
|
102
|
+
```
|
103
|
+
layer.output_to_db
|
104
|
+
```
|
105
|
+
|
106
|
+
## Specification and Tests
|
107
|
+
|
108
|
+
For the full specification clone this repo and run:
|
109
|
+
|
110
|
+
`rake spec`
|
111
|
+
|
112
|
+
## Contributing
|
113
|
+
|
114
|
+
Bug reports, pull requests (and feature requests) are welcome on GitHub at https://github.com/MatzFan/gis_scraper.
|
20
115
|
|
21
|
-
|
116
|
+
## License
|
22
117
|
|
118
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses)
|
data/gis_scraper.gemspec
CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
s.summary = %q{Scrapes ArcGIS data from MapServer REST API}
|
14
14
|
s.description = %q{Scrapes ArcGIS data from MapServer REST API}
|
15
|
+
s.required_ruby_version = '>= 2.0'
|
15
16
|
s.license = "MIT"
|
16
17
|
|
17
18
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
|
@@ -25,4 +26,5 @@ Gem::Specification.new do |s|
|
|
25
26
|
|
26
27
|
s.add_runtime_dependency 'mechanize', '~> 2.7'
|
27
28
|
s.add_runtime_dependency 'parallel', '~> 1.6'
|
29
|
+
s.add_development_dependency 'pg', '~> 0.18'
|
28
30
|
end
|
data/lib/gis_scraper/layer.rb
CHANGED
@@ -12,6 +12,8 @@ class Layer
|
|
12
12
|
end
|
13
13
|
|
14
14
|
class UnknownLayerType < StandardError; end
|
15
|
+
class NoDatabase < StandardError; end
|
16
|
+
class OgrMissing < StandardError; end
|
15
17
|
|
16
18
|
attr_reader :type, :id, :name
|
17
19
|
|
@@ -21,8 +23,21 @@ class Layer
|
|
21
23
|
'Annotation SubLayer']
|
22
24
|
QUERYABLE = ['Feature Layer', 'Annotation Layer']
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
+
CONN = [:host, :port, :dbname, :user, :password] # PG connection options
|
27
|
+
|
28
|
+
GEOM_TYPES = {esriGeometryPoint: 'POINT',
|
29
|
+
esriGeometryMultipoint: 'MULTIPOINT',
|
30
|
+
esriGeometryLine: 'LINESTRING',
|
31
|
+
esriGeometryPolyline: 'MULTILINESTRING',
|
32
|
+
esriGeometryPolygon: 'MULTIPOLYGON'}
|
33
|
+
|
34
|
+
|
35
|
+
OGR2OGR = 'ogr2ogr -f "PostgreSQL" PG:'
|
36
|
+
|
37
|
+
def initialize(url, output_path = nil)
|
38
|
+
@conn_hash = CONN.zip(CONN.map { |key| GisScraper.config[key] }).to_h
|
39
|
+
@url = url
|
40
|
+
@output_path = output_path || config_path
|
26
41
|
@ms_url = ms_url # map server url ending '../MapServer'
|
27
42
|
@id = id
|
28
43
|
@agent = Mechanize.new
|
@@ -33,12 +48,32 @@ class Layer
|
|
33
48
|
@name = name
|
34
49
|
end
|
35
50
|
|
36
|
-
def
|
51
|
+
def output_json
|
37
52
|
QUERYABLE.any? { |l| @type == l } ? write_json_files : process_sub_layers
|
38
53
|
end
|
39
54
|
|
55
|
+
def output_to_db
|
56
|
+
raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' if !ogr2ogr?
|
57
|
+
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !db?
|
58
|
+
@output_path = 'tmp' # write all files to the Gem's tmp dir
|
59
|
+
output_json
|
60
|
+
write_json_files_to_db_tables
|
61
|
+
end
|
62
|
+
|
40
63
|
private
|
41
64
|
|
65
|
+
def db?
|
66
|
+
PG.connect(@conn_hash) rescue nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def ogr2ogr?
|
70
|
+
`ogr2ogr --version` rescue nil
|
71
|
+
end
|
72
|
+
|
73
|
+
def config_path
|
74
|
+
File.expand_path GisScraper.config[:output_path]
|
75
|
+
end
|
76
|
+
|
42
77
|
def ms_url
|
43
78
|
@url.split('/')[0..-2].join('/')
|
44
79
|
end
|
@@ -78,20 +113,53 @@ class Layer
|
|
78
113
|
end
|
79
114
|
|
80
115
|
def write_json_files
|
81
|
-
File.write "#{@
|
116
|
+
File.write "#{@output_path}/#{@name}.json", json_data("#{@ms_url}/#{@id}")
|
117
|
+
end
|
118
|
+
|
119
|
+
def write_json_files_to_db_tables
|
120
|
+
files.each do |f|
|
121
|
+
`#{OGR2OGR}"#{conn}" "#{f}" -nln #{base(f)} #{srs} -nlt #{geom(f)}`
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def geom(file)
|
126
|
+
esri = esri_geom(file)
|
127
|
+
GEOM_TYPES[esri.to_sym] || raise("Unknown geometry type: '#{esri}'")
|
128
|
+
end
|
129
|
+
|
130
|
+
def esri_geom(file)
|
131
|
+
JSON.parse(File.read(file))['geometryType']
|
132
|
+
end
|
133
|
+
|
134
|
+
def srs
|
135
|
+
return '' unless GisScraper.config[:srs]
|
136
|
+
"-a_srs #{GisScraper.config[:srs]}" || ''
|
137
|
+
end
|
138
|
+
|
139
|
+
def base(full_file_name)
|
140
|
+
full_file_name.split('/').last[0..-6].downcase
|
141
|
+
end
|
142
|
+
|
143
|
+
def files
|
144
|
+
Dir.glob('tmp/**/*.json')
|
145
|
+
end
|
146
|
+
|
147
|
+
def conn
|
148
|
+
host, port, db, user, pwd = *@conn_hash.values
|
149
|
+
"host=#{host} port=#{port} dbname=#{db} user=#{user} password=#{pwd}"
|
82
150
|
end
|
83
151
|
|
84
152
|
def process_sub_layers
|
85
153
|
sub_layer_id_names.each do |hash|
|
86
154
|
name, id = hash['name'], hash['id']
|
87
|
-
path = "#{@
|
88
|
-
|
155
|
+
path = "#{@output_path}/#{name}"
|
156
|
+
recurse_json sub_layer(id, path), path
|
89
157
|
end
|
90
158
|
end
|
91
159
|
|
92
|
-
def
|
160
|
+
def recurse_json(layer, dir)
|
93
161
|
FileUtils.mkdir dir
|
94
|
-
layer.
|
162
|
+
layer.output_json
|
95
163
|
end
|
96
164
|
|
97
165
|
def sub_layer(id, path)
|
data/lib/gis_scraper/version.rb
CHANGED
data/lib/gis_scraper.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'mechanize'
|
3
3
|
require 'parallel'
|
4
|
+
require 'pg'
|
4
5
|
|
5
6
|
require 'gis_scraper/version'
|
6
7
|
require 'gis_scraper/feature_scraper'
|
@@ -9,7 +10,9 @@ require 'gis_scraper/layer'
|
|
9
10
|
# stackoverflow.com/questions/6233124/where-to-place-access-config-file-in-gem
|
10
11
|
module GisScraper
|
11
12
|
|
12
|
-
@config = {threads: 8
|
13
|
+
@config = {threads: 8, output_path: '~/Desktop',
|
14
|
+
host: 'localhost', port: 5432, dbname: 'postgres', user: 'postgres', password: nil,
|
15
|
+
srs: nil}
|
13
16
|
@valid_keys = @config.keys
|
14
17
|
|
15
18
|
def self.configure(opts = {})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gis_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bruce Steedman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,11 +80,24 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '1.6'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pg
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.18'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.18'
|
83
97
|
description: Scrapes ArcGIS data from MapServer REST API
|
84
98
|
email:
|
85
99
|
- bruce.steedman@gmail.com
|
86
|
-
executables:
|
87
|
-
- gisget
|
100
|
+
executables: []
|
88
101
|
extensions: []
|
89
102
|
extra_rdoc_files: []
|
90
103
|
files:
|
@@ -97,7 +110,6 @@ files:
|
|
97
110
|
- Rakefile
|
98
111
|
- bin/console
|
99
112
|
- bin/setup
|
100
|
-
- exe/gisget
|
101
113
|
- gis_scraper.gemspec
|
102
114
|
- lib/gis_scraper.rb
|
103
115
|
- lib/gis_scraper/feature_scraper.rb
|
@@ -115,7 +127,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
115
127
|
requirements:
|
116
128
|
- - ">="
|
117
129
|
- !ruby/object:Gem::Version
|
118
|
-
version: '0'
|
130
|
+
version: '2.0'
|
119
131
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
132
|
requirements:
|
121
133
|
- - ">"
|