gis_scraper 0.1.8.pre → 0.1.9.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -5
- data/lib/gis_scraper/layer.rb +25 -11
- data/lib/gis_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b4673aa6d74aa17a2b9cc7edd3629955edcf2893
|
4
|
+
data.tar.gz: 05f54e0c85a5f5cf53ea10c6ee4ce1206e4f7bc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa65d5317c40e78c07a4007129c57079c42d7bd2389a2a98486b8970f566d8f097edb12ac1cef9811d11f9902c14ae5ecddcd7c3106be48cd2d1f22b2871d442
|
7
|
+
data.tar.gz: 5f9eb70359db47600488932df54bc76be6129b902e94e2ab6c9a07ed03537e07b693139ca34f63c26fd03dccc1c6220c57e1d737baaffca638c90a82ae5377d3
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# gis_scraper Ruby Gem
|
2
|
-
[![Gem Version](https://badge.fury.io/rb/gis_scraper.svg)](http://badge.fury.io/rb/gis_scraper)
|
3
2
|
[![Build status](https://secure.travis-ci.org/MatzFan/gis_scraper.svg)](http://travis-ci.org/MatzFan/gis_scraper)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/gis_scraper.svg)](http://badge.fury.io/rb/gis_scraper)
|
4
4
|
|
5
5
|
Utility to recursively scrape ArcGIS MapServer data using the ArcGIS REST API.
|
6
6
|
|
7
|
-
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be
|
7
|
+
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be GeoJSON file format or data may be written directly to Postgres database tables in PostGIS format. GIS clients - e.g. QGIS - can be configured to use vector layer data from PostGIS sources.
|
8
8
|
|
9
9
|
## Requirements
|
10
10
|
|
@@ -16,12 +16,16 @@ For data import to a database [GDAL](http://gdal.org) must be installed and spec
|
|
16
16
|
|
17
17
|
## Known Limitations
|
18
18
|
|
19
|
-
*NIX systems only - Linux/Mac OS X
|
19
|
+
*NIX systems only - Linux/Mac OS X. ArcGIS MapServer data is readable directly by ArcGIS Windows clients 😉
|
20
20
|
|
21
|
-
The following esri geometry types are supported:
|
21
|
+
The following esri geometry types are supported so far:
|
22
22
|
|
23
23
|
- esriGeometryPoint, esriGeometryMultipoint, esriGeometryLine, esriGeometryPolyline, esriGeometryPolygon
|
24
24
|
|
25
|
+
Annotation layers are ignored, as are layers with no esri geometryType.
|
26
|
+
|
27
|
+
Currently the JSON data for a whole layer is held in memory before being output. For large layers - e.g. >100,000 objects - this can be multiple GB of memory. If this causes a problem for you, please add a comment to [issue #4](https://github.com/MatzFan/gis_scraper/issues/4).
|
28
|
+
|
25
29
|
## Installation
|
26
30
|
|
27
31
|
Add this line to your application's Gemfile:
|
@@ -97,7 +101,7 @@ If the layer is type 'Feature Layer', a single file of JSON data will be saved (
|
|
97
101
|
|
98
102
|
**Output to a database**
|
99
103
|
|
100
|
-
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data.
|
104
|
+
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data. Table names are lowercased, prefixed with '_' and have spaces replaced with underscores. If a table with the same name already exists, '_' is appended to the name.
|
101
105
|
|
102
106
|
```
|
103
107
|
layer.output_to_db
|
data/lib/gis_scraper/layer.rb
CHANGED
@@ -9,6 +9,9 @@ class Layer
|
|
9
9
|
|
10
10
|
attr_reader :type
|
11
11
|
|
12
|
+
TABLES = "SELECT table_name FROM information_schema.tables\
|
13
|
+
WHERE table_schema = 'public'"
|
14
|
+
|
12
15
|
TYPE = %w(Group\ Layer Feature\ Layer Annotation\ Layer Annotation\ SubLayer)
|
13
16
|
|
14
17
|
CONN = [:host, :port, :dbname, :user, :password] # PG connection options
|
@@ -20,7 +23,7 @@ class Layer
|
|
20
23
|
'esriGeometryPolygon' => 'MULTIPOLYGON'}
|
21
24
|
|
22
25
|
MSURL = 'MapServer'
|
23
|
-
|
26
|
+
OGR = 'ogr2ogr -overwrite -f "PostgreSQL" PG:'
|
24
27
|
|
25
28
|
def initialize(url, path = nil)
|
26
29
|
@conn_hash = CONN.zip(CONN.map { |key| GisScraper.config[key] }).to_h
|
@@ -31,7 +34,7 @@ class Layer
|
|
31
34
|
@agent.pluggable_parser['text/plain'] = GisScraper::JSONParser
|
32
35
|
validate_url
|
33
36
|
@page_json = page_json
|
34
|
-
@type, @name, @sub_layer_ids = type, name, sub_layer_ids
|
37
|
+
@type, @name, @sub_layer_ids, @geo = type, name, sub_layer_ids, geo
|
35
38
|
end
|
36
39
|
|
37
40
|
def output_json
|
@@ -40,14 +43,14 @@ class Layer
|
|
40
43
|
|
41
44
|
def output_to_db
|
42
45
|
raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' if !ogr2ogr?
|
43
|
-
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !
|
46
|
+
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !conn
|
44
47
|
output(:db)
|
45
48
|
end
|
46
49
|
|
47
50
|
private
|
48
51
|
|
49
52
|
def output(format) # recurses sub-layers, if any (none for Annotation layers)
|
50
|
-
@type == 'Feature Layer' ? method(format) : do_sub_layers(format)
|
53
|
+
(@type == 'Feature Layer' && @geo) ? method(format) : do_sub_layers(format)
|
51
54
|
end
|
52
55
|
|
53
56
|
def method(format)
|
@@ -60,7 +63,7 @@ class Layer
|
|
60
63
|
File.expand_path(path) if path
|
61
64
|
end
|
62
65
|
|
63
|
-
def
|
66
|
+
def conn
|
64
67
|
PG.connect(@conn_hash) rescue nil
|
65
68
|
end
|
66
69
|
|
@@ -122,18 +125,17 @@ class Layer
|
|
122
125
|
@output_path = Dir.mktmpdir('gis_scraper') # prefix for identification
|
123
126
|
begin
|
124
127
|
write_json
|
125
|
-
`#{
|
128
|
+
`#{OGR}"#{c_str}" "#{json_path}" -nln #{table} #{srs} -nlt #{geom}`
|
126
129
|
ensure
|
127
130
|
FileUtils.remove_entry @output_path
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
131
134
|
def geom
|
132
|
-
|
133
|
-
GEOM_TYPES[esri] || raise("Unknown geometry: '#{esri}' for layer #{@name}")
|
135
|
+
GEOM_TYPES[@geo] || raise("Unknown geometry: '#{@geo}' for layer #{@name}")
|
134
136
|
end
|
135
137
|
|
136
|
-
def
|
138
|
+
def geo
|
137
139
|
@page_json['geometryType']
|
138
140
|
end
|
139
141
|
|
@@ -142,11 +144,23 @@ class Layer
|
|
142
144
|
"-a_srs #{GisScraper.config[:srs]}" || ''
|
143
145
|
end
|
144
146
|
|
147
|
+
def tables # list of current db table names
|
148
|
+
conn.exec(TABLES).map { |tup| tup['table_name'] }
|
149
|
+
end
|
150
|
+
|
145
151
|
def table
|
146
|
-
|
152
|
+
table_name << table_suffix
|
147
153
|
end
|
148
154
|
|
149
|
-
def
|
155
|
+
def table_name
|
156
|
+
Shellwords.escape(@name.downcase.gsub(' ', '_')).prepend('_')
|
157
|
+
end
|
158
|
+
|
159
|
+
def table_suffix
|
160
|
+
tables.any? { |t| t == table_name } ? '_' : ''
|
161
|
+
end
|
162
|
+
|
163
|
+
def c_str
|
150
164
|
host, port, db, user, pwd = *@conn_hash.values
|
151
165
|
"host=#{host} port=#{port} dbname=#{db} user=#{user} password=#{pwd}"
|
152
166
|
end
|
data/lib/gis_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gis_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8.pre
|
4
|
+
version: 0.1.9.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bruce Steedman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|