gis_scraper 0.1.8.pre → 0.1.9.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -5
- data/lib/gis_scraper/layer.rb +25 -11
- data/lib/gis_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b4673aa6d74aa17a2b9cc7edd3629955edcf2893
|
4
|
+
data.tar.gz: 05f54e0c85a5f5cf53ea10c6ee4ce1206e4f7bc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa65d5317c40e78c07a4007129c57079c42d7bd2389a2a98486b8970f566d8f097edb12ac1cef9811d11f9902c14ae5ecddcd7c3106be48cd2d1f22b2871d442
|
7
|
+
data.tar.gz: 5f9eb70359db47600488932df54bc76be6129b902e94e2ab6c9a07ed03537e07b693139ca34f63c26fd03dccc1c6220c57e1d737baaffca638c90a82ae5377d3
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# gis_scraper Ruby Gem
|
2
|
-
[](http://badge.fury.io/rb/gis_scraper)
|
3
2
|
[](http://travis-ci.org/MatzFan/gis_scraper)
|
3
|
+
[](http://badge.fury.io/rb/gis_scraper)
|
4
4
|
|
5
5
|
Utility to recursively scrape ArcGIS MapServer data using the ArcGIS REST API.
|
6
6
|
|
7
|
-
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be
|
7
|
+
ArcGIS MapServer REST queries are limited to 1,000 objects in some cases. This tool makes repeated calls until all data for a given layer (and all sub-layers) is extracted. Output can be GeoJSON file format or data may be written directly to Postgres database tables in PostGIS format. GIS clients - e.g. QGIS - can be configured to use vector layer data from PostGIS sources.
|
8
8
|
|
9
9
|
## Requirements
|
10
10
|
|
@@ -16,12 +16,16 @@ For data import to a database [GDAL](http://gdal.org) must be installed and spec
|
|
16
16
|
|
17
17
|
## Known Limitations
|
18
18
|
|
19
|
-
*NIX systems only - Linux/Mac OS X
|
19
|
+
*NIX systems only - Linux/Mac OS X. ArcGIS MapServer data is readable directly by ArcGIS Windows clients 😉
|
20
20
|
|
21
|
-
The following esri geometry types are supported:
|
21
|
+
The following esri geometry types are supported so far:
|
22
22
|
|
23
23
|
- esriGeometryPoint, esriGeometryMultipoint, esriGeometryLine, esriGeometryPolyline, esriGeometryPolygon
|
24
24
|
|
25
|
+
Annotation layers are ignored, as are layers with no esri geometryType.
|
26
|
+
|
27
|
+
Currently the JSON data for a whole layer is held in memory before being output. For large layers - e.g. >100,000 objects - this can be multiple GB of memory. If this causes a problem for you, please add a comment to [issue #4](https://github.com/MatzFan/gis_scraper/issues/4).
|
28
|
+
|
25
29
|
## Installation
|
26
30
|
|
27
31
|
Add this line to your application's Gemfile:
|
@@ -97,7 +101,7 @@ If the layer is type 'Feature Layer', a single file of JSON data will be saved (
|
|
97
101
|
|
98
102
|
**Output to a database**
|
99
103
|
|
100
|
-
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data.
|
104
|
+
Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data. Table names are lowercased, prefixed with '_' and have spaces replaced with underscores. If a table with the same name already exists, '_' is appended to the name.
|
101
105
|
|
102
106
|
```
|
103
107
|
layer.output_to_db
|
data/lib/gis_scraper/layer.rb
CHANGED
@@ -9,6 +9,9 @@ class Layer
|
|
9
9
|
|
10
10
|
attr_reader :type
|
11
11
|
|
12
|
+
TABLES = "SELECT table_name FROM information_schema.tables\
|
13
|
+
WHERE table_schema = 'public'"
|
14
|
+
|
12
15
|
TYPE = %w(Group\ Layer Feature\ Layer Annotation\ Layer Annotation\ SubLayer)
|
13
16
|
|
14
17
|
CONN = [:host, :port, :dbname, :user, :password] # PG connection options
|
@@ -20,7 +23,7 @@ class Layer
|
|
20
23
|
'esriGeometryPolygon' => 'MULTIPOLYGON'}
|
21
24
|
|
22
25
|
MSURL = 'MapServer'
|
23
|
-
|
26
|
+
OGR = 'ogr2ogr -overwrite -f "PostgreSQL" PG:'
|
24
27
|
|
25
28
|
def initialize(url, path = nil)
|
26
29
|
@conn_hash = CONN.zip(CONN.map { |key| GisScraper.config[key] }).to_h
|
@@ -31,7 +34,7 @@ class Layer
|
|
31
34
|
@agent.pluggable_parser['text/plain'] = GisScraper::JSONParser
|
32
35
|
validate_url
|
33
36
|
@page_json = page_json
|
34
|
-
@type, @name, @sub_layer_ids = type, name, sub_layer_ids
|
37
|
+
@type, @name, @sub_layer_ids, @geo = type, name, sub_layer_ids, geo
|
35
38
|
end
|
36
39
|
|
37
40
|
def output_json
|
@@ -40,14 +43,14 @@ class Layer
|
|
40
43
|
|
41
44
|
def output_to_db
|
42
45
|
raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' if !ogr2ogr?
|
43
|
-
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !
|
46
|
+
raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !conn
|
44
47
|
output(:db)
|
45
48
|
end
|
46
49
|
|
47
50
|
private
|
48
51
|
|
49
52
|
def output(format) # recurses sub-layers, if any (none for Annotation layers)
|
50
|
-
@type == 'Feature Layer' ? method(format) : do_sub_layers(format)
|
53
|
+
(@type == 'Feature Layer' && @geo) ? method(format) : do_sub_layers(format)
|
51
54
|
end
|
52
55
|
|
53
56
|
def method(format)
|
@@ -60,7 +63,7 @@ class Layer
|
|
60
63
|
File.expand_path(path) if path
|
61
64
|
end
|
62
65
|
|
63
|
-
def
|
66
|
+
def conn
|
64
67
|
PG.connect(@conn_hash) rescue nil
|
65
68
|
end
|
66
69
|
|
@@ -122,18 +125,17 @@ class Layer
|
|
122
125
|
@output_path = Dir.mktmpdir('gis_scraper') # prefix for identification
|
123
126
|
begin
|
124
127
|
write_json
|
125
|
-
`#{
|
128
|
+
`#{OGR}"#{c_str}" "#{json_path}" -nln #{table} #{srs} -nlt #{geom}`
|
126
129
|
ensure
|
127
130
|
FileUtils.remove_entry @output_path
|
128
131
|
end
|
129
132
|
end
|
130
133
|
|
131
134
|
def geom
|
132
|
-
|
133
|
-
GEOM_TYPES[esri] || raise("Unknown geometry: '#{esri}' for layer #{@name}")
|
135
|
+
GEOM_TYPES[@geo] || raise("Unknown geometry: '#{@geo}' for layer #{@name}")
|
134
136
|
end
|
135
137
|
|
136
|
-
def
|
138
|
+
def geo
|
137
139
|
@page_json['geometryType']
|
138
140
|
end
|
139
141
|
|
@@ -142,11 +144,23 @@ class Layer
|
|
142
144
|
"-a_srs #{GisScraper.config[:srs]}" || ''
|
143
145
|
end
|
144
146
|
|
147
|
+
def tables # list of current db table names
|
148
|
+
conn.exec(TABLES).map { |tup| tup['table_name'] }
|
149
|
+
end
|
150
|
+
|
145
151
|
def table
|
146
|
-
|
152
|
+
table_name << table_suffix
|
147
153
|
end
|
148
154
|
|
149
|
-
def
|
155
|
+
def table_name
|
156
|
+
Shellwords.escape(@name.downcase.gsub(' ', '_')).prepend('_')
|
157
|
+
end
|
158
|
+
|
159
|
+
def table_suffix
|
160
|
+
tables.any? { |t| t == table_name } ? '_' : ''
|
161
|
+
end
|
162
|
+
|
163
|
+
def c_str
|
150
164
|
host, port, db, user, pwd = *@conn_hash.values
|
151
165
|
"host=#{host} port=#{port} dbname=#{db} user=#{user} password=#{pwd}"
|
152
166
|
end
|
data/lib/gis_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gis_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bruce Steedman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|