gis_scraper 0.1.9.pre → 0.1.10.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b4673aa6d74aa17a2b9cc7edd3629955edcf2893
4
- data.tar.gz: 05f54e0c85a5f5cf53ea10c6ee4ce1206e4f7bc0
3
+ metadata.gz: 6c95a3f2a8c7e50cf4df5e6f0f363d660e19c5a8
4
+ data.tar.gz: 5b6e929eec3af5ad0b767d34239de3d00d8e4d67
5
5
  SHA512:
6
- metadata.gz: aa65d5317c40e78c07a4007129c57079c42d7bd2389a2a98486b8970f566d8f097edb12ac1cef9811d11f9902c14ae5ecddcd7c3106be48cd2d1f22b2871d442
7
- data.tar.gz: 5f9eb70359db47600488932df54bc76be6129b902e94e2ab6c9a07ed03537e07b693139ca34f63c26fd03dccc1c6220c57e1d737baaffca638c90a82ae5377d3
6
+ metadata.gz: d13788b449d9c87a009816b48ef89038a7b686307ed68a915b02059580d6189ca946e3889485ee26f50d2e2cb1b296504cb32353d8f4461c7435e5e58fe86a27
7
+ data.tar.gz: cb6ec2b8c1b747a32e799dc7a905d7824b3e7f5b2a6382230ad7d4017ef2a13773a7ee4c63af900edacbf02454d0d18c45989f9a1894dcd0310cf1100807e540
data/.travis.yml CHANGED
@@ -1,26 +1,21 @@
1
1
  language: ruby
2
-
3
- addons:
4
- postgresql: "9.4"
2
+ rvm:
3
+ - 2.1
4
+ - 2.2
5
5
 
6
6
  services:
7
7
  - postgresql
8
8
 
9
- before_script:
10
- - psql -c 'create database travis_ci_test;' -U postgres
11
- - psql -U postgres -c 'create extension postgis;'
9
+ addons:
10
+ postgresql: "9.4"
12
11
 
13
12
  before_install:
14
13
  - gem update bundler
15
- # http://askubuntu.com/questions/206593/how-to-install-rgdal-on-ubuntu-12-10
16
- - sudo apt-get update -qq
17
- - sudo apt-get install -y aptitude
18
- - sudo aptitude install -y libgdal-dev libproj-dev
19
14
 
20
- rvm:
21
- - 2.0.0
22
- - 2.1.6
23
- - 2.2.3
24
- - rbx-2.9
15
+ before_script:
16
+ - sudo rm /etc/apt/sources.list.d/ubuntugis-stable-source.list # https://github.com/travis-ci/travis-ci/issues/2401
17
+ - sudo apt-get update -qq
18
+ - sudo apt-get install gdal-bin -y
19
+ - psql -c "create extension postgis" -U $USER -d $USER # default db name is same as user
25
20
 
26
21
  script: bundle exec rspec spec
data/README.md CHANGED
@@ -79,22 +79,22 @@ GisScraper.config # returns the hash of configuration values
79
79
 
80
80
  ## Usage
81
81
 
82
- A Layer object must be instantiated with one required arg - a MapServer/Layer URL (ending in an integer representing the layer number). Example:
82
+ A LayerWriter object must be instantiated with one required arg - a Service/Layer URL (ending in an integer representing the layer number). Example:
83
83
 
84
84
  ```
85
- layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0')
85
+ writer = LayerWriter.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0')
86
86
  ```
87
87
 
88
88
  An optional second argument for the output path for JSON files may be specified. If so, this overrides the configuration option. Example:
89
89
 
90
90
  ```
91
- layer = Layer.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0', '~/Desktop')
91
+ writer = LayerWriter.new('http://gps.digimap.gg/arcgis/rest/services/StatesOfJersey/JerseyMappingOL/MapServer/0', '~/Desktop')
92
92
  ```
93
93
 
94
94
  **JSON output**
95
95
 
96
96
  ```
97
- layer.output_json
97
+ writer.output_json
98
98
  ```
99
99
 
100
100
  If the layer is type 'Feature Layer', a single file of JSON data will be saved (named the same as the layer). If the layer is type 'Group Layer', the sub-group structure is traversed recursively thus: Directories for each sub-group layer are created and JSON data files for each constituent feature layer written to them.
@@ -104,7 +104,7 @@ If the layer is type 'Feature Layer', a single file of JSON data will be saved (
104
104
  Valid database config options must be set. The following command will convert JSON files, create tables for each layer (& sub-layers, if any) and import the data. Table names are lowercased, prefixed '_' and have spaces replaced with underscores. If a table with the same name exists the name is appended with '_'.
105
105
 
106
106
  ```
107
- layer.output_to_db
107
+ writer.output_to_db
108
108
  ```
109
109
 
110
110
  ## Specification and Tests
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/gis_scraper.gemspec CHANGED
@@ -10,21 +10,21 @@ Gem::Specification.new do |s|
10
10
  s.authors = ['Bruce Steedman']
11
11
  s.email = ['bruce.steedman@gmail.com']
12
12
 
13
- s.summary = %q{Scrapes ArcGIS data from MapServer REST API}
14
- s.description = %q{Scrapes ArcGIS data from MapServer REST API}
13
+ s.summary = 'Scrapes ArcGIS data from MapServer REST API'
14
+ s.description = 'Scrapes ArcGIS data from MapServer REST API'
15
15
  s.required_ruby_version = '>= 2.0'
16
- s.license = "MIT"
16
+ s.license = 'MIT'
17
17
 
18
- s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
18
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
19
19
  s.bindir = 'exe'
20
20
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  s.require_paths = ['lib']
22
22
 
23
23
  s.add_development_dependency 'bundler', '~> 1.11'
24
24
  s.add_development_dependency 'rake', '~> 10.0'
25
- s.add_development_dependency 'rspec', '~> 3.0'
25
+ s.add_development_dependency 'rspec', '~> 3.4'
26
26
 
27
- s.add_runtime_dependency 'mechanize', '~> 2.7'
28
- s.add_runtime_dependency 'parallel', '~> 1.6'
27
+ s.add_runtime_dependency 'arcrest', '~> 0.0.2'
28
+ s.add_runtime_dependency 'parallel', '~> 1.9'
29
29
  s.add_development_dependency 'pg', '~> 0.18'
30
30
  end
data/lib/gis_scraper.rb CHANGED
@@ -1,27 +1,27 @@
1
1
  require 'yaml'
2
- require 'mechanize'
2
+ require 'arcrest'
3
+ # require 'mechanize'
3
4
  require 'parallel'
4
5
  require 'pg'
5
6
 
6
7
  require 'gis_scraper/version'
7
8
  require 'gis_scraper/feature_scraper'
8
- require 'gis_scraper/layer'
9
+ require 'gis_scraper/layer_writer'
9
10
 
10
11
  # stackoverflow.com/questions/6233124/where-to-place-access-config-file-in-gem
11
12
  module GisScraper
12
-
13
- @config = {threads: 8, output_path: '~/Desktop',
14
- host: 'localhost', port: 5432, dbname: 'postgres', user: 'postgres', password: nil,
15
- srs: nil}
13
+ @config = { threads: 8, output_path: '~/Desktop', host: 'localhost',
14
+ port: 5432, dbname: 'postgres', user: 'postgres', password: nil,
15
+ srs: nil }
16
16
  @valid_keys = @config.keys
17
17
 
18
18
  def self.configure(opts = {})
19
- opts.each { |k,v| @config[k.to_sym] = v if @valid_keys.include? k.to_sym }
19
+ opts.each { |k, v| @config[k.to_sym] = v if @valid_keys.include? k.to_sym }
20
20
  end
21
21
 
22
22
  def self.configure_with(path_to_yaml_file)
23
23
  begin
24
- config = YAML::load(IO.read(path_to_yaml_file))
24
+ config = YAML.load(IO.read(path_to_yaml_file))
25
25
  rescue Errno::ENOENT
26
26
  puts "YAML configuration file couldn't be found. Using defaults"
27
27
  return
@@ -37,13 +37,13 @@ module GisScraper
37
37
  @config
38
38
  end
39
39
 
40
- class JSONParser < Mechanize::File # shared by FeatureScraper & Layer
41
- attr_reader :json
42
-
43
- def initialize(uri=nil, response=nil, body=nil, code=nil)
44
- super(uri, response, body, code)
45
- @json = JSON.parse(body)
46
- end
47
- end
40
+ # shared by FeatureScraper & Layer
41
+ # class JSONParser < Mechanize::File
42
+ # attr_reader :json
48
43
 
44
+ # def initialize(uri = nil, response = nil, body = nil, code = nil)
45
+ # super(uri, response, body, code)
46
+ # @json = JSON.parse(body)
47
+ # end
48
+ # end
49
49
  end
@@ -1,71 +1,89 @@
1
+ # scrapes feature layers
1
2
  class FeatureScraper
3
+ API_CALL_LIMIT = 1000
4
+ STRING = 'esriFieldTypeString'.freeze
5
+ VARCHAR_MAX_SIZE = 10_485_760 # max size for PostgreSQL VARCHAR
2
6
 
3
7
  attr_reader :name
4
8
 
5
9
  def initialize(url)
6
10
  @url = url
7
- @agent = Mechanize.new
8
- @agent.pluggable_parser['text/plain'] = GisScraper::JSONParser
9
- @layer = layer # hash of json
10
- @name, @pk, @max = name, pk, max # maxRecordCount - usually 1000
11
- @form = form
11
+ @layer = layer
12
+ @json = json
13
+ @name = name
14
+ @pk = pk
15
+ @max = max # maxRecordCount - usually 1000
12
16
  @loops = loops
13
17
  @threads = GisScraper.config[:threads]
14
18
  end
15
19
 
16
20
  def json_data
17
- data(0).merge({'features' => features(@threads)}).to_json
21
+ query_without_features.merge('features' => all_features(@threads)).to_json
18
22
  end
19
23
 
20
24
  private
21
25
 
26
+ def query_without_features # check_field_length not needed ogr2ogr >= 1.11.5
27
+ check_field_length @layer.query(where: '1=0')
28
+ end
29
+
22
30
  def layer
23
- @agent.get(@url + '?f=pjson').json
31
+ ArcREST::Layer.new(@url)
32
+ end
33
+
34
+ def json
35
+ @layer.json
24
36
  end
25
37
 
26
38
  def name
27
- @layer['name']
39
+ @layer.name
40
+ end
41
+
42
+ def renderer
43
+ @layer.drawing_info['renderer']
28
44
  end
29
45
 
30
46
  def pk
31
- @layer['fields'].select { |f| f['type'] == 'esriFieldTypeOID' }[0]['name']
47
+ @json['fields'].select { |f| f['type'] == 'esriFieldTypeOID' }[0]['name']
32
48
  end
33
49
 
34
50
  def max
35
- @layer['maxRecordCount'].to_i
51
+ @layer.max_record_count || API_CALL_LIMIT
36
52
  end
37
53
 
38
- def form
39
- @agent.get(@url + '/query').forms.first
54
+ def count
55
+ @layer.count
40
56
  end
41
57
 
42
- def count
43
- set_query_params
44
- @form.submit(@form.buttons[1]).json['count'].to_i
58
+ def features(n)
59
+ @layer.features(where: where_text(n))
60
+ end
61
+
62
+ def check_field_length(hash) # https://trac.osgeo.org/gdal/ticket/6529
63
+ hash.merge check_fields(hash['fields'])
45
64
  end
46
65
 
47
- def set_query_params(loop_num = nil)
48
- @form.fields[0].value = where_text(loop_num)
49
- loop_num ? @form.radiobuttons[4].uncheck : @form.radiobuttons[4].check
50
- @form.fields[6].value = '*'
51
- @form.field_with(name: 'f').options[1].select # for JSON
66
+ def check_fields(fields)
67
+ { 'fields' => fields.map { |f| f['type'] == STRING ? esri_string(f) : f } }
52
68
  end
53
69
 
54
- def data(n)
55
- set_query_params(n)
56
- @form.submit(@form.buttons[1]).json
70
+ def esri_string(fields)
71
+ Hash[fields.map { |k, v| [k, k == 'length' ? truncate(v) : v] }] # nice :)
57
72
  end
58
73
 
59
- def features(t)
60
- Parallel.map(0...@loops, in_threads: t) { |n| data(n)['features'] }.flatten
74
+ def truncate(length)
75
+ length > VARCHAR_MAX_SIZE ? 0 : length
76
+ end
77
+
78
+ def all_features(threads)
79
+ Parallel.map(0...@loops, in_threads: threads) { |n| features(n) }.flatten
61
80
  end
62
81
 
63
82
  def loops
64
- (count.to_f/@max).ceil
83
+ (count.to_f / @max).ceil
65
84
  end
66
85
 
67
86
  def where_text(n)
68
87
  n ? "#{pk} > #{n * @max} AND #{pk} <= #{(n + 1) * @max}" : "#{pk} > 0"
69
88
  end
70
-
71
89
  end
@@ -1,8 +1,9 @@
1
1
  require 'fileutils'
2
2
  require 'tmpdir'
3
+ require 'shellwords'
3
4
 
4
- class Layer
5
-
5
+ # tool to write ArcGIS layer(s) to json or database output
6
+ class LayerWriter
6
7
  class UnknownLayerType < StandardError; end
7
8
  class NoDatabase < StandardError; end
8
9
  class OgrMissing < StandardError; end
@@ -10,31 +11,33 @@ class Layer
10
11
  attr_reader :type
11
12
 
12
13
  TABLES = "SELECT table_name FROM information_schema.tables\
13
- WHERE table_schema = 'public'"
14
+ WHERE table_schema = 'public'".freeze
14
15
 
15
- TYPE = %w(Group\ Layer Feature\ Layer Annotation\ Layer Annotation\ SubLayer)
16
+ TYPE = ['Group Layer', 'Feature Layer', 'Annotation Layer',
17
+ 'Annotation SubLayer'].freeze
16
18
 
17
- CONN = [:host, :port, :dbname, :user, :password] # PG connection options
19
+ CONN = [:host, :port, :dbname, :user, :password].freeze
18
20
 
19
- GEOM_TYPES = {'esriGeometryPoint' => 'POINT',
20
- 'esriGeometryMultipoint' => 'MULTIPOINT',
21
- 'esriGeometryLine' => 'LINESTRING',
22
- 'esriGeometryPolyline' => 'MULTILINESTRING',
23
- 'esriGeometryPolygon' => 'MULTIPOLYGON'}
21
+ GEOM_TYPES = { 'esriGeometryPoint' => 'POINT',
22
+ 'esriGeometryMultipoint' => 'MULTIPOINT',
23
+ 'esriGeometryLine' => 'LINESTRING',
24
+ 'esriGeometryPolyline' => 'MULTILINESTRING',
25
+ 'esriGeometryPolygon' => 'MULTIPOLYGON' }.freeze
24
26
 
25
- MSURL = 'MapServer'
26
- OGR = 'ogr2ogr -overwrite -f "PostgreSQL" PG:'
27
+ OGR = 'ogr2ogr -overwrite -f "PostgreSQL" PG:'.freeze
27
28
 
28
29
  def initialize(url, path = nil)
29
30
  @conn_hash = CONN.zip(CONN.map { |key| GisScraper.config[key] }).to_h
30
31
  @url = url
31
32
  @output_path = output_path(path) || config_path
32
- @id, @mapserver_url = id, mapserver_url # mapserver url ends '../MapServer'
33
- @agent = Mechanize.new
34
- @agent.pluggable_parser['text/plain'] = GisScraper::JSONParser
35
- validate_url
33
+ @id = id
34
+ @service_url = service_url
35
+ @layer = layer
36
36
  @page_json = page_json
37
- @type, @name, @sub_layer_ids, @geo = type, name, sub_layer_ids, geo
37
+ @type = type
38
+ @name = name
39
+ @sub_layer_ids = sub_layer_ids
40
+ @geo = geo
38
41
  end
39
42
 
40
43
  def output_json
@@ -42,8 +45,8 @@ class Layer
42
45
  end
43
46
 
44
47
  def output_to_db
45
- raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' if !ogr2ogr?
46
- raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" if !conn
48
+ raise OgrMissing.new, 'ogr2ogr missing, is GDAL installed?' unless ogr2ogr?
49
+ raise NoDatabase.new, "No db connection: #{@conn_hash.inspect}" unless conn
47
50
  output(:db)
48
51
  end
49
52
 
@@ -75,7 +78,7 @@ class Layer
75
78
  File.expand_path GisScraper.config[:output_path]
76
79
  end
77
80
 
78
- def mapserver_url
81
+ def service_url
79
82
  @url.split('/')[0..-2].join('/')
80
83
  end
81
84
 
@@ -83,13 +86,12 @@ class Layer
83
86
  @url.split('/').last
84
87
  end
85
88
 
86
- def validate_url
87
- raise ArgumentError, 'URL must end with layer id' if @id.to_i.to_s != @id
88
- raise ArgumentError, 'Bad MapServer URL' if @mapserver_url[-9..-1] != MSURL
89
+ def layer
90
+ ArcREST::Layer.new @url
89
91
  end
90
92
 
91
93
  def page_json
92
- @agent.get(@url + '?f=pjson').json
94
+ @layer.json
93
95
  end
94
96
 
95
97
  def type
@@ -101,7 +103,7 @@ class Layer
101
103
  end
102
104
 
103
105
  def validate_type(type)
104
- raise UnknownLayerType, type unless (TYPE.any? { |t| t == type })
106
+ raise UnknownLayerType, type unless TYPE.any? { |t| t == type }
105
107
  type
106
108
  end
107
109
 
@@ -110,7 +112,7 @@ class Layer
110
112
  end
111
113
 
112
114
  def json_data
113
- FeatureScraper.new("#{@mapserver_url}/#{@id}").json_data
115
+ FeatureScraper.new("#{@service_url}/#{@id}").json_data
114
116
  end
115
117
 
116
118
  def write_json
@@ -125,14 +127,14 @@ class Layer
125
127
  @output_path = Dir.mktmpdir('gis_scraper') # prefix for identification
126
128
  begin
127
129
  write_json
128
- `#{OGR}"#{c_str}" "#{json_path}" -nln #{table} #{srs} -nlt #{geom}`
130
+ `#{OGR}"#{conn_str}" "#{json_path}" -nln #{table} #{srs} -nlt #{geom}`
129
131
  ensure
130
132
  FileUtils.remove_entry @output_path
131
133
  end
132
134
  end
133
135
 
134
136
  def geom
135
- GEOM_TYPES[@geo] || raise("Unknown geometry: '#{@geo}' for layer #{@name}")
137
+ GEOM_TYPES[@geo] || raise("Unknown geom: '#{@geo}' for layer #{@name}")
136
138
  end
137
139
 
138
140
  def geo
@@ -153,14 +155,14 @@ class Layer
153
155
  end
154
156
 
155
157
  def table_name
156
- Shellwords.escape(@name.downcase.gsub(' ', '_')).prepend('_')
158
+ Shellwords.escape(@name.downcase.tr(' ', '_')).prepend('_')
157
159
  end
158
160
 
159
161
  def table_suffix
160
162
  tables.any? { |t| t == table_name } ? '_' : ''
161
163
  end
162
164
 
163
- def c_str
165
+ def conn_str
164
166
  host, port, db, user, pwd = *@conn_hash.values
165
167
  "host=#{host} port=#{port} dbname=#{db} user=#{user} password=#{pwd}"
166
168
  end
@@ -172,11 +174,10 @@ class Layer
172
174
  end
173
175
 
174
176
  def sub_layer(id, path)
175
- Layer.new("#{@mapserver_url}/#{id}", path)
177
+ self.class.new("#{@service_url}/#{id}", path)
176
178
  end
177
179
 
178
180
  def replace_forwardslashes_with_underscores(string)
179
- string.gsub /\//, '_'
181
+ string.tr('/', '_')
180
182
  end
181
-
182
183
  end
@@ -1,3 +1,3 @@
1
1
  module GisScraper
2
- VERSION = '0.1.9.pre'
2
+ VERSION = '0.1.10.pre'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gis_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9.pre
4
+ version: 0.1.10.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bruce Steedman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-15 00:00:00.000000000 Z
11
+ date: 2016-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,42 +44,42 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '3.4'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '3.4'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mechanize
56
+ name: arcrest
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '2.7'
61
+ version: 0.0.2
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '2.7'
68
+ version: 0.0.2
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: parallel
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.6'
75
+ version: '1.9'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.6'
82
+ version: '1.9'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: pg
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ files:
113
113
  - gis_scraper.gemspec
114
114
  - lib/gis_scraper.rb
115
115
  - lib/gis_scraper/feature_scraper.rb
116
- - lib/gis_scraper/layer.rb
116
+ - lib/gis_scraper/layer_writer.rb
117
117
  - lib/gis_scraper/version.rb
118
118
  homepage:
119
119
  licenses:
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
135
  version: 1.3.1
136
136
  requirements: []
137
137
  rubyforge_project:
138
- rubygems_version: 2.4.8
138
+ rubygems_version: 2.5.1
139
139
  signing_key:
140
140
  specification_version: 4
141
141
  summary: Scrapes ArcGIS data from MapServer REST API