geocode_records 0.1.5 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG +13 -0
- data/README.md +3 -27
- data/geocode_records.gemspec +4 -9
- data/lib/geocode_records/dump_sql_to_csv.rb +32 -12
- data/lib/geocode_records/geocode_csv.rb +57 -73
- data/lib/geocode_records/update_table_from_csv.rb +120 -26
- data/lib/geocode_records/version.rb +1 -1
- data/lib/geocode_records.rb +72 -54
- data/package.json +1 -1
- data/spec/geocode_records_spec.rb +27 -74
- data/spec/spec_helper.rb +59 -0
- metadata +14 -73
- data/lib/geocode_records/smarty_streets.rb +0 -38
- data/spec/geocode_records/smarty_streets_spec.rb +0 -39
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cfba62c3b057b69bc8180fe3d09d7fa0a09c931
|
4
|
+
data.tar.gz: 9ab706ef30253318866feb80a9d2fa5ee947ace7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a99ba57d347697a74160599861696897b564bc8f528dcccab87eccb0c5127a9743380c769e15ef66a6d94fb9dea693a526a1cd97ac86183169d778a5b3a254b
|
7
|
+
data.tar.gz: c10936fe5170800b77bc186099666a9dadd3e909074c9af39956519b90a1d5f864d929cbac964047ddbbfc7def9ad05bb149c7cdff66f5bc5f12fcb44437f665
|
data/.gitignore
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
1.0.0 / 2018-01-16
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Totally change the API
|
6
|
+
* Require smartystreets 1.8.2
|
7
|
+
* Require pgloader to be in your path
|
8
|
+
* Require psql to be in your path
|
9
|
+
|
10
|
+
* Enhancements
|
11
|
+
|
12
|
+
* Way faster for big files
|
13
|
+
|
1
14
|
0.1.5 / 2017-10-15
|
2
15
|
|
3
16
|
* Enhancements
|
data/README.md
CHANGED
@@ -1,31 +1,7 @@
|
|
1
1
|
# GeocodeRecords
|
2
2
|
|
3
|
-
|
3
|
+
As long as you do very specific things... quickly re-geocode tables.
|
4
4
|
|
5
|
-
##
|
5
|
+
## Copyright
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
```ruby
|
10
|
-
gem 'geocode_records'
|
11
|
-
```
|
12
|
-
|
13
|
-
And then execute:
|
14
|
-
|
15
|
-
$ bundle
|
16
|
-
|
17
|
-
Or install it yourself as:
|
18
|
-
|
19
|
-
$ gem install geocode_records
|
20
|
-
|
21
|
-
## Usage
|
22
|
-
|
23
|
-
TODO: Write usage instructions here
|
24
|
-
|
25
|
-
## Contributing
|
26
|
-
|
27
|
-
1. Fork it ( https://github.com/[my-github-username]/geocode_records/fork )
|
28
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
-
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
-
5. Create a new Pull Request
|
7
|
+
Copyright 2018 Seamus Abshere
|
data/geocode_records.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Seamus Abshere"]
|
10
10
|
spec.email = ["seamus@abshere.net"]
|
11
11
|
spec.summary = %q{Geocode an ActiveRecord::Relation with node_smartystreets}
|
12
|
-
spec.description = %q{A quick way to re-geocode a table}
|
12
|
+
spec.description = %q{A quick way to re-geocode a table. Requires 2 binaries, so YMMV.}
|
13
13
|
spec.homepage = "https://github.com/seamusabshere/geocode_records"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -17,18 +17,13 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
|
-
|
21
|
-
spec.add_runtime_dependency 'activerecord', '>=4.1.9'
|
20
|
+
|
22
21
|
spec.add_runtime_dependency 'activesupport'
|
23
|
-
spec.add_runtime_dependency 'pg'
|
24
|
-
spec.add_runtime_dependency 'attr_extras'
|
25
|
-
spec.add_runtime_dependency 'zaru'
|
26
|
-
spec.add_runtime_dependency 'upsert'
|
27
|
-
spec.add_runtime_dependency 'pasqual'
|
28
22
|
|
23
|
+
spec.add_development_dependency 'activerecord', '>=4.1.9'
|
24
|
+
spec.add_development_dependency 'pg', '~>0.21'
|
29
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
30
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
31
27
|
spec.add_development_dependency "rspec"
|
32
28
|
spec.add_development_dependency "pry"
|
33
|
-
|
34
29
|
end
|
@@ -1,18 +1,38 @@
|
|
1
|
-
class
|
2
|
-
|
3
|
-
|
1
|
+
class GeocodeRecords
|
2
|
+
class DumpSqlToCsv
|
3
|
+
attr_reader :database_url
|
4
|
+
attr_reader :glob
|
5
|
+
attr_reader :table_name
|
6
|
+
attr_reader :subquery
|
4
7
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
def initialize(
|
9
|
+
database_url:,
|
10
|
+
glob:,
|
11
|
+
table_name: nil,
|
12
|
+
subquery: nil)
|
13
|
+
@database_url = database_url
|
14
|
+
@glob = glob
|
15
|
+
@table_name = table_name
|
16
|
+
@subquery = subquery
|
17
|
+
end
|
9
18
|
|
10
|
-
|
11
|
-
|
19
|
+
def perform
|
20
|
+
memo = GeocodeRecords.new_tmp_path(subquery || table_name)
|
21
|
+
GeocodeRecords.run_sql(
|
22
|
+
database_url,
|
23
|
+
"\\copy (#{sql}) TO '#{memo}' DELIMITER ',' CSV HEADER"
|
24
|
+
)
|
25
|
+
memo
|
26
|
+
end
|
12
27
|
|
13
|
-
|
28
|
+
private
|
14
29
|
|
15
|
-
|
30
|
+
def sql
|
31
|
+
@sql ||= unless glob
|
32
|
+
"SELECT id, house_number_and_street, city, state, regexp_replace(postcode, '.0$', '') AS postcode FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE city IS NOT NULL OR postcode IS NOT NULL"
|
33
|
+
else
|
34
|
+
"SELECT id, glob FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE (city IS NULL AND postcode IS NULL) AND glob IS NOT NULL"
|
35
|
+
end
|
36
|
+
end
|
16
37
|
end
|
17
|
-
|
18
38
|
end
|
@@ -1,41 +1,62 @@
|
|
1
|
-
require '
|
2
|
-
require 'fileutils'
|
3
|
-
require 'csv'
|
4
|
-
require 'shellwords'
|
5
|
-
require 'zaru'
|
1
|
+
require 'json'
|
6
2
|
|
7
|
-
require 'geocode_records/smarty_streets'
|
8
|
-
|
9
|
-
# copied from hotdog/app/services/file_geocoder.rb with seamus variations
|
10
3
|
class GeocodeRecords
|
11
4
|
class GeocodeCsv
|
5
|
+
attr_reader :path
|
12
6
|
attr_reader :glob
|
13
7
|
attr_reader :include_invalid
|
14
8
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
9
|
+
REQUIRED_SMARTYSTREETS_VERSION = Gem::Version.new('1.8.2')
|
10
|
+
COLUMN_DEFINITION = {
|
11
|
+
delivery_line_1: true,
|
12
|
+
components: {
|
13
|
+
primary_number: true,
|
14
|
+
secondary_number: true,
|
15
|
+
city_name: true,
|
16
|
+
default_city_name: true,
|
17
|
+
state_abbreviation: true,
|
18
|
+
zipcode: true
|
19
|
+
},
|
20
|
+
metadata: {
|
21
|
+
latitude: true,
|
22
|
+
longitude: true
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
def initialize(
|
27
|
+
path:,
|
28
|
+
glob:,
|
29
|
+
include_invalid:
|
30
|
+
)
|
31
|
+
@path = path
|
32
|
+
@glob = glob
|
33
|
+
@include_invalid = include_invalid
|
21
34
|
end
|
22
35
|
|
23
|
-
def
|
24
|
-
return
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
36
|
+
def perform
|
37
|
+
return unless File.size(path) > 32
|
38
|
+
memo = GeocodeRecords.new_tmp_path File.basename("geocoded-#{path}")
|
39
|
+
args = [
|
40
|
+
smartystreets_bin_path,
|
41
|
+
'-i', path,
|
42
|
+
'-o', memo,
|
43
|
+
'--quiet',
|
44
|
+
'--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
|
45
|
+
'--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN'),
|
46
|
+
'--column-definition', JSON.dump(COLUMN_DEFINITION),
|
47
|
+
]
|
48
|
+
if include_invalid
|
49
|
+
args += [ '--include-invalid' ]
|
30
50
|
end
|
51
|
+
input_map.each do |ss, local|
|
52
|
+
args += [ "--#{ss}-col", local.to_s ]
|
53
|
+
end
|
54
|
+
GeocodeRecords.system(*args)
|
55
|
+
memo
|
31
56
|
end
|
32
57
|
|
33
58
|
private
|
34
59
|
|
35
|
-
attr_private :input_path
|
36
|
-
attr_private :geocoded_path
|
37
|
-
attr_private :recoded_path
|
38
|
-
|
39
60
|
def input_map
|
40
61
|
@input_map ||= if glob
|
41
62
|
{ 'street' => 'glob' }
|
@@ -47,55 +68,18 @@ class GeocodeRecords
|
|
47
68
|
end
|
48
69
|
end
|
49
70
|
|
50
|
-
def
|
51
|
-
@
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
]
|
58
|
-
if include_invalid
|
59
|
-
args += [ '--include-invalid' ]
|
60
|
-
end
|
61
|
-
input_map.each do |ss, local|
|
62
|
-
args += [ "--#{ss}-col", local.to_s ]
|
63
|
-
end
|
64
|
-
SmartyStreets.run *args
|
65
|
-
raise "Geocoding failed on #{input_path.inspect} with args #{Shellwords.join(args)}" unless $?.success?
|
66
|
-
end
|
67
|
-
|
68
|
-
def recode
|
69
|
-
@recoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.recode')) {}
|
70
|
-
File.open(@recoded_path, 'w') do |f|
|
71
|
-
f.write output_columns.to_csv
|
72
|
-
CSV.foreach(@geocoded_path, headers: true) do |geocoded_row|
|
73
|
-
f.write recode_columns.map { |k| geocoded_row[k] }.to_csv
|
71
|
+
def smartystreets_bin_path
|
72
|
+
@smartystreets_bin_path ||= begin
|
73
|
+
memo = [
|
74
|
+
'node_modules/.bin/smartystreets',
|
75
|
+
`which smartystreets`.chomp
|
76
|
+
].compact.detect do |path|
|
77
|
+
File.exist? path
|
74
78
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
def output_columns
|
80
|
-
@output_columns ||= (File.open(input_path) { |f| CSV.parse_line(f.gets) } + RECODE_MAP.keys).uniq
|
81
|
-
end
|
82
|
-
|
83
|
-
# no street yet - street_name, street_suffix
|
84
|
-
RECODE_MAP = {
|
85
|
-
'house_number_and_street' => 'ss_delivery_line_1',
|
86
|
-
'house_number' => 'ss_primary_number',
|
87
|
-
'unit_number' => 'ss_secondary_number',
|
88
|
-
'city' => 'ss_city_name',
|
89
|
-
'state' => 'ss_state_abbreviation',
|
90
|
-
'postcode' => 'ss_zipcode',
|
91
|
-
'latitude' => 'ss_latitude',
|
92
|
-
'longitude' => 'ss_longitude',
|
93
|
-
'default_city' => 'ss_default_city_name',
|
94
|
-
}.freeze
|
95
|
-
|
96
|
-
def recode_columns
|
97
|
-
@recode_columns ||= output_columns.map do |output_k|
|
98
|
-
RECODE_MAP[output_k] || output_k
|
79
|
+
raise "can't find smartystreets bin" unless memo
|
80
|
+
version = Gem::Version.new `#{memo} -V`.chomp
|
81
|
+
raise "smartystreets #{version} too old" unless version >= REQUIRED_SMARTYSTREETS_VERSION
|
82
|
+
memo
|
99
83
|
end
|
100
84
|
end
|
101
85
|
end
|
@@ -1,36 +1,130 @@
|
|
1
1
|
require 'csv'
|
2
|
-
require 'upsert'
|
3
2
|
|
4
3
|
class GeocodeRecords
|
5
4
|
class UpdateTableFromCsv
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
CREATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
|
6
|
+
CREATE TABLE $TMP_TABLE_NAME (
|
7
|
+
id uuid primary key,
|
8
|
+
ss_delivery_line_1 text,
|
9
|
+
ss_primary_number text,
|
10
|
+
ss_secondary_number text,
|
11
|
+
ss_city_name text,
|
12
|
+
ss_state_abbreviation text,
|
13
|
+
ss_zipcode text,
|
14
|
+
ss_latitude float,
|
15
|
+
ss_longitude float,
|
16
|
+
ss_default_city_name text
|
17
|
+
)
|
18
|
+
SQL
|
19
|
+
|
20
|
+
PGLOADER_CONFIG = <<-SQL
|
21
|
+
LOAD CSV
|
22
|
+
FROM '$PATH'
|
23
|
+
(
|
24
|
+
$INPUT_COLUMNS
|
25
|
+
)
|
26
|
+
INTO $DATABASE_URL?$TMP_TABLE_NAME
|
27
|
+
(
|
28
|
+
id,
|
29
|
+
ss_delivery_line_1,
|
30
|
+
ss_primary_number,
|
31
|
+
ss_secondary_number,
|
32
|
+
ss_city_name,
|
33
|
+
ss_state_abbreviation,
|
34
|
+
ss_zipcode,
|
35
|
+
ss_latitude,
|
36
|
+
ss_longitude,
|
37
|
+
ss_default_city_name
|
38
|
+
)
|
39
|
+
WITH
|
40
|
+
skip header = 1,
|
41
|
+
fields optionally enclosed by '"',
|
42
|
+
fields escaped by double-quote,
|
43
|
+
fields terminated by ','
|
44
|
+
SET client_encoding to 'utf8';
|
45
|
+
SQL
|
46
|
+
|
47
|
+
UPDATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
|
48
|
+
UPDATE $TABLE_NAME AS target
|
49
|
+
SET
|
50
|
+
house_number_and_street = src.ss_delivery_line_1,
|
51
|
+
house_number = CASE WHEN LENGTH(src.ss_primary_number) > 7 THEN NULL ELSE src.ss_primary_number::int END,
|
52
|
+
unit_number = src.ss_secondary_number,
|
53
|
+
city = COALESCE(src.ss_default_city_name, src.ss_city_name),
|
54
|
+
state = src.ss_state_abbreviation,
|
55
|
+
postcode = src.ss_zipcode,
|
56
|
+
latitude = src.ss_latitude,
|
57
|
+
longitude = src.ss_longitude
|
58
|
+
FROM $TMP_TABLE_NAME AS src
|
59
|
+
WHERE
|
60
|
+
target.id = src.id
|
61
|
+
AND src.ss_zipcode IS NOT NULL
|
62
|
+
SQL
|
63
|
+
|
64
|
+
attr_reader :database_url
|
65
|
+
attr_reader :table_name
|
66
|
+
attr_reader :path
|
67
|
+
|
68
|
+
def initialize(
|
69
|
+
database_url:,
|
70
|
+
table_name:,
|
71
|
+
path:
|
72
|
+
)
|
73
|
+
@database_url = database_url
|
74
|
+
@table_name = table_name
|
75
|
+
@path = path
|
14
76
|
end
|
77
|
+
|
15
78
|
def perform
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
79
|
+
return unless File.size(path) > 32
|
80
|
+
tmp_table_name = create_tmp_table
|
81
|
+
begin
|
82
|
+
load_csv_into_tmp_table tmp_table_name
|
83
|
+
update_original_table tmp_table_name
|
84
|
+
ensure
|
85
|
+
delete_tmp_table tmp_table_name
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def create_tmp_table
|
90
|
+
memo = "geocode_records_#{table_name}_#{rand(999999)}"
|
91
|
+
GeocodeRecords.run_sql(
|
92
|
+
database_url,
|
93
|
+
CREATE_TABLE_SQL.sub('$TMP_TABLE_NAME', memo)
|
94
|
+
)
|
95
|
+
memo
|
96
|
+
end
|
97
|
+
|
98
|
+
def load_csv_into_tmp_table(tmp_table_name)
|
99
|
+
pg_loader_config_path = GeocodeRecords.new_tmp_path('pgloader')
|
100
|
+
File.open(pg_loader_config_path, 'w') { |f| f.write PGLOADER_CONFIG.sub('$INPUT_COLUMNS', input_columns.join(',')).sub('$DATABASE_URL', database_url).sub('$TMP_TABLE_NAME', tmp_table_name).sub('$PATH', path) }
|
101
|
+
GeocodeRecords.system(
|
102
|
+
'pgloader',
|
103
|
+
# '--debug',
|
104
|
+
'--quiet',
|
105
|
+
pg_loader_config_path
|
106
|
+
)
|
107
|
+
File.unlink pg_loader_config_path
|
108
|
+
end
|
109
|
+
|
110
|
+
def update_original_table(tmp_table_name)
|
111
|
+
GeocodeRecords.run_sql(
|
112
|
+
database_url,
|
113
|
+
UPDATE_TABLE_SQL.sub('$TMP_TABLE_NAME', tmp_table_name).sub('$TABLE_NAME', table_name)
|
114
|
+
)
|
115
|
+
end
|
116
|
+
|
117
|
+
def delete_tmp_table(tmp_table_name)
|
118
|
+
GeocodeRecords.run_sql(
|
119
|
+
database_url,
|
120
|
+
"DROP TABLE IF EXISTS #{tmp_table_name}"
|
121
|
+
)
|
122
|
+
end
|
123
|
+
|
124
|
+
def input_columns
|
125
|
+
CSV.parse_line(File.open(path) { |f| f.gets }).map do |col|
|
126
|
+
"#{col} [NULL IF BLANKS]"
|
32
127
|
end
|
33
|
-
count
|
34
128
|
end
|
35
129
|
end
|
36
130
|
end
|
data/lib/geocode_records.rb
CHANGED
@@ -1,77 +1,95 @@
|
|
1
|
-
require 'active_record'
|
2
1
|
require 'active_support'
|
3
2
|
require 'active_support/core_ext'
|
4
|
-
require '
|
5
|
-
require '
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'shellwords'
|
5
|
+
require 'fileutils'
|
6
6
|
|
7
7
|
require_relative 'geocode_records/version'
|
8
8
|
require_relative 'geocode_records/dump_sql_to_csv'
|
9
9
|
require_relative 'geocode_records/geocode_csv'
|
10
10
|
require_relative 'geocode_records/update_table_from_csv'
|
11
|
-
require_relative 'geocode_records/smarty_streets'
|
12
11
|
|
13
12
|
class GeocodeRecords
|
13
|
+
class << self
|
14
|
+
def new_tmp_path(hint)
|
15
|
+
Dir::Tmpname.create(hint[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
|
16
|
+
end
|
14
17
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def perform
|
24
|
-
SmartyStreets.check_compatible!
|
18
|
+
def system(*args)
|
19
|
+
result = Kernel.system(*args)
|
20
|
+
unless result
|
21
|
+
raise "failed command:\n#{Shellwords.join args}"
|
22
|
+
end
|
23
|
+
nil
|
24
|
+
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
def run_sql(database_url, sql)
|
27
|
+
system(
|
28
|
+
'psql',
|
29
|
+
database_url,
|
30
|
+
'-v', 'ON_ERROR_STOP=on',
|
31
|
+
# '--echo-all',
|
32
|
+
'--quiet',
|
33
|
+
'--no-psqlrc',
|
34
|
+
'--pset', 'pager=off',
|
35
|
+
'--command', sql
|
36
|
+
)
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
37
|
-
|
40
|
+
attr_reader :database_url
|
41
|
+
attr_reader :table_name
|
38
42
|
|
39
|
-
|
40
|
-
|
41
|
-
|
43
|
+
# optional
|
44
|
+
attr_reader :include_invalid
|
45
|
+
attr_reader :subquery
|
42
46
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
47
|
+
def initialize(
|
48
|
+
database_url:,
|
49
|
+
table_name:,
|
50
|
+
subquery: nil,
|
51
|
+
include_invalid: nil
|
52
|
+
)
|
53
|
+
@database_url = database_url
|
54
|
+
@table_name = table_name
|
55
|
+
@subquery = subquery
|
56
|
+
@include_invalid = include_invalid
|
48
57
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
if glob
|
54
|
-
c.to_sql records.select('id', 'glob').where.not(glob: nil).arel, records.bind_values
|
55
|
-
else
|
56
|
-
c.to_sql records.select('id', 'house_number_and_street', 'house_number', 'unit_number', 'city', 'state', "regexp_replace(postcode, '.0$', '') AS postcode").where('city IS NOT NULL OR postcode IS NOT NULL').arel, records.bind_values
|
57
|
-
end
|
58
|
-
end
|
58
|
+
|
59
|
+
def perform
|
60
|
+
geocode glob: false
|
61
|
+
geocode glob: true
|
59
62
|
end
|
60
63
|
|
61
|
-
|
62
|
-
records.connection
|
63
|
-
end
|
64
|
+
private
|
64
65
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
def geocode(glob:)
|
67
|
+
ungeocoded_path = nil
|
68
|
+
geocoded_path = nil
|
69
|
+
begin
|
70
|
+
ungeocoded_path = DumpSqlToCsv.new(
|
71
|
+
database_url: database_url,
|
72
|
+
table_name: table_name,
|
73
|
+
subquery: subquery,
|
74
|
+
glob: glob
|
75
|
+
).perform
|
76
|
+
unless File.size(ungeocoded_path) > 32
|
77
|
+
$stderr.puts "No records found for #{table_name} #{subquery}, skipping"
|
78
|
+
return
|
79
|
+
end
|
80
|
+
geocoded_path = GeocodeCsv.new(
|
81
|
+
path: ungeocoded_path,
|
82
|
+
glob: glob,
|
83
|
+
include_invalid: include_invalid
|
84
|
+
).perform
|
85
|
+
UpdateTableFromCsv.new(
|
86
|
+
database_url: database_url,
|
87
|
+
table_name: table_name,
|
88
|
+
path: geocoded_path
|
89
|
+
).perform
|
90
|
+
ensure
|
91
|
+
FileUtils.rm_f geocoded_path if geocoded_path
|
92
|
+
FileUtils.rm_f ungeocoded_path if ungeocoded_path
|
71
93
|
end
|
72
94
|
end
|
73
|
-
|
74
|
-
def pasqual
|
75
|
-
@pasqual ||= Pasqual.for ENV.fetch('DATABASE_URL')
|
76
|
-
end
|
77
95
|
end
|
data/package.json
CHANGED
@@ -1,132 +1,85 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
dbname = 'geocode_records_test'
|
4
|
-
ENV['DATABASE_URL'] = "postgresql://127.0.0.1/#{dbname}"
|
5
|
-
|
6
|
-
unless ENV['FAST'] == 'true'
|
7
|
-
psql = Pasqual.for ENV['DATABASE_URL']
|
8
|
-
psql.dropdb rescue nil
|
9
|
-
psql.createdb
|
10
|
-
psql.command 'CREATE EXTENSION postgis'
|
11
|
-
psql.command <<-SQL
|
12
|
-
CREATE TABLE homes (
|
13
|
-
id serial primary key,
|
14
|
-
the_geom geometry(Geometry,4326),
|
15
|
-
the_geom_webmercator geometry(Geometry,3857),
|
16
|
-
house_number_and_street text,
|
17
|
-
house_number int,
|
18
|
-
unit_number text,
|
19
|
-
city text,
|
20
|
-
state text,
|
21
|
-
postcode text,
|
22
|
-
latitude float,
|
23
|
-
longitude float
|
24
|
-
)
|
25
|
-
SQL
|
26
|
-
psql.command <<-SQL
|
27
|
-
CREATE TABLE glob_homes (
|
28
|
-
id serial primary key,
|
29
|
-
glob text,
|
30
|
-
the_geom geometry(Geometry,4326),
|
31
|
-
the_geom_webmercator geometry(Geometry,3857),
|
32
|
-
house_number_and_street text,
|
33
|
-
house_number int,
|
34
|
-
unit_number text,
|
35
|
-
city text,
|
36
|
-
state text,
|
37
|
-
postcode text,
|
38
|
-
latitude float,
|
39
|
-
longitude float
|
40
|
-
)
|
41
|
-
SQL
|
42
|
-
end
|
43
|
-
|
44
|
-
require 'active_record'
|
45
|
-
ActiveRecord::Base.establish_connection
|
46
|
-
|
47
|
-
require 'logger'
|
48
|
-
require 'fileutils'
|
49
|
-
FileUtils.mkdir_p 'log'
|
50
|
-
logger = Logger.new 'log/test.log'
|
51
|
-
ActiveRecord::Base.logger = logger
|
52
|
-
|
53
|
-
class Home < ActiveRecord::Base
|
54
|
-
end
|
55
|
-
class GlobHome < ActiveRecord::Base
|
56
|
-
end
|
57
|
-
|
58
3
|
describe GeocodeRecords do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
it "geocodes an AR::Relation" do
|
4
|
+
subject { GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes').perform }
|
5
|
+
|
6
|
+
it "geocodes" do
|
64
7
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
|
65
|
-
|
8
|
+
subject
|
66
9
|
home.reload
|
67
10
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
68
11
|
end
|
69
12
|
|
70
|
-
it "geocodes
|
71
|
-
home =
|
72
|
-
|
13
|
+
it "geocodes glob" do
|
14
|
+
home = Home.create! glob: '1038 e dayton st, madison, wi 53703'
|
15
|
+
subject
|
73
16
|
home.reload
|
74
17
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
75
18
|
expect(home.postcode).to eq('53703')
|
76
19
|
end
|
77
20
|
|
21
|
+
it "geocodes by sql" do
|
22
|
+
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703', foo: 'bar'
|
23
|
+
home_ignored = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
|
24
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', subquery: %{SELECT * FROM homes WHERE foo = 'bar'}).perform
|
25
|
+
home.reload
|
26
|
+
home_ignored.reload
|
27
|
+
expect(home.latitude).to be_present
|
28
|
+
expect(home_ignored.latitude).to be_nil
|
29
|
+
end
|
30
|
+
|
78
31
|
it "doesn't break on float-format postcode" do
|
79
32
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703.0'
|
80
|
-
|
33
|
+
subject
|
81
34
|
home.reload
|
82
35
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
83
36
|
end
|
84
37
|
|
85
38
|
it "doesn't break on unzeropadded postcode" do
|
86
39
|
home = Home.create! house_number_and_street: '36 main st', postcode: '5753'
|
87
|
-
|
40
|
+
subject
|
88
41
|
home.reload
|
89
42
|
expect(home.house_number_and_street).to eq('36 Main St')
|
90
43
|
end
|
91
44
|
|
92
45
|
it "doesn't break on unzeropadded float-format postcode" do
|
93
46
|
home = Home.create! house_number_and_street: '36 main st', postcode: '5753.0'
|
94
|
-
|
47
|
+
subject
|
95
48
|
home.reload
|
96
49
|
expect(home.house_number_and_street).to eq('36 Main St')
|
97
50
|
end
|
98
51
|
|
99
52
|
it "doesn't break on zip-4" do
|
100
53
|
home = Home.create! house_number_and_street: '1038 e dayton st', postcode: '53703-2428'
|
101
|
-
|
54
|
+
subject
|
102
55
|
home.reload
|
103
56
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
104
57
|
end
|
105
58
|
|
106
59
|
it "accepts city and state only" do
|
107
60
|
home = Home.create! house_number_and_street: '1038 e dayton st', city: 'madison', state: 'wisconsin'
|
108
|
-
|
61
|
+
subject
|
109
62
|
home.reload
|
110
63
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
111
64
|
end
|
112
65
|
|
113
66
|
it "allows invalid" do
|
114
67
|
home = Home.create! house_number_and_street: '1039 e dayton st', city: 'madison', state: 'wisconsin'
|
115
|
-
GeocodeRecords.new(
|
68
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
116
69
|
home.reload
|
117
70
|
expect(home.house_number_and_street).to eq('1039 E Dayton St')
|
118
71
|
end
|
119
72
|
|
120
73
|
it "overwrites unit" do
|
121
74
|
home = Home.create! house_number_and_street: '123 n blount st apt 403', city: 'madison', state: 'wisconsin'
|
122
|
-
GeocodeRecords.new(
|
75
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
123
76
|
home.reload
|
124
77
|
expect(home.house_number_and_street).to eq('123 N Blount St Unit 403')
|
125
78
|
end
|
126
79
|
|
127
80
|
it "overwrites city name with default_city_name" do
|
128
81
|
home = Home.create! house_number_and_street: '7333 Bay Bridge Rd', city: 'eastvale', state: 'ca'
|
129
|
-
GeocodeRecords.new(
|
82
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
130
83
|
home.reload
|
131
84
|
expect(home.city).to eq('Corona')
|
132
85
|
end
|
@@ -134,7 +87,7 @@ describe GeocodeRecords do
|
|
134
87
|
describe 'known issues' do
|
135
88
|
it "doesn't fix float-format postcode on records that it can't geocode" do
|
136
89
|
home = Home.create! house_number_and_street: 'gibberish', postcode: '53703.0'
|
137
|
-
|
90
|
+
subject
|
138
91
|
home.reload
|
139
92
|
expect(home.house_number_and_street).to eq('gibberish')
|
140
93
|
expect(home.postcode).to eq('53703.0')
|
@@ -142,7 +95,7 @@ describe GeocodeRecords do
|
|
142
95
|
|
143
96
|
it "doesn't fix unzeropadded postcode on records that it can't geocode" do
|
144
97
|
home = Home.create! house_number_and_street: 'gibberish', postcode: '5753'
|
145
|
-
|
98
|
+
subject
|
146
99
|
home.reload
|
147
100
|
expect(home.house_number_and_street).to eq('gibberish')
|
148
101
|
expect(home.postcode).to eq('5753')
|
data/spec/spec_helper.rb
CHANGED
@@ -2,3 +2,62 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
|
2
2
|
require 'geocode_records'
|
3
3
|
|
4
4
|
require 'pry'
|
5
|
+
|
6
|
+
dbname = 'geocode_records_test'
|
7
|
+
ENV['DATABASE_URL'] = "postgresql://127.0.0.1:#{ENV['PGPORT'] || 5432}/#{dbname}"
|
8
|
+
|
9
|
+
unless ENV['FAST'] == 'true'
|
10
|
+
GeocodeRecords.system('createdb', ENV.fetch('DATABASE_URL')) rescue nil
|
11
|
+
GeocodeRecords.run_sql(
|
12
|
+
ENV.fetch('DATABASE_URL'),
|
13
|
+
'CREATE EXTENSION IF NOT EXISTS postgis'
|
14
|
+
)
|
15
|
+
GeocodeRecords.run_sql(
|
16
|
+
ENV.fetch('DATABASE_URL'),
|
17
|
+
'DROP TABLE IF EXISTS homes'
|
18
|
+
)
|
19
|
+
sql = <<-SQL
|
20
|
+
CREATE TABLE homes (
|
21
|
+
id uuid primary key,
|
22
|
+
the_geom geometry(Geometry,4326),
|
23
|
+
the_geom_webmercator geometry(Geometry,3857),
|
24
|
+
glob text,
|
25
|
+
house_number_and_street text,
|
26
|
+
house_number int,
|
27
|
+
unit_number text,
|
28
|
+
city text,
|
29
|
+
state text,
|
30
|
+
postcode text,
|
31
|
+
latitude float,
|
32
|
+
longitude float,
|
33
|
+
foo text
|
34
|
+
)
|
35
|
+
SQL
|
36
|
+
GeocodeRecords.run_sql(
|
37
|
+
ENV.fetch('DATABASE_URL'),
|
38
|
+
sql
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
require 'active_record'
|
43
|
+
ActiveRecord::Base.establish_connection
|
44
|
+
|
45
|
+
require 'logger'
|
46
|
+
require 'fileutils'
|
47
|
+
FileUtils.mkdir_p 'log'
|
48
|
+
logger = Logger.new 'log/test.log'
|
49
|
+
ActiveRecord::Base.logger = logger
|
50
|
+
|
51
|
+
require 'securerandom'
|
52
|
+
class Home < ActiveRecord::Base
|
53
|
+
self.primary_key = 'id'
|
54
|
+
before_create do
|
55
|
+
self.id ||= SecureRandom.uuid
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
RSpec.configure do |config|
|
60
|
+
config.before :each do |example|
|
61
|
+
Home.delete_all
|
62
|
+
end
|
63
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geocode_records
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: activerecord
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 4.1.9
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 4.1.9
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: activesupport
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,75 +25,33 @@ dependencies:
|
|
39
25
|
- !ruby/object:Gem::Version
|
40
26
|
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: attr_extras
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: zaru
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: upsert
|
28
|
+
name: activerecord
|
85
29
|
requirement: !ruby/object:Gem::Requirement
|
86
30
|
requirements:
|
87
31
|
- - ">="
|
88
32
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
90
|
-
type: :
|
33
|
+
version: 4.1.9
|
34
|
+
type: :development
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
38
|
- - ">="
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
40
|
+
version: 4.1.9
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: pg
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
|
-
- - "
|
45
|
+
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :
|
47
|
+
version: '0.21'
|
48
|
+
type: :development
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
|
-
- - "
|
52
|
+
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
54
|
+
version: '0.21'
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
56
|
name: bundler
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,7 +108,7 @@ dependencies:
|
|
164
108
|
- - ">="
|
165
109
|
- !ruby/object:Gem::Version
|
166
110
|
version: '0'
|
167
|
-
description: A quick way to re-geocode a table
|
111
|
+
description: A quick way to re-geocode a table. Requires 2 binaries, so YMMV.
|
168
112
|
email:
|
169
113
|
- seamus@abshere.net
|
170
114
|
executables:
|
@@ -185,11 +129,9 @@ files:
|
|
185
129
|
- lib/geocode_records.rb
|
186
130
|
- lib/geocode_records/dump_sql_to_csv.rb
|
187
131
|
- lib/geocode_records/geocode_csv.rb
|
188
|
-
- lib/geocode_records/smarty_streets.rb
|
189
132
|
- lib/geocode_records/update_table_from_csv.rb
|
190
133
|
- lib/geocode_records/version.rb
|
191
134
|
- package.json
|
192
|
-
- spec/geocode_records/smarty_streets_spec.rb
|
193
135
|
- spec/geocode_records_spec.rb
|
194
136
|
- spec/spec_helper.rb
|
195
137
|
homepage: https://github.com/seamusabshere/geocode_records
|
@@ -212,11 +154,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
212
154
|
version: '0'
|
213
155
|
requirements: []
|
214
156
|
rubyforge_project:
|
215
|
-
rubygems_version: 2.6.
|
157
|
+
rubygems_version: 2.6.13
|
216
158
|
signing_key:
|
217
159
|
specification_version: 4
|
218
160
|
summary: Geocode an ActiveRecord::Relation with node_smartystreets
|
219
161
|
test_files:
|
220
|
-
- spec/geocode_records/smarty_streets_spec.rb
|
221
162
|
- spec/geocode_records_spec.rb
|
222
163
|
- spec/spec_helper.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
class GeocodeRecords
|
2
|
-
|
3
|
-
module SmartyStreets
|
4
|
-
|
5
|
-
VERSION = '1.7.2'
|
6
|
-
|
7
|
-
def self.bin_path
|
8
|
-
@bin_path ||= if File.exist?('node_modules/.bin/smartystreets')
|
9
|
-
'node_modules/.bin/smartystreets'
|
10
|
-
else
|
11
|
-
'smartystreets'
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.check_compatible!
|
16
|
-
raise "smartystreets >= #{VERSION} is required" unless SmartyStreets.compatible?
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.compatible?
|
20
|
-
output = run_with_output('-V')
|
21
|
-
current_version = Gem::Version.new output.chomp
|
22
|
-
min_version = Gem::Version.new VERSION
|
23
|
-
current_version >= min_version
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.run(*args)
|
27
|
-
shargs = Shellwords.join(args)
|
28
|
-
system "#{bin_path} #{shargs}"
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.run_with_output(*args)
|
32
|
-
shargs = Shellwords.join(args)
|
33
|
-
`#{bin_path} #{shargs}`
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
require 'geocode_records/smarty_streets'
|
4
|
-
|
5
|
-
describe GeocodeRecords::SmartyStreets do
|
6
|
-
|
7
|
-
describe '.bin_path' do
|
8
|
-
subject { described_class.bin_path }
|
9
|
-
|
10
|
-
it { is_expected.to eq 'node_modules/.bin/smartystreets' }
|
11
|
-
end
|
12
|
-
|
13
|
-
describe '.compatible?' do
|
14
|
-
before { allow(described_class).to receive(:run_with_output).and_return("#{version}\n") }
|
15
|
-
|
16
|
-
subject { described_class.compatible? }
|
17
|
-
|
18
|
-
context 'v1.3.1' do
|
19
|
-
let(:version) { '1.3.1' }
|
20
|
-
|
21
|
-
it { is_expected.to be false }
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'v1.7.2' do
|
25
|
-
let(:version) { '1.7.2' }
|
26
|
-
|
27
|
-
it { is_expected.to be true }
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '.run_with_output' do
|
33
|
-
subject { described_class.run_with_output '-V' }
|
34
|
-
|
35
|
-
it { is_expected.to match /\d+\.\d+.\d+/ }
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|