geocode_records 0.1.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG +13 -0
- data/README.md +3 -27
- data/geocode_records.gemspec +4 -9
- data/lib/geocode_records/dump_sql_to_csv.rb +32 -12
- data/lib/geocode_records/geocode_csv.rb +57 -73
- data/lib/geocode_records/update_table_from_csv.rb +120 -26
- data/lib/geocode_records/version.rb +1 -1
- data/lib/geocode_records.rb +72 -54
- data/package.json +1 -1
- data/spec/geocode_records_spec.rb +27 -74
- data/spec/spec_helper.rb +59 -0
- metadata +14 -73
- data/lib/geocode_records/smarty_streets.rb +0 -38
- data/spec/geocode_records/smarty_streets_spec.rb +0 -39
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cfba62c3b057b69bc8180fe3d09d7fa0a09c931
|
4
|
+
data.tar.gz: 9ab706ef30253318866feb80a9d2fa5ee947ace7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a99ba57d347697a74160599861696897b564bc8f528dcccab87eccb0c5127a9743380c769e15ef66a6d94fb9dea693a526a1cd97ac86183169d778a5b3a254b
|
7
|
+
data.tar.gz: c10936fe5170800b77bc186099666a9dadd3e909074c9af39956519b90a1d5f864d929cbac964047ddbbfc7def9ad05bb149c7cdff66f5bc5f12fcb44437f665
|
data/.gitignore
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
1.0.0 / 2018-01-16
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Totally change the API
|
6
|
+
* Require smartystreets 1.8.2
|
7
|
+
* Require pgloader to be in your path
|
8
|
+
* Require psql to be in your path
|
9
|
+
|
10
|
+
* Enhancements
|
11
|
+
|
12
|
+
* Way faster for big files
|
13
|
+
|
1
14
|
0.1.5 / 2017-10-15
|
2
15
|
|
3
16
|
* Enhancements
|
data/README.md
CHANGED
@@ -1,31 +1,7 @@
|
|
1
1
|
# GeocodeRecords
|
2
2
|
|
3
|
-
|
3
|
+
As long as you do very specific things... quickly re-geocode tables.
|
4
4
|
|
5
|
-
##
|
5
|
+
## Copyright
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
```ruby
|
10
|
-
gem 'geocode_records'
|
11
|
-
```
|
12
|
-
|
13
|
-
And then execute:
|
14
|
-
|
15
|
-
$ bundle
|
16
|
-
|
17
|
-
Or install it yourself as:
|
18
|
-
|
19
|
-
$ gem install geocode_records
|
20
|
-
|
21
|
-
## Usage
|
22
|
-
|
23
|
-
TODO: Write usage instructions here
|
24
|
-
|
25
|
-
## Contributing
|
26
|
-
|
27
|
-
1. Fork it ( https://github.com/[my-github-username]/geocode_records/fork )
|
28
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
-
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
-
5. Create a new Pull Request
|
7
|
+
Copyright 2018 Seamus Abshere
|
data/geocode_records.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Seamus Abshere"]
|
10
10
|
spec.email = ["seamus@abshere.net"]
|
11
11
|
spec.summary = %q{Geocode an ActiveRecord::Relation with node_smartystreets}
|
12
|
-
spec.description = %q{A quick way to re-geocode a table}
|
12
|
+
spec.description = %q{A quick way to re-geocode a table. Requires 2 binaries, so YMMV.}
|
13
13
|
spec.homepage = "https://github.com/seamusabshere/geocode_records"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -17,18 +17,13 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
|
-
|
21
|
-
spec.add_runtime_dependency 'activerecord', '>=4.1.9'
|
20
|
+
|
22
21
|
spec.add_runtime_dependency 'activesupport'
|
23
|
-
spec.add_runtime_dependency 'pg'
|
24
|
-
spec.add_runtime_dependency 'attr_extras'
|
25
|
-
spec.add_runtime_dependency 'zaru'
|
26
|
-
spec.add_runtime_dependency 'upsert'
|
27
|
-
spec.add_runtime_dependency 'pasqual'
|
28
22
|
|
23
|
+
spec.add_development_dependency 'activerecord', '>=4.1.9'
|
24
|
+
spec.add_development_dependency 'pg', '~>0.21'
|
29
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
30
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
31
27
|
spec.add_development_dependency "rspec"
|
32
28
|
spec.add_development_dependency "pry"
|
33
|
-
|
34
29
|
end
|
@@ -1,18 +1,38 @@
|
|
1
|
-
class
|
2
|
-
|
3
|
-
|
1
|
+
class GeocodeRecords
|
2
|
+
class DumpSqlToCsv
|
3
|
+
attr_reader :database_url
|
4
|
+
attr_reader :glob
|
5
|
+
attr_reader :table_name
|
6
|
+
attr_reader :subquery
|
4
7
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
def initialize(
|
9
|
+
database_url:,
|
10
|
+
glob:,
|
11
|
+
table_name: nil,
|
12
|
+
subquery: nil)
|
13
|
+
@database_url = database_url
|
14
|
+
@glob = glob
|
15
|
+
@table_name = table_name
|
16
|
+
@subquery = subquery
|
17
|
+
end
|
9
18
|
|
10
|
-
|
11
|
-
|
19
|
+
def perform
|
20
|
+
memo = GeocodeRecords.new_tmp_path(subquery || table_name)
|
21
|
+
GeocodeRecords.run_sql(
|
22
|
+
database_url,
|
23
|
+
"\\copy (#{sql}) TO '#{memo}' DELIMITER ',' CSV HEADER"
|
24
|
+
)
|
25
|
+
memo
|
26
|
+
end
|
12
27
|
|
13
|
-
|
28
|
+
private
|
14
29
|
|
15
|
-
|
30
|
+
def sql
|
31
|
+
@sql ||= unless glob
|
32
|
+
"SELECT id, house_number_and_street, city, state, regexp_replace(postcode, '.0$', '') AS postcode FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE city IS NOT NULL OR postcode IS NOT NULL"
|
33
|
+
else
|
34
|
+
"SELECT id, glob FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE (city IS NULL AND postcode IS NULL) AND glob IS NOT NULL"
|
35
|
+
end
|
36
|
+
end
|
16
37
|
end
|
17
|
-
|
18
38
|
end
|
@@ -1,41 +1,62 @@
|
|
1
|
-
require '
|
2
|
-
require 'fileutils'
|
3
|
-
require 'csv'
|
4
|
-
require 'shellwords'
|
5
|
-
require 'zaru'
|
1
|
+
require 'json'
|
6
2
|
|
7
|
-
require 'geocode_records/smarty_streets'
|
8
|
-
|
9
|
-
# copied from hotdog/app/services/file_geocoder.rb with seamus variations
|
10
3
|
class GeocodeRecords
|
11
4
|
class GeocodeCsv
|
5
|
+
attr_reader :path
|
12
6
|
attr_reader :glob
|
13
7
|
attr_reader :include_invalid
|
14
8
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
9
|
+
REQUIRED_SMARTYSTREETS_VERSION = Gem::Version.new('1.8.2')
|
10
|
+
COLUMN_DEFINITION = {
|
11
|
+
delivery_line_1: true,
|
12
|
+
components: {
|
13
|
+
primary_number: true,
|
14
|
+
secondary_number: true,
|
15
|
+
city_name: true,
|
16
|
+
default_city_name: true,
|
17
|
+
state_abbreviation: true,
|
18
|
+
zipcode: true
|
19
|
+
},
|
20
|
+
metadata: {
|
21
|
+
latitude: true,
|
22
|
+
longitude: true
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
def initialize(
|
27
|
+
path:,
|
28
|
+
glob:,
|
29
|
+
include_invalid:
|
30
|
+
)
|
31
|
+
@path = path
|
32
|
+
@glob = glob
|
33
|
+
@include_invalid = include_invalid
|
21
34
|
end
|
22
35
|
|
23
|
-
def
|
24
|
-
return
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
36
|
+
def perform
|
37
|
+
return unless File.size(path) > 32
|
38
|
+
memo = GeocodeRecords.new_tmp_path File.basename("geocoded-#{path}")
|
39
|
+
args = [
|
40
|
+
smartystreets_bin_path,
|
41
|
+
'-i', path,
|
42
|
+
'-o', memo,
|
43
|
+
'--quiet',
|
44
|
+
'--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
|
45
|
+
'--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN'),
|
46
|
+
'--column-definition', JSON.dump(COLUMN_DEFINITION),
|
47
|
+
]
|
48
|
+
if include_invalid
|
49
|
+
args += [ '--include-invalid' ]
|
30
50
|
end
|
51
|
+
input_map.each do |ss, local|
|
52
|
+
args += [ "--#{ss}-col", local.to_s ]
|
53
|
+
end
|
54
|
+
GeocodeRecords.system(*args)
|
55
|
+
memo
|
31
56
|
end
|
32
57
|
|
33
58
|
private
|
34
59
|
|
35
|
-
attr_private :input_path
|
36
|
-
attr_private :geocoded_path
|
37
|
-
attr_private :recoded_path
|
38
|
-
|
39
60
|
def input_map
|
40
61
|
@input_map ||= if glob
|
41
62
|
{ 'street' => 'glob' }
|
@@ -47,55 +68,18 @@ class GeocodeRecords
|
|
47
68
|
end
|
48
69
|
end
|
49
70
|
|
50
|
-
def
|
51
|
-
@
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
]
|
58
|
-
if include_invalid
|
59
|
-
args += [ '--include-invalid' ]
|
60
|
-
end
|
61
|
-
input_map.each do |ss, local|
|
62
|
-
args += [ "--#{ss}-col", local.to_s ]
|
63
|
-
end
|
64
|
-
SmartyStreets.run *args
|
65
|
-
raise "Geocoding failed on #{input_path.inspect} with args #{Shellwords.join(args)}" unless $?.success?
|
66
|
-
end
|
67
|
-
|
68
|
-
def recode
|
69
|
-
@recoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.recode')) {}
|
70
|
-
File.open(@recoded_path, 'w') do |f|
|
71
|
-
f.write output_columns.to_csv
|
72
|
-
CSV.foreach(@geocoded_path, headers: true) do |geocoded_row|
|
73
|
-
f.write recode_columns.map { |k| geocoded_row[k] }.to_csv
|
71
|
+
def smartystreets_bin_path
|
72
|
+
@smartystreets_bin_path ||= begin
|
73
|
+
memo = [
|
74
|
+
'node_modules/.bin/smartystreets',
|
75
|
+
`which smartystreets`.chomp
|
76
|
+
].compact.detect do |path|
|
77
|
+
File.exist? path
|
74
78
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
def output_columns
|
80
|
-
@output_columns ||= (File.open(input_path) { |f| CSV.parse_line(f.gets) } + RECODE_MAP.keys).uniq
|
81
|
-
end
|
82
|
-
|
83
|
-
# no street yet - street_name, street_suffix
|
84
|
-
RECODE_MAP = {
|
85
|
-
'house_number_and_street' => 'ss_delivery_line_1',
|
86
|
-
'house_number' => 'ss_primary_number',
|
87
|
-
'unit_number' => 'ss_secondary_number',
|
88
|
-
'city' => 'ss_city_name',
|
89
|
-
'state' => 'ss_state_abbreviation',
|
90
|
-
'postcode' => 'ss_zipcode',
|
91
|
-
'latitude' => 'ss_latitude',
|
92
|
-
'longitude' => 'ss_longitude',
|
93
|
-
'default_city' => 'ss_default_city_name',
|
94
|
-
}.freeze
|
95
|
-
|
96
|
-
def recode_columns
|
97
|
-
@recode_columns ||= output_columns.map do |output_k|
|
98
|
-
RECODE_MAP[output_k] || output_k
|
79
|
+
raise "can't find smartystreets bin" unless memo
|
80
|
+
version = Gem::Version.new `#{memo} -V`.chomp
|
81
|
+
raise "smartystreets #{version} too old" unless version >= REQUIRED_SMARTYSTREETS_VERSION
|
82
|
+
memo
|
99
83
|
end
|
100
84
|
end
|
101
85
|
end
|
@@ -1,36 +1,130 @@
|
|
1
1
|
require 'csv'
|
2
|
-
require 'upsert'
|
3
2
|
|
4
3
|
class GeocodeRecords
|
5
4
|
class UpdateTableFromCsv
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
CREATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
|
6
|
+
CREATE TABLE $TMP_TABLE_NAME (
|
7
|
+
id uuid primary key,
|
8
|
+
ss_delivery_line_1 text,
|
9
|
+
ss_primary_number text,
|
10
|
+
ss_secondary_number text,
|
11
|
+
ss_city_name text,
|
12
|
+
ss_state_abbreviation text,
|
13
|
+
ss_zipcode text,
|
14
|
+
ss_latitude float,
|
15
|
+
ss_longitude float,
|
16
|
+
ss_default_city_name text
|
17
|
+
)
|
18
|
+
SQL
|
19
|
+
|
20
|
+
PGLOADER_CONFIG = <<-SQL
|
21
|
+
LOAD CSV
|
22
|
+
FROM '$PATH'
|
23
|
+
(
|
24
|
+
$INPUT_COLUMNS
|
25
|
+
)
|
26
|
+
INTO $DATABASE_URL?$TMP_TABLE_NAME
|
27
|
+
(
|
28
|
+
id,
|
29
|
+
ss_delivery_line_1,
|
30
|
+
ss_primary_number,
|
31
|
+
ss_secondary_number,
|
32
|
+
ss_city_name,
|
33
|
+
ss_state_abbreviation,
|
34
|
+
ss_zipcode,
|
35
|
+
ss_latitude,
|
36
|
+
ss_longitude,
|
37
|
+
ss_default_city_name
|
38
|
+
)
|
39
|
+
WITH
|
40
|
+
skip header = 1,
|
41
|
+
fields optionally enclosed by '"',
|
42
|
+
fields escaped by double-quote,
|
43
|
+
fields terminated by ','
|
44
|
+
SET client_encoding to 'utf8';
|
45
|
+
SQL
|
46
|
+
|
47
|
+
UPDATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
|
48
|
+
UPDATE $TABLE_NAME AS target
|
49
|
+
SET
|
50
|
+
house_number_and_street = src.ss_delivery_line_1,
|
51
|
+
house_number = CASE WHEN LENGTH(src.ss_primary_number) > 7 THEN NULL ELSE src.ss_primary_number::int END,
|
52
|
+
unit_number = src.ss_secondary_number,
|
53
|
+
city = COALESCE(src.ss_default_city_name, src.ss_city_name),
|
54
|
+
state = src.ss_state_abbreviation,
|
55
|
+
postcode = src.ss_zipcode,
|
56
|
+
latitude = src.ss_latitude,
|
57
|
+
longitude = src.ss_longitude
|
58
|
+
FROM $TMP_TABLE_NAME AS src
|
59
|
+
WHERE
|
60
|
+
target.id = src.id
|
61
|
+
AND src.ss_zipcode IS NOT NULL
|
62
|
+
SQL
|
63
|
+
|
64
|
+
attr_reader :database_url
|
65
|
+
attr_reader :table_name
|
66
|
+
attr_reader :path
|
67
|
+
|
68
|
+
def initialize(
|
69
|
+
database_url:,
|
70
|
+
table_name:,
|
71
|
+
path:
|
72
|
+
)
|
73
|
+
@database_url = database_url
|
74
|
+
@table_name = table_name
|
75
|
+
@path = path
|
14
76
|
end
|
77
|
+
|
15
78
|
def perform
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
79
|
+
return unless File.size(path) > 32
|
80
|
+
tmp_table_name = create_tmp_table
|
81
|
+
begin
|
82
|
+
load_csv_into_tmp_table tmp_table_name
|
83
|
+
update_original_table tmp_table_name
|
84
|
+
ensure
|
85
|
+
delete_tmp_table tmp_table_name
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def create_tmp_table
|
90
|
+
memo = "geocode_records_#{table_name}_#{rand(999999)}"
|
91
|
+
GeocodeRecords.run_sql(
|
92
|
+
database_url,
|
93
|
+
CREATE_TABLE_SQL.sub('$TMP_TABLE_NAME', memo)
|
94
|
+
)
|
95
|
+
memo
|
96
|
+
end
|
97
|
+
|
98
|
+
def load_csv_into_tmp_table(tmp_table_name)
|
99
|
+
pg_loader_config_path = GeocodeRecords.new_tmp_path('pgloader')
|
100
|
+
File.open(pg_loader_config_path, 'w') { |f| f.write PGLOADER_CONFIG.sub('$INPUT_COLUMNS', input_columns.join(',')).sub('$DATABASE_URL', database_url).sub('$TMP_TABLE_NAME', tmp_table_name).sub('$PATH', path) }
|
101
|
+
GeocodeRecords.system(
|
102
|
+
'pgloader',
|
103
|
+
# '--debug',
|
104
|
+
'--quiet',
|
105
|
+
pg_loader_config_path
|
106
|
+
)
|
107
|
+
File.unlink pg_loader_config_path
|
108
|
+
end
|
109
|
+
|
110
|
+
def update_original_table(tmp_table_name)
|
111
|
+
GeocodeRecords.run_sql(
|
112
|
+
database_url,
|
113
|
+
UPDATE_TABLE_SQL.sub('$TMP_TABLE_NAME', tmp_table_name).sub('$TABLE_NAME', table_name)
|
114
|
+
)
|
115
|
+
end
|
116
|
+
|
117
|
+
def delete_tmp_table(tmp_table_name)
|
118
|
+
GeocodeRecords.run_sql(
|
119
|
+
database_url,
|
120
|
+
"DROP TABLE IF EXISTS #{tmp_table_name}"
|
121
|
+
)
|
122
|
+
end
|
123
|
+
|
124
|
+
def input_columns
|
125
|
+
CSV.parse_line(File.open(path) { |f| f.gets }).map do |col|
|
126
|
+
"#{col} [NULL IF BLANKS]"
|
32
127
|
end
|
33
|
-
count
|
34
128
|
end
|
35
129
|
end
|
36
130
|
end
|
data/lib/geocode_records.rb
CHANGED
@@ -1,77 +1,95 @@
|
|
1
|
-
require 'active_record'
|
2
1
|
require 'active_support'
|
3
2
|
require 'active_support/core_ext'
|
4
|
-
require '
|
5
|
-
require '
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'shellwords'
|
5
|
+
require 'fileutils'
|
6
6
|
|
7
7
|
require_relative 'geocode_records/version'
|
8
8
|
require_relative 'geocode_records/dump_sql_to_csv'
|
9
9
|
require_relative 'geocode_records/geocode_csv'
|
10
10
|
require_relative 'geocode_records/update_table_from_csv'
|
11
|
-
require_relative 'geocode_records/smarty_streets'
|
12
11
|
|
13
12
|
class GeocodeRecords
|
13
|
+
class << self
|
14
|
+
def new_tmp_path(hint)
|
15
|
+
Dir::Tmpname.create(hint[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
|
16
|
+
end
|
14
17
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def perform
|
24
|
-
SmartyStreets.check_compatible!
|
18
|
+
def system(*args)
|
19
|
+
result = Kernel.system(*args)
|
20
|
+
unless result
|
21
|
+
raise "failed command:\n#{Shellwords.join args}"
|
22
|
+
end
|
23
|
+
nil
|
24
|
+
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
def run_sql(database_url, sql)
|
27
|
+
system(
|
28
|
+
'psql',
|
29
|
+
database_url,
|
30
|
+
'-v', 'ON_ERROR_STOP=on',
|
31
|
+
# '--echo-all',
|
32
|
+
'--quiet',
|
33
|
+
'--no-psqlrc',
|
34
|
+
'--pset', 'pager=off',
|
35
|
+
'--command', sql
|
36
|
+
)
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
37
|
-
|
40
|
+
attr_reader :database_url
|
41
|
+
attr_reader :table_name
|
38
42
|
|
39
|
-
|
40
|
-
|
41
|
-
|
43
|
+
# optional
|
44
|
+
attr_reader :include_invalid
|
45
|
+
attr_reader :subquery
|
42
46
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
47
|
+
def initialize(
|
48
|
+
database_url:,
|
49
|
+
table_name:,
|
50
|
+
subquery: nil,
|
51
|
+
include_invalid: nil
|
52
|
+
)
|
53
|
+
@database_url = database_url
|
54
|
+
@table_name = table_name
|
55
|
+
@subquery = subquery
|
56
|
+
@include_invalid = include_invalid
|
48
57
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
if glob
|
54
|
-
c.to_sql records.select('id', 'glob').where.not(glob: nil).arel, records.bind_values
|
55
|
-
else
|
56
|
-
c.to_sql records.select('id', 'house_number_and_street', 'house_number', 'unit_number', 'city', 'state', "regexp_replace(postcode, '.0$', '') AS postcode").where('city IS NOT NULL OR postcode IS NOT NULL').arel, records.bind_values
|
57
|
-
end
|
58
|
-
end
|
58
|
+
|
59
|
+
def perform
|
60
|
+
geocode glob: false
|
61
|
+
geocode glob: true
|
59
62
|
end
|
60
63
|
|
61
|
-
|
62
|
-
records.connection
|
63
|
-
end
|
64
|
+
private
|
64
65
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
def geocode(glob:)
|
67
|
+
ungeocoded_path = nil
|
68
|
+
geocoded_path = nil
|
69
|
+
begin
|
70
|
+
ungeocoded_path = DumpSqlToCsv.new(
|
71
|
+
database_url: database_url,
|
72
|
+
table_name: table_name,
|
73
|
+
subquery: subquery,
|
74
|
+
glob: glob
|
75
|
+
).perform
|
76
|
+
unless File.size(ungeocoded_path) > 32
|
77
|
+
$stderr.puts "No records found for #{table_name} #{subquery}, skipping"
|
78
|
+
return
|
79
|
+
end
|
80
|
+
geocoded_path = GeocodeCsv.new(
|
81
|
+
path: ungeocoded_path,
|
82
|
+
glob: glob,
|
83
|
+
include_invalid: include_invalid
|
84
|
+
).perform
|
85
|
+
UpdateTableFromCsv.new(
|
86
|
+
database_url: database_url,
|
87
|
+
table_name: table_name,
|
88
|
+
path: geocoded_path
|
89
|
+
).perform
|
90
|
+
ensure
|
91
|
+
FileUtils.rm_f geocoded_path if geocoded_path
|
92
|
+
FileUtils.rm_f ungeocoded_path if ungeocoded_path
|
71
93
|
end
|
72
94
|
end
|
73
|
-
|
74
|
-
def pasqual
|
75
|
-
@pasqual ||= Pasqual.for ENV.fetch('DATABASE_URL')
|
76
|
-
end
|
77
95
|
end
|
data/package.json
CHANGED
@@ -1,132 +1,85 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
dbname = 'geocode_records_test'
|
4
|
-
ENV['DATABASE_URL'] = "postgresql://127.0.0.1/#{dbname}"
|
5
|
-
|
6
|
-
unless ENV['FAST'] == 'true'
|
7
|
-
psql = Pasqual.for ENV['DATABASE_URL']
|
8
|
-
psql.dropdb rescue nil
|
9
|
-
psql.createdb
|
10
|
-
psql.command 'CREATE EXTENSION postgis'
|
11
|
-
psql.command <<-SQL
|
12
|
-
CREATE TABLE homes (
|
13
|
-
id serial primary key,
|
14
|
-
the_geom geometry(Geometry,4326),
|
15
|
-
the_geom_webmercator geometry(Geometry,3857),
|
16
|
-
house_number_and_street text,
|
17
|
-
house_number int,
|
18
|
-
unit_number text,
|
19
|
-
city text,
|
20
|
-
state text,
|
21
|
-
postcode text,
|
22
|
-
latitude float,
|
23
|
-
longitude float
|
24
|
-
)
|
25
|
-
SQL
|
26
|
-
psql.command <<-SQL
|
27
|
-
CREATE TABLE glob_homes (
|
28
|
-
id serial primary key,
|
29
|
-
glob text,
|
30
|
-
the_geom geometry(Geometry,4326),
|
31
|
-
the_geom_webmercator geometry(Geometry,3857),
|
32
|
-
house_number_and_street text,
|
33
|
-
house_number int,
|
34
|
-
unit_number text,
|
35
|
-
city text,
|
36
|
-
state text,
|
37
|
-
postcode text,
|
38
|
-
latitude float,
|
39
|
-
longitude float
|
40
|
-
)
|
41
|
-
SQL
|
42
|
-
end
|
43
|
-
|
44
|
-
require 'active_record'
|
45
|
-
ActiveRecord::Base.establish_connection
|
46
|
-
|
47
|
-
require 'logger'
|
48
|
-
require 'fileutils'
|
49
|
-
FileUtils.mkdir_p 'log'
|
50
|
-
logger = Logger.new 'log/test.log'
|
51
|
-
ActiveRecord::Base.logger = logger
|
52
|
-
|
53
|
-
class Home < ActiveRecord::Base
|
54
|
-
end
|
55
|
-
class GlobHome < ActiveRecord::Base
|
56
|
-
end
|
57
|
-
|
58
3
|
describe GeocodeRecords do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
it "geocodes an AR::Relation" do
|
4
|
+
subject { GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes').perform }
|
5
|
+
|
6
|
+
it "geocodes" do
|
64
7
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
|
65
|
-
|
8
|
+
subject
|
66
9
|
home.reload
|
67
10
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
68
11
|
end
|
69
12
|
|
70
|
-
it "geocodes
|
71
|
-
home =
|
72
|
-
|
13
|
+
it "geocodes glob" do
|
14
|
+
home = Home.create! glob: '1038 e dayton st, madison, wi 53703'
|
15
|
+
subject
|
73
16
|
home.reload
|
74
17
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
75
18
|
expect(home.postcode).to eq('53703')
|
76
19
|
end
|
77
20
|
|
21
|
+
it "geocodes by sql" do
|
22
|
+
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703', foo: 'bar'
|
23
|
+
home_ignored = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
|
24
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', subquery: %{SELECT * FROM homes WHERE foo = 'bar'}).perform
|
25
|
+
home.reload
|
26
|
+
home_ignored.reload
|
27
|
+
expect(home.latitude).to be_present
|
28
|
+
expect(home_ignored.latitude).to be_nil
|
29
|
+
end
|
30
|
+
|
78
31
|
it "doesn't break on float-format postcode" do
|
79
32
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703.0'
|
80
|
-
|
33
|
+
subject
|
81
34
|
home.reload
|
82
35
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
83
36
|
end
|
84
37
|
|
85
38
|
it "doesn't break on unzeropadded postcode" do
|
86
39
|
home = Home.create! house_number_and_street: '36 main st', postcode: '5753'
|
87
|
-
|
40
|
+
subject
|
88
41
|
home.reload
|
89
42
|
expect(home.house_number_and_street).to eq('36 Main St')
|
90
43
|
end
|
91
44
|
|
92
45
|
it "doesn't break on unzeropadded float-format postcode" do
|
93
46
|
home = Home.create! house_number_and_street: '36 main st', postcode: '5753.0'
|
94
|
-
|
47
|
+
subject
|
95
48
|
home.reload
|
96
49
|
expect(home.house_number_and_street).to eq('36 Main St')
|
97
50
|
end
|
98
51
|
|
99
52
|
it "doesn't break on zip-4" do
|
100
53
|
home = Home.create! house_number_and_street: '1038 e dayton st', postcode: '53703-2428'
|
101
|
-
|
54
|
+
subject
|
102
55
|
home.reload
|
103
56
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
104
57
|
end
|
105
58
|
|
106
59
|
it "accepts city and state only" do
|
107
60
|
home = Home.create! house_number_and_street: '1038 e dayton st', city: 'madison', state: 'wisconsin'
|
108
|
-
|
61
|
+
subject
|
109
62
|
home.reload
|
110
63
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
111
64
|
end
|
112
65
|
|
113
66
|
it "allows invalid" do
|
114
67
|
home = Home.create! house_number_and_street: '1039 e dayton st', city: 'madison', state: 'wisconsin'
|
115
|
-
GeocodeRecords.new(
|
68
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
116
69
|
home.reload
|
117
70
|
expect(home.house_number_and_street).to eq('1039 E Dayton St')
|
118
71
|
end
|
119
72
|
|
120
73
|
it "overwrites unit" do
|
121
74
|
home = Home.create! house_number_and_street: '123 n blount st apt 403', city: 'madison', state: 'wisconsin'
|
122
|
-
GeocodeRecords.new(
|
75
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
123
76
|
home.reload
|
124
77
|
expect(home.house_number_and_street).to eq('123 N Blount St Unit 403')
|
125
78
|
end
|
126
79
|
|
127
80
|
it "overwrites city name with default_city_name" do
|
128
81
|
home = Home.create! house_number_and_street: '7333 Bay Bridge Rd', city: 'eastvale', state: 'ca'
|
129
|
-
GeocodeRecords.new(
|
82
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
|
130
83
|
home.reload
|
131
84
|
expect(home.city).to eq('Corona')
|
132
85
|
end
|
@@ -134,7 +87,7 @@ describe GeocodeRecords do
|
|
134
87
|
describe 'known issues' do
|
135
88
|
it "doesn't fix float-format postcode on records that it can't geocode" do
|
136
89
|
home = Home.create! house_number_and_street: 'gibberish', postcode: '53703.0'
|
137
|
-
|
90
|
+
subject
|
138
91
|
home.reload
|
139
92
|
expect(home.house_number_and_street).to eq('gibberish')
|
140
93
|
expect(home.postcode).to eq('53703.0')
|
@@ -142,7 +95,7 @@ describe GeocodeRecords do
|
|
142
95
|
|
143
96
|
it "doesn't fix unzeropadded postcode on records that it can't geocode" do
|
144
97
|
home = Home.create! house_number_and_street: 'gibberish', postcode: '5753'
|
145
|
-
|
98
|
+
subject
|
146
99
|
home.reload
|
147
100
|
expect(home.house_number_and_street).to eq('gibberish')
|
148
101
|
expect(home.postcode).to eq('5753')
|
data/spec/spec_helper.rb
CHANGED
@@ -2,3 +2,62 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
|
2
2
|
require 'geocode_records'
|
3
3
|
|
4
4
|
require 'pry'
|
5
|
+
|
6
|
+
dbname = 'geocode_records_test'
|
7
|
+
ENV['DATABASE_URL'] = "postgresql://127.0.0.1:#{ENV['PGPORT'] || 5432}/#{dbname}"
|
8
|
+
|
9
|
+
unless ENV['FAST'] == 'true'
|
10
|
+
GeocodeRecords.system('createdb', ENV.fetch('DATABASE_URL')) rescue nil
|
11
|
+
GeocodeRecords.run_sql(
|
12
|
+
ENV.fetch('DATABASE_URL'),
|
13
|
+
'CREATE EXTENSION IF NOT EXISTS postgis'
|
14
|
+
)
|
15
|
+
GeocodeRecords.run_sql(
|
16
|
+
ENV.fetch('DATABASE_URL'),
|
17
|
+
'DROP TABLE IF EXISTS homes'
|
18
|
+
)
|
19
|
+
sql = <<-SQL
|
20
|
+
CREATE TABLE homes (
|
21
|
+
id uuid primary key,
|
22
|
+
the_geom geometry(Geometry,4326),
|
23
|
+
the_geom_webmercator geometry(Geometry,3857),
|
24
|
+
glob text,
|
25
|
+
house_number_and_street text,
|
26
|
+
house_number int,
|
27
|
+
unit_number text,
|
28
|
+
city text,
|
29
|
+
state text,
|
30
|
+
postcode text,
|
31
|
+
latitude float,
|
32
|
+
longitude float,
|
33
|
+
foo text
|
34
|
+
)
|
35
|
+
SQL
|
36
|
+
GeocodeRecords.run_sql(
|
37
|
+
ENV.fetch('DATABASE_URL'),
|
38
|
+
sql
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
require 'active_record'
|
43
|
+
ActiveRecord::Base.establish_connection
|
44
|
+
|
45
|
+
require 'logger'
|
46
|
+
require 'fileutils'
|
47
|
+
FileUtils.mkdir_p 'log'
|
48
|
+
logger = Logger.new 'log/test.log'
|
49
|
+
ActiveRecord::Base.logger = logger
|
50
|
+
|
51
|
+
require 'securerandom'
|
52
|
+
class Home < ActiveRecord::Base
|
53
|
+
self.primary_key = 'id'
|
54
|
+
before_create do
|
55
|
+
self.id ||= SecureRandom.uuid
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
RSpec.configure do |config|
|
60
|
+
config.before :each do |example|
|
61
|
+
Home.delete_all
|
62
|
+
end
|
63
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geocode_records
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: activerecord
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 4.1.9
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 4.1.9
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: activesupport
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,75 +25,33 @@ dependencies:
|
|
39
25
|
- !ruby/object:Gem::Version
|
40
26
|
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: attr_extras
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: zaru
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: upsert
|
28
|
+
name: activerecord
|
85
29
|
requirement: !ruby/object:Gem::Requirement
|
86
30
|
requirements:
|
87
31
|
- - ">="
|
88
32
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
90
|
-
type: :
|
33
|
+
version: 4.1.9
|
34
|
+
type: :development
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
38
|
- - ">="
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
40
|
+
version: 4.1.9
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: pg
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
|
-
- - "
|
45
|
+
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :
|
47
|
+
version: '0.21'
|
48
|
+
type: :development
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
|
-
- - "
|
52
|
+
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
54
|
+
version: '0.21'
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
56
|
name: bundler
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,7 +108,7 @@ dependencies:
|
|
164
108
|
- - ">="
|
165
109
|
- !ruby/object:Gem::Version
|
166
110
|
version: '0'
|
167
|
-
description: A quick way to re-geocode a table
|
111
|
+
description: A quick way to re-geocode a table. Requires 2 binaries, so YMMV.
|
168
112
|
email:
|
169
113
|
- seamus@abshere.net
|
170
114
|
executables:
|
@@ -185,11 +129,9 @@ files:
|
|
185
129
|
- lib/geocode_records.rb
|
186
130
|
- lib/geocode_records/dump_sql_to_csv.rb
|
187
131
|
- lib/geocode_records/geocode_csv.rb
|
188
|
-
- lib/geocode_records/smarty_streets.rb
|
189
132
|
- lib/geocode_records/update_table_from_csv.rb
|
190
133
|
- lib/geocode_records/version.rb
|
191
134
|
- package.json
|
192
|
-
- spec/geocode_records/smarty_streets_spec.rb
|
193
135
|
- spec/geocode_records_spec.rb
|
194
136
|
- spec/spec_helper.rb
|
195
137
|
homepage: https://github.com/seamusabshere/geocode_records
|
@@ -212,11 +154,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
212
154
|
version: '0'
|
213
155
|
requirements: []
|
214
156
|
rubyforge_project:
|
215
|
-
rubygems_version: 2.6.
|
157
|
+
rubygems_version: 2.6.13
|
216
158
|
signing_key:
|
217
159
|
specification_version: 4
|
218
160
|
summary: Geocode an ActiveRecord::Relation with node_smartystreets
|
219
161
|
test_files:
|
220
|
-
- spec/geocode_records/smarty_streets_spec.rb
|
221
162
|
- spec/geocode_records_spec.rb
|
222
163
|
- spec/spec_helper.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
class GeocodeRecords
|
2
|
-
|
3
|
-
module SmartyStreets
|
4
|
-
|
5
|
-
VERSION = '1.7.2'
|
6
|
-
|
7
|
-
def self.bin_path
|
8
|
-
@bin_path ||= if File.exist?('node_modules/.bin/smartystreets')
|
9
|
-
'node_modules/.bin/smartystreets'
|
10
|
-
else
|
11
|
-
'smartystreets'
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.check_compatible!
|
16
|
-
raise "smartystreets >= #{VERSION} is required" unless SmartyStreets.compatible?
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.compatible?
|
20
|
-
output = run_with_output('-V')
|
21
|
-
current_version = Gem::Version.new output.chomp
|
22
|
-
min_version = Gem::Version.new VERSION
|
23
|
-
current_version >= min_version
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.run(*args)
|
27
|
-
shargs = Shellwords.join(args)
|
28
|
-
system "#{bin_path} #{shargs}"
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.run_with_output(*args)
|
32
|
-
shargs = Shellwords.join(args)
|
33
|
-
`#{bin_path} #{shargs}`
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
require 'geocode_records/smarty_streets'
|
4
|
-
|
5
|
-
describe GeocodeRecords::SmartyStreets do
|
6
|
-
|
7
|
-
describe '.bin_path' do
|
8
|
-
subject { described_class.bin_path }
|
9
|
-
|
10
|
-
it { is_expected.to eq 'node_modules/.bin/smartystreets' }
|
11
|
-
end
|
12
|
-
|
13
|
-
describe '.compatible?' do
|
14
|
-
before { allow(described_class).to receive(:run_with_output).and_return("#{version}\n") }
|
15
|
-
|
16
|
-
subject { described_class.compatible? }
|
17
|
-
|
18
|
-
context 'v1.3.1' do
|
19
|
-
let(:version) { '1.3.1' }
|
20
|
-
|
21
|
-
it { is_expected.to be false }
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'v1.7.2' do
|
25
|
-
let(:version) { '1.7.2' }
|
26
|
-
|
27
|
-
it { is_expected.to be true }
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '.run_with_output' do
|
33
|
-
subject { described_class.run_with_output '-V' }
|
34
|
-
|
35
|
-
it { is_expected.to match /\d+\.\d+.\d+/ }
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|