geocode_records 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +6 -0
- data/lib/geocode_records.rb +10 -4
- data/lib/geocode_records/dump_sql_to_csv.rb +12 -5
- data/lib/geocode_records/geocode_csv.rb +16 -8
- data/lib/geocode_records/update_table_from_csv.rb +16 -12
- data/lib/geocode_records/version.rb +1 -1
- data/spec/geocode_records_spec.rb +27 -0
- data/spec/spec_helper.rb +9 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5956f06797c3f79547c0385cc672b451e8ad23b
|
4
|
+
data.tar.gz: 6d4a9b488113ce87fe0bc4b8615dac61ecf60aec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b3a7c67d72a3d38b0a76903f66fef501dea328f9c752df674aedc918a12ef2e7c166eb65c95ef9bf041e864ae7ce1caa81dea421d7b4852161c15d71dff87eb
|
7
|
+
data.tar.gz: eaa1f7f94982cb13ea692f43b42cf5b82e286c49787c8aa688b2b0764f83c36a63b1eaa0806b1c71bf8123295b086a08f67ea1eae012a1c5c1a6914c972d666f
|
data/CHANGELOG
CHANGED
data/lib/geocode_records.rb
CHANGED
@@ -43,17 +43,20 @@ class GeocodeRecords
|
|
43
43
|
# optional
|
44
44
|
attr_reader :include_invalid
|
45
45
|
attr_reader :subquery
|
46
|
+
attr_reader :num
|
46
47
|
|
47
48
|
def initialize(
|
48
49
|
database_url:,
|
49
50
|
table_name:,
|
50
51
|
subquery: nil,
|
51
|
-
include_invalid:
|
52
|
+
include_invalid: false,
|
53
|
+
num: 1
|
52
54
|
)
|
53
55
|
@database_url = database_url
|
54
56
|
@table_name = table_name
|
55
57
|
@subquery = subquery
|
56
58
|
@include_invalid = include_invalid
|
59
|
+
@num = num
|
57
60
|
end
|
58
61
|
|
59
62
|
def perform
|
@@ -71,7 +74,8 @@ class GeocodeRecords
|
|
71
74
|
database_url: database_url,
|
72
75
|
table_name: table_name,
|
73
76
|
subquery: subquery,
|
74
|
-
glob: glob
|
77
|
+
glob: glob,
|
78
|
+
num: num,
|
75
79
|
).perform
|
76
80
|
unless File.size(ungeocoded_path) > 32
|
77
81
|
return
|
@@ -79,12 +83,14 @@ class GeocodeRecords
|
|
79
83
|
geocoded_path = GeocodeCsv.new(
|
80
84
|
path: ungeocoded_path,
|
81
85
|
glob: glob,
|
82
|
-
include_invalid: include_invalid
|
86
|
+
include_invalid: include_invalid,
|
87
|
+
num: num,
|
83
88
|
).perform
|
84
89
|
UpdateTableFromCsv.new(
|
85
90
|
database_url: database_url,
|
86
91
|
table_name: table_name,
|
87
|
-
path: geocoded_path
|
92
|
+
path: geocoded_path,
|
93
|
+
num: num,
|
88
94
|
).perform
|
89
95
|
ensure
|
90
96
|
FileUtils.rm_f geocoded_path if geocoded_path
|
data/lib/geocode_records/dump_sql_to_csv.rb
CHANGED
@@ -4,16 +4,20 @@ class GeocodeRecords
|
|
4
4
|
attr_reader :glob
|
5
5
|
attr_reader :table_name
|
6
6
|
attr_reader :subquery
|
7
|
+
attr_reader :num
|
7
8
|
|
8
9
|
def initialize(
|
9
10
|
database_url:,
|
10
11
|
glob:,
|
11
12
|
table_name: nil,
|
12
|
-
subquery: nil
|
13
|
+
subquery: nil,
|
14
|
+
num: nil
|
15
|
+
)
|
13
16
|
@database_url = database_url
|
14
17
|
@glob = glob
|
15
18
|
@table_name = table_name
|
16
19
|
@subquery = subquery
|
20
|
+
@num = num
|
17
21
|
end
|
18
22
|
|
19
23
|
def perform
|
@@ -28,10 +32,13 @@ class GeocodeRecords
|
|
28
32
|
private
|
29
33
|
|
30
34
|
def sql
|
31
|
-
@sql ||=
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
+
@sql ||= begin
|
36
|
+
num_suffix = (num == 1 ? '' : num)
|
37
|
+
unless glob
|
38
|
+
"SELECT id, house_number_and_street#{num_suffix}, city#{num_suffix}, state#{num_suffix}, regexp_replace(postcode#{num_suffix}, '.0$', '') AS postcode#{num_suffix} FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE city#{num_suffix} IS NOT NULL OR postcode#{num_suffix} IS NOT NULL"
|
39
|
+
else
|
40
|
+
"SELECT id, glob#{num_suffix} FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE (city#{num_suffix} IS NULL AND postcode#{num_suffix} IS NULL) AND glob#{num_suffix} IS NOT NULL"
|
41
|
+
end
|
35
42
|
end
|
36
43
|
end
|
37
44
|
end
|
data/lib/geocode_records/geocode_csv.rb
CHANGED
@@ -5,6 +5,7 @@ class GeocodeRecords
|
|
5
5
|
attr_reader :path
|
6
6
|
attr_reader :glob
|
7
7
|
attr_reader :include_invalid
|
8
|
+
attr_reader :num
|
8
9
|
|
9
10
|
REQUIRED_SMARTYSTREETS_VERSION = Gem::Version.new('1.8.2')
|
10
11
|
COLUMN_DEFINITION = {
|
@@ -26,11 +27,13 @@ class GeocodeRecords
|
|
26
27
|
def initialize(
|
27
28
|
path:,
|
28
29
|
glob:,
|
29
|
-
include_invalid
|
30
|
+
include_invalid:,
|
31
|
+
num:
|
30
32
|
)
|
31
33
|
@path = path
|
32
34
|
@glob = glob
|
33
35
|
@include_invalid = include_invalid
|
36
|
+
@num = num
|
34
37
|
end
|
35
38
|
|
36
39
|
def perform
|
@@ -58,13 +61,18 @@ class GeocodeRecords
|
|
58
61
|
private
|
59
62
|
|
60
63
|
def input_map
|
61
|
-
@input_map ||=
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
@input_map ||= begin
|
65
|
+
num_suffix = (num == 1 ? '' : num)
|
66
|
+
if glob
|
67
|
+
{ 'street' => "glob#{num_suffix}" }
|
68
|
+
else
|
69
|
+
{
|
70
|
+
'street' => "house_number_and_street#{num_suffix}",
|
71
|
+
'zipcode' => "postcode#{num_suffix}",
|
72
|
+
'city' => "city#{num_suffix}",
|
73
|
+
'state' => "state#{num_suffix}",
|
74
|
+
}
|
75
|
+
end
|
68
76
|
end
|
69
77
|
end
|
70
78
|
|
data/lib/geocode_records/update_table_from_csv.rb
CHANGED
@@ -33,8 +33,8 @@ class GeocodeRecords
|
|
33
33
|
UPDATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
|
34
34
|
UPDATE $TABLE_NAME AS target
|
35
35
|
SET
|
36
|
-
house_number_and_street = src.ss_delivery_line_1,
|
37
|
-
house_number = CASE
|
36
|
+
house_number_and_street$NUM_SUFFIX = src.ss_delivery_line_1,
|
37
|
+
house_number$NUM_SUFFIX = CASE
|
38
38
|
WHEN src.ss_primary_number IS NULL THEN NULL
|
39
39
|
WHEN LENGTH(src.ss_primary_number) > 8 THEN NULL
|
40
40
|
WHEN src.ss_primary_number ~ '\\A\\d+\\Z' THEN src.ss_primary_number::int
|
@@ -42,12 +42,12 @@ class GeocodeRecords
|
|
42
42
|
WHEN src.ss_primary_number ~ '-' THEN (SELECT ROUND(AVG(v)) FROM unnest(array_remove(regexp_split_to_array(src.ss_primary_number, '\\D+'), '')::int[]) v)
|
43
43
|
ELSE (SELECT regexp_matches(src.ss_primary_number, '(\\d+)'))[1]::int
|
44
44
|
END,
|
45
|
-
unit_number = src.ss_secondary_number,
|
46
|
-
city = COALESCE(src.ss_default_city_name, src.ss_city_name),
|
47
|
-
state = src.ss_state_abbreviation,
|
48
|
-
postcode = src.ss_zipcode,
|
49
|
-
latitude = src.ss_latitude,
|
50
|
-
longitude = src.ss_longitude
|
45
|
+
unit_number$NUM_SUFFIX = src.ss_secondary_number,
|
46
|
+
city$NUM_SUFFIX = COALESCE(src.ss_default_city_name, src.ss_city_name),
|
47
|
+
state$NUM_SUFFIX = src.ss_state_abbreviation,
|
48
|
+
postcode$NUM_SUFFIX = src.ss_zipcode,
|
49
|
+
latitude$NUM_SUFFIX = src.ss_latitude,
|
50
|
+
longitude$NUM_SUFFIX = src.ss_longitude
|
51
51
|
FROM $TMP_TABLE_NAME AS src
|
52
52
|
WHERE
|
53
53
|
target.id = src.id
|
@@ -57,15 +57,18 @@ class GeocodeRecords
|
|
57
57
|
attr_reader :database_url
|
58
58
|
attr_reader :table_name
|
59
59
|
attr_reader :path
|
60
|
+
attr_reader :num
|
60
61
|
|
61
62
|
def initialize(
|
62
63
|
database_url:,
|
63
64
|
table_name:,
|
64
|
-
path
|
65
|
+
path:,
|
66
|
+
num:
|
65
67
|
)
|
66
68
|
@database_url = database_url
|
67
69
|
@table_name = table_name
|
68
70
|
@path = path
|
71
|
+
@num = num
|
69
72
|
end
|
70
73
|
|
71
74
|
def perform
|
@@ -84,7 +87,7 @@ class GeocodeRecords
|
|
84
87
|
memo = "geocode_records_#{table_name}_#{rand(999999)}".gsub(/[^a-z0-9_]/i, '')
|
85
88
|
GeocodeRecords.psql(
|
86
89
|
database_url,
|
87
|
-
CREATE_TABLE_SQL.
|
90
|
+
CREATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', memo)
|
88
91
|
)
|
89
92
|
memo
|
90
93
|
end
|
@@ -103,14 +106,15 @@ class GeocodeRecords
|
|
103
106
|
def load_csv_into_tmp_table(path:, table_name:)
|
104
107
|
GeocodeRecords.psql(
|
105
108
|
database_url,
|
106
|
-
COPY_SQL.
|
109
|
+
COPY_SQL.gsub('$TMP_TABLE_NAME', table_name).gsub('$PATH', path)
|
107
110
|
)
|
108
111
|
end
|
109
112
|
|
110
113
|
def update_original_table(tmp_table_name)
|
114
|
+
num_suffix = (num == 1 ? '' : num.to_s)
|
111
115
|
GeocodeRecords.psql(
|
112
116
|
database_url,
|
113
|
-
UPDATE_TABLE_SQL.
|
117
|
+
UPDATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', tmp_table_name).gsub('$TABLE_NAME', table_name).gsub('$NUM_SUFFIX', num_suffix)
|
114
118
|
)
|
115
119
|
end
|
116
120
|
|
data/spec/geocode_records_spec.rb
CHANGED
@@ -8,6 +8,15 @@ describe GeocodeRecords do
|
|
8
8
|
subject
|
9
9
|
home.reload
|
10
10
|
expect(home.house_number_and_street).to eq('1038 E Dayton St')
|
11
|
+
expect(home.latitude).to be_present
|
12
|
+
end
|
13
|
+
|
14
|
+
it "geocodes addr 2" do
|
15
|
+
home = Home.create! house_number_and_street2: '1038 e deyton st', postcode2: '53703'
|
16
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', num: 2).perform
|
17
|
+
home.reload
|
18
|
+
expect(home.house_number_and_street2).to eq('1038 E Dayton St')
|
19
|
+
expect(home.latitude2).to be_present
|
11
20
|
end
|
12
21
|
|
13
22
|
it "geocodes quoted table name" do
|
@@ -25,6 +34,14 @@ describe GeocodeRecords do
|
|
25
34
|
expect(home.postcode).to eq('53703')
|
26
35
|
end
|
27
36
|
|
37
|
+
it "geocodes glob2" do
|
38
|
+
home = Home.create! glob2: '1038 e dayton st, madison, wi 53703'
|
39
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', num: 2).perform
|
40
|
+
home.reload
|
41
|
+
expect(home.house_number_and_street2).to eq('1038 E Dayton St')
|
42
|
+
expect(home.postcode2).to eq('53703')
|
43
|
+
end
|
44
|
+
|
28
45
|
it "geocodes by sql" do
|
29
46
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703', foo: 'bar'
|
30
47
|
home_ignored = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
|
@@ -35,6 +52,16 @@ describe GeocodeRecords do
|
|
35
52
|
expect(home_ignored.latitude).to be_nil
|
36
53
|
end
|
37
54
|
|
55
|
+
it "geocodes by sql num 2" do
|
56
|
+
home = Home.create! house_number_and_street2: '1038 e deyton st', postcode2: '53703', foo: 'bar'
|
57
|
+
home_ignored = Home.create! house_number_and_street2: '1038 e deyton st', postcode2: '53703'
|
58
|
+
GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', subquery: %{SELECT * FROM homes WHERE foo = 'bar'}, num: 2).perform
|
59
|
+
home.reload
|
60
|
+
home_ignored.reload
|
61
|
+
expect(home.latitude2).to be_present
|
62
|
+
expect(home_ignored.latitude2).to be_nil
|
63
|
+
end
|
64
|
+
|
38
65
|
it "doesn't break on float-format postcode" do
|
39
66
|
home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703.0'
|
40
67
|
subject
|
data/spec/spec_helper.rb
CHANGED
@@ -30,6 +30,15 @@ unless ENV['FAST'] == 'true'
|
|
30
30
|
postcode text,
|
31
31
|
latitude float,
|
32
32
|
longitude float,
|
33
|
+
glob2 text,
|
34
|
+
house_number_and_street2 text,
|
35
|
+
house_number2 int,
|
36
|
+
unit_number2 text,
|
37
|
+
city2 text,
|
38
|
+
state2 text,
|
39
|
+
postcode2 text,
|
40
|
+
latitude2 float,
|
41
|
+
longitude2 float,
|
33
42
|
foo text
|
34
43
|
)
|
35
44
|
SQL
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geocode_records
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|