geocode_records 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9f9481740c101b666c731e1071a7d019b3e5f580
4
- data.tar.gz: c2f36a4a3908b26fdfab3fcba6f07892a8193ae3
3
+ metadata.gz: 1cfba62c3b057b69bc8180fe3d09d7fa0a09c931
4
+ data.tar.gz: 9ab706ef30253318866feb80a9d2fa5ee947ace7
5
5
  SHA512:
6
- metadata.gz: 7a3189caf4b574054a078989e2cc5b23ebd0773525eee869c7e2da1ac6ef7a6257a8104c3d5b857d952a7cd16f2315796210474f0e45c54a5e79d554730fc503
7
- data.tar.gz: 7410d0746fbd76029a0cd573804abe6bd5a553e46c1a8d2a84415b3fb8090da7ca7bec25a3a8888b0edb248cdc99385b7bc60cf7c981e05de03d071df81edf48
6
+ metadata.gz: 2a99ba57d347697a74160599861696897b564bc8f528dcccab87eccb0c5127a9743380c769e15ef66a6d94fb9dea693a526a1cd97ac86183169d778a5b3a254b
7
+ data.tar.gz: c10936fe5170800b77bc186099666a9dadd3e909074c9af39956519b90a1d5f864d929cbac964047ddbbfc7def9ad05bb149c7cdff66f5bc5f12fcb44437f665
data/.gitignore CHANGED
@@ -14,3 +14,4 @@
14
14
  mkmf.log
15
15
  /node_modules/
16
16
  log/*
17
+ package-lock.json
data/CHANGELOG CHANGED
@@ -1,3 +1,16 @@
1
+ 1.0.0 / 2018-01-16
2
+
3
+ * Breaking changes
4
+
5
+ * Totally change the API
6
+ * Require smartystreets 1.8.2
7
+ * Require pgloader to be in your path
8
+ * Require psql to be in your path
9
+
10
+ * Enhancements
11
+
12
+ * Way faster for big files
13
+
1
14
  0.1.5 / 2017-10-15
2
15
 
3
16
  * Enhancements
data/README.md CHANGED
@@ -1,31 +1,7 @@
1
1
  # GeocodeRecords
2
2
 
3
- TODO: Write a gem description
3
+ As long as you do very specific things... quickly re-geocode tables.
4
4
 
5
- ## Installation
5
+ ## Copyright
6
6
 
7
- Add this line to your application's Gemfile:
8
-
9
- ```ruby
10
- gem 'geocode_records'
11
- ```
12
-
13
- And then execute:
14
-
15
- $ bundle
16
-
17
- Or install it yourself as:
18
-
19
- $ gem install geocode_records
20
-
21
- ## Usage
22
-
23
- TODO: Write usage instructions here
24
-
25
- ## Contributing
26
-
27
- 1. Fork it ( https://github.com/[my-github-username]/geocode_records/fork )
28
- 2. Create your feature branch (`git checkout -b my-new-feature`)
29
- 3. Commit your changes (`git commit -am 'Add some feature'`)
30
- 4. Push to the branch (`git push origin my-new-feature`)
31
- 5. Create a new Pull Request
7
+ Copyright 2018 Seamus Abshere
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Seamus Abshere"]
10
10
  spec.email = ["seamus@abshere.net"]
11
11
  spec.summary = %q{Geocode an ActiveRecord::Relation with node_smartystreets}
12
- spec.description = %q{A quick way to re-geocode a table}
12
+ spec.description = %q{A quick way to re-geocode a table. Requires 2 binaries, so YMMV.}
13
13
  spec.homepage = "https://github.com/seamusabshere/geocode_records"
14
14
  spec.license = "MIT"
15
15
 
@@ -17,18 +17,13 @@ Gem::Specification.new do |spec|
17
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
-
21
- spec.add_runtime_dependency 'activerecord', '>=4.1.9'
20
+
22
21
  spec.add_runtime_dependency 'activesupport'
23
- spec.add_runtime_dependency 'pg'
24
- spec.add_runtime_dependency 'attr_extras'
25
- spec.add_runtime_dependency 'zaru'
26
- spec.add_runtime_dependency 'upsert'
27
- spec.add_runtime_dependency 'pasqual'
28
22
 
23
+ spec.add_development_dependency 'activerecord', '>=4.1.9'
24
+ spec.add_development_dependency 'pg', '~>0.21'
29
25
  spec.add_development_dependency "bundler", "~> 1.7"
30
26
  spec.add_development_dependency "rake", "~> 10.0"
31
27
  spec.add_development_dependency "rspec"
32
28
  spec.add_development_dependency "pry"
33
-
34
29
  end
@@ -1,18 +1,38 @@
1
- class DumpSqlToCsv
2
- attr_private :sql
3
- attr_private :pasqual
1
+ class GeocodeRecords
2
+ class DumpSqlToCsv
3
+ attr_reader :database_url
4
+ attr_reader :glob
5
+ attr_reader :table_name
6
+ attr_reader :subquery
4
7
 
5
- def initialize(pasqual, sql, ignored_options = {})
6
- @pasqual = pasqual
7
- @sql = sql
8
- end
8
+ def initialize(
9
+ database_url:,
10
+ glob:,
11
+ table_name: nil,
12
+ subquery: nil)
13
+ @database_url = database_url
14
+ @glob = glob
15
+ @table_name = table_name
16
+ @subquery = subquery
17
+ end
9
18
 
10
- def path
11
- @path = Dir::Tmpname.create(sql[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
19
+ def perform
20
+ memo = GeocodeRecords.new_tmp_path(subquery || table_name)
21
+ GeocodeRecords.run_sql(
22
+ database_url,
23
+ "\\copy (#{sql}) TO '#{memo}' DELIMITER ',' CSV HEADER"
24
+ )
25
+ memo
26
+ end
12
27
 
13
- pasqual.command "\\copy (#{sql}) TO '#{@path}' DELIMITER ',' CSV HEADER"
28
+ private
14
29
 
15
- @path
30
+ def sql
31
+ @sql ||= unless glob
32
+ "SELECT id, house_number_and_street, city, state, regexp_replace(postcode, '.0$', '') AS postcode FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE city IS NOT NULL OR postcode IS NOT NULL"
33
+ else
34
+ "SELECT id, glob FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE (city IS NULL AND postcode IS NULL) AND glob IS NOT NULL"
35
+ end
36
+ end
16
37
  end
17
-
18
38
  end
@@ -1,41 +1,62 @@
1
- require 'tmpdir'
2
- require 'fileutils'
3
- require 'csv'
4
- require 'shellwords'
5
- require 'zaru'
1
+ require 'json'
6
2
 
7
- require 'geocode_records/smarty_streets'
8
-
9
- # copied from hotdog/app/services/file_geocoder.rb with seamus variations
10
3
  class GeocodeRecords
11
4
  class GeocodeCsv
5
+ attr_reader :path
12
6
  attr_reader :glob
13
7
  attr_reader :include_invalid
14
8
 
15
- def initialize(input_path, options = {})
16
- @input_path = input_path
17
- options ||= {}
18
- @glob = options[:glob]
19
- @include_invalid = options[:include_invalid]
20
- @mutex = Mutex.new
9
+ REQUIRED_SMARTYSTREETS_VERSION = Gem::Version.new('1.8.2')
10
+ COLUMN_DEFINITION = {
11
+ delivery_line_1: true,
12
+ components: {
13
+ primary_number: true,
14
+ secondary_number: true,
15
+ city_name: true,
16
+ default_city_name: true,
17
+ state_abbreviation: true,
18
+ zipcode: true
19
+ },
20
+ metadata: {
21
+ latitude: true,
22
+ longitude: true
23
+ }
24
+ }
25
+
26
+ def initialize(
27
+ path:,
28
+ glob:,
29
+ include_invalid:
30
+ )
31
+ @path = path
32
+ @glob = glob
33
+ @include_invalid = include_invalid
21
34
  end
22
35
 
23
- def path
24
- return if @path
25
- @mutex.synchronize do
26
- return if @path
27
- geocode
28
- recode
29
- @path = @recoded_path
36
+ def perform
37
+ return unless File.size(path) > 32
38
+ memo = GeocodeRecords.new_tmp_path File.basename("geocoded-#{path}")
39
+ args = [
40
+ smartystreets_bin_path,
41
+ '-i', path,
42
+ '-o', memo,
43
+ '--quiet',
44
+ '--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
45
+ '--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN'),
46
+ '--column-definition', JSON.dump(COLUMN_DEFINITION),
47
+ ]
48
+ if include_invalid
49
+ args += [ '--include-invalid' ]
30
50
  end
51
+ input_map.each do |ss, local|
52
+ args += [ "--#{ss}-col", local.to_s ]
53
+ end
54
+ GeocodeRecords.system(*args)
55
+ memo
31
56
  end
32
57
 
33
58
  private
34
59
 
35
- attr_private :input_path
36
- attr_private :geocoded_path
37
- attr_private :recoded_path
38
-
39
60
  def input_map
40
61
  @input_map ||= if glob
41
62
  { 'street' => 'glob' }
@@ -47,55 +68,18 @@ class GeocodeRecords
47
68
  end
48
69
  end
49
70
 
50
- def geocode
51
- @geocoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.geocode')) {}
52
- args = [
53
- '-i', input_path,
54
- '-o', geocoded_path,
55
- '--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
56
- '--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN')
57
- ]
58
- if include_invalid
59
- args += [ '--include-invalid' ]
60
- end
61
- input_map.each do |ss, local|
62
- args += [ "--#{ss}-col", local.to_s ]
63
- end
64
- SmartyStreets.run *args
65
- raise "Geocoding failed on #{input_path.inspect} with args #{Shellwords.join(args)}" unless $?.success?
66
- end
67
-
68
- def recode
69
- @recoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.recode')) {}
70
- File.open(@recoded_path, 'w') do |f|
71
- f.write output_columns.to_csv
72
- CSV.foreach(@geocoded_path, headers: true) do |geocoded_row|
73
- f.write recode_columns.map { |k| geocoded_row[k] }.to_csv
71
+ def smartystreets_bin_path
72
+ @smartystreets_bin_path ||= begin
73
+ memo = [
74
+ 'node_modules/.bin/smartystreets',
75
+ `which smartystreets`.chomp
76
+ ].compact.detect do |path|
77
+ File.exist? path
74
78
  end
75
- end
76
- File.unlink @geocoded_path
77
- end
78
-
79
- def output_columns
80
- @output_columns ||= (File.open(input_path) { |f| CSV.parse_line(f.gets) } + RECODE_MAP.keys).uniq
81
- end
82
-
83
- # no street yet - street_name, street_suffix
84
- RECODE_MAP = {
85
- 'house_number_and_street' => 'ss_delivery_line_1',
86
- 'house_number' => 'ss_primary_number',
87
- 'unit_number' => 'ss_secondary_number',
88
- 'city' => 'ss_city_name',
89
- 'state' => 'ss_state_abbreviation',
90
- 'postcode' => 'ss_zipcode',
91
- 'latitude' => 'ss_latitude',
92
- 'longitude' => 'ss_longitude',
93
- 'default_city' => 'ss_default_city_name',
94
- }.freeze
95
-
96
- def recode_columns
97
- @recode_columns ||= output_columns.map do |output_k|
98
- RECODE_MAP[output_k] || output_k
79
+ raise "can't find smartystreets bin" unless memo
80
+ version = Gem::Version.new `#{memo} -V`.chomp
81
+ raise "smartystreets #{version} too old" unless version >= REQUIRED_SMARTYSTREETS_VERSION
82
+ memo
99
83
  end
100
84
  end
101
85
  end
@@ -1,36 +1,130 @@
1
1
  require 'csv'
2
- require 'upsert'
3
2
 
4
3
  class GeocodeRecords
5
4
  class UpdateTableFromCsv
6
- MAX_INT = 2**31 - 1
7
- attr_private :connection
8
- attr_private :table_name
9
- attr_private :csv_path
10
- attr_private :upsert
11
- def initialize(connection, table_name, csv_path, ignored_options = {})
12
- @upsert = Upsert.new connection, table_name
13
- @csv_path = csv_path
5
+ CREATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
6
+ CREATE TABLE $TMP_TABLE_NAME (
7
+ id uuid primary key,
8
+ ss_delivery_line_1 text,
9
+ ss_primary_number text,
10
+ ss_secondary_number text,
11
+ ss_city_name text,
12
+ ss_state_abbreviation text,
13
+ ss_zipcode text,
14
+ ss_latitude float,
15
+ ss_longitude float,
16
+ ss_default_city_name text
17
+ )
18
+ SQL
19
+
20
+ PGLOADER_CONFIG = <<-SQL
21
+ LOAD CSV
22
+ FROM '$PATH'
23
+ (
24
+ $INPUT_COLUMNS
25
+ )
26
+ INTO $DATABASE_URL?$TMP_TABLE_NAME
27
+ (
28
+ id,
29
+ ss_delivery_line_1,
30
+ ss_primary_number,
31
+ ss_secondary_number,
32
+ ss_city_name,
33
+ ss_state_abbreviation,
34
+ ss_zipcode,
35
+ ss_latitude,
36
+ ss_longitude,
37
+ ss_default_city_name
38
+ )
39
+ WITH
40
+ skip header = 1,
41
+ fields optionally enclosed by '"',
42
+ fields escaped by double-quote,
43
+ fields terminated by ','
44
+ SET client_encoding to 'utf8';
45
+ SQL
46
+
47
+ UPDATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
48
+ UPDATE $TABLE_NAME AS target
49
+ SET
50
+ house_number_and_street = src.ss_delivery_line_1,
51
+ house_number = CASE WHEN LENGTH(src.ss_primary_number) > 7 THEN NULL ELSE src.ss_primary_number::int END,
52
+ unit_number = src.ss_secondary_number,
53
+ city = COALESCE(src.ss_default_city_name, src.ss_city_name),
54
+ state = src.ss_state_abbreviation,
55
+ postcode = src.ss_zipcode,
56
+ latitude = src.ss_latitude,
57
+ longitude = src.ss_longitude
58
+ FROM $TMP_TABLE_NAME AS src
59
+ WHERE
60
+ target.id = src.id
61
+ AND src.ss_zipcode IS NOT NULL
62
+ SQL
63
+
64
+ attr_reader :database_url
65
+ attr_reader :table_name
66
+ attr_reader :path
67
+
68
+ def initialize(
69
+ database_url:,
70
+ table_name:,
71
+ path:
72
+ )
73
+ @database_url = database_url
74
+ @table_name = table_name
75
+ @path = path
14
76
  end
77
+
15
78
  def perform
16
- count = 0
17
- CSV.foreach(csv_path, headers: true) do |row|
18
- next unless row['postcode']
19
- row = row.to_hash
20
- if hn = row['house_number']
21
- row['house_number'] = hn.to_i
22
- next if row['house_number'] > MAX_INT
23
- end
24
- if default_city = row.delete('default_city')
25
- row['city'] = default_city
26
- end
27
- selector = { id: row.delete('id') }
28
- setter = row
29
- upsert.row selector, setter
30
- # $stderr.write "U#{count}..." if count % 1000 == 0
31
- count += 1
79
+ return unless File.size(path) > 32
80
+ tmp_table_name = create_tmp_table
81
+ begin
82
+ load_csv_into_tmp_table tmp_table_name
83
+ update_original_table tmp_table_name
84
+ ensure
85
+ delete_tmp_table tmp_table_name
86
+ end
87
+ end
88
+
89
+ def create_tmp_table
90
+ memo = "geocode_records_#{table_name}_#{rand(999999)}"
91
+ GeocodeRecords.run_sql(
92
+ database_url,
93
+ CREATE_TABLE_SQL.sub('$TMP_TABLE_NAME', memo)
94
+ )
95
+ memo
96
+ end
97
+
98
+ def load_csv_into_tmp_table(tmp_table_name)
99
+ pg_loader_config_path = GeocodeRecords.new_tmp_path('pgloader')
100
+ File.open(pg_loader_config_path, 'w') { |f| f.write PGLOADER_CONFIG.sub('$INPUT_COLUMNS', input_columns.join(',')).sub('$DATABASE_URL', database_url).sub('$TMP_TABLE_NAME', tmp_table_name).sub('$PATH', path) }
101
+ GeocodeRecords.system(
102
+ 'pgloader',
103
+ # '--debug',
104
+ '--quiet',
105
+ pg_loader_config_path
106
+ )
107
+ File.unlink pg_loader_config_path
108
+ end
109
+
110
+ def update_original_table(tmp_table_name)
111
+ GeocodeRecords.run_sql(
112
+ database_url,
113
+ UPDATE_TABLE_SQL.sub('$TMP_TABLE_NAME', tmp_table_name).sub('$TABLE_NAME', table_name)
114
+ )
115
+ end
116
+
117
+ def delete_tmp_table(tmp_table_name)
118
+ GeocodeRecords.run_sql(
119
+ database_url,
120
+ "DROP TABLE IF EXISTS #{tmp_table_name}"
121
+ )
122
+ end
123
+
124
+ def input_columns
125
+ CSV.parse_line(File.open(path) { |f| f.gets }).map do |col|
126
+ "#{col} [NULL IF BLANKS]"
32
127
  end
33
- count
34
128
  end
35
129
  end
36
130
  end
@@ -1,3 +1,3 @@
1
1
  class GeocodeRecords
2
- VERSION = '0.1.5'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,77 +1,95 @@
1
- require 'active_record'
2
1
  require 'active_support'
3
2
  require 'active_support/core_ext'
4
- require 'attr_extras'
5
- require 'pasqual'
3
+ require 'tmpdir'
4
+ require 'shellwords'
5
+ require 'fileutils'
6
6
 
7
7
  require_relative 'geocode_records/version'
8
8
  require_relative 'geocode_records/dump_sql_to_csv'
9
9
  require_relative 'geocode_records/geocode_csv'
10
10
  require_relative 'geocode_records/update_table_from_csv'
11
- require_relative 'geocode_records/smarty_streets'
12
11
 
13
12
  class GeocodeRecords
13
+ class << self
14
+ def new_tmp_path(hint)
15
+ Dir::Tmpname.create(hint[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
16
+ end
14
17
 
15
- attr_reader :records
16
- attr_reader :options
17
- def initialize(records, options = {})
18
- records.is_a?(ActiveRecord::Relation) or raise(ArgumentError, "expected AR::Relation, got #{records.class}")
19
- @options = (options || {}).symbolize_keys
20
- @records = records
21
- end
22
-
23
- def perform
24
- SmartyStreets.check_compatible!
18
+ def system(*args)
19
+ result = Kernel.system(*args)
20
+ unless result
21
+ raise "failed command:\n#{Shellwords.join args}"
22
+ end
23
+ nil
24
+ end
25
25
 
26
- if records.count > 0
27
- # $stderr.puts "GeocodeRecords: #{records.count} to go!"
28
- ungeocoded_path = DumpSqlToCsv.new(pasqual, to_sql, options).path
29
- geocoded_path = GeocodeCsv.new(ungeocoded_path, options).path
30
- UpdateTableFromCsv.new(connection, table_name, geocoded_path, options).perform
31
- set_the_geom
32
- File.unlink geocoded_path
33
- File.unlink ungeocoded_path
26
+ def run_sql(database_url, sql)
27
+ system(
28
+ 'psql',
29
+ database_url,
30
+ '-v', 'ON_ERROR_STOP=on',
31
+ # '--echo-all',
32
+ '--quiet',
33
+ '--no-psqlrc',
34
+ '--pset', 'pager=off',
35
+ '--command', sql
36
+ )
34
37
  end
35
38
  end
36
39
 
37
- private
40
+ attr_reader :database_url
41
+ attr_reader :table_name
38
42
 
39
- def glob
40
- !!options[:glob]
41
- end
43
+ # optional
44
+ attr_reader :include_invalid
45
+ attr_reader :subquery
42
46
 
43
- def set_the_geom
44
- records.update_all <<-SQL
45
- the_geom = ST_SetSRID(ST_MakePoint(longitude, latitude), 4326),
46
- the_geom_webmercator = ST_Transform(ST_SetSRID(ST_MakePoint(longitude, latitude), 4326), 3857)
47
- SQL
47
+ def initialize(
48
+ database_url:,
49
+ table_name:,
50
+ subquery: nil,
51
+ include_invalid: nil
52
+ )
53
+ @database_url = database_url
54
+ @table_name = table_name
55
+ @subquery = subquery
56
+ @include_invalid = include_invalid
48
57
  end
49
-
50
- def to_sql
51
- c = connection
52
- c.unprepared_statement do
53
- if glob
54
- c.to_sql records.select('id', 'glob').where.not(glob: nil).arel, records.bind_values
55
- else
56
- c.to_sql records.select('id', 'house_number_and_street', 'house_number', 'unit_number', 'city', 'state', "regexp_replace(postcode, '.0$', '') AS postcode").where('city IS NOT NULL OR postcode IS NOT NULL').arel, records.bind_values
57
- end
58
- end
58
+
59
+ def perform
60
+ geocode glob: false
61
+ geocode glob: true
59
62
  end
60
63
 
61
- def connection
62
- records.connection
63
- end
64
+ private
64
65
 
65
- def table_name
66
- @table_name = begin
67
- memo = options[:table_name]
68
- memo ||= records.table_name if records.respond_to?(:table_name)
69
- memo ||= records.engine.table_name
70
- memo
66
+ def geocode(glob:)
67
+ ungeocoded_path = nil
68
+ geocoded_path = nil
69
+ begin
70
+ ungeocoded_path = DumpSqlToCsv.new(
71
+ database_url: database_url,
72
+ table_name: table_name,
73
+ subquery: subquery,
74
+ glob: glob
75
+ ).perform
76
+ unless File.size(ungeocoded_path) > 32
77
+ $stderr.puts "No records found for #{table_name} #{subquery}, skipping"
78
+ return
79
+ end
80
+ geocoded_path = GeocodeCsv.new(
81
+ path: ungeocoded_path,
82
+ glob: glob,
83
+ include_invalid: include_invalid
84
+ ).perform
85
+ UpdateTableFromCsv.new(
86
+ database_url: database_url,
87
+ table_name: table_name,
88
+ path: geocoded_path
89
+ ).perform
90
+ ensure
91
+ FileUtils.rm_f geocoded_path if geocoded_path
92
+ FileUtils.rm_f ungeocoded_path if ungeocoded_path
71
93
  end
72
94
  end
73
-
74
- def pasqual
75
- @pasqual ||= Pasqual.for ENV.fetch('DATABASE_URL')
76
- end
77
95
  end
data/package.json CHANGED
@@ -2,6 +2,6 @@
2
2
  "name": "rolodeck",
3
3
  "private": true,
4
4
  "dependencies": {
5
- "smartystreets": "^1.7.2"
5
+ "smartystreets": "^1.8.2"
6
6
  }
7
7
  }
@@ -1,132 +1,85 @@
1
1
  require 'spec_helper'
2
2
 
3
- dbname = 'geocode_records_test'
4
- ENV['DATABASE_URL'] = "postgresql://127.0.0.1/#{dbname}"
5
-
6
- unless ENV['FAST'] == 'true'
7
- psql = Pasqual.for ENV['DATABASE_URL']
8
- psql.dropdb rescue nil
9
- psql.createdb
10
- psql.command 'CREATE EXTENSION postgis'
11
- psql.command <<-SQL
12
- CREATE TABLE homes (
13
- id serial primary key,
14
- the_geom geometry(Geometry,4326),
15
- the_geom_webmercator geometry(Geometry,3857),
16
- house_number_and_street text,
17
- house_number int,
18
- unit_number text,
19
- city text,
20
- state text,
21
- postcode text,
22
- latitude float,
23
- longitude float
24
- )
25
- SQL
26
- psql.command <<-SQL
27
- CREATE TABLE glob_homes (
28
- id serial primary key,
29
- glob text,
30
- the_geom geometry(Geometry,4326),
31
- the_geom_webmercator geometry(Geometry,3857),
32
- house_number_and_street text,
33
- house_number int,
34
- unit_number text,
35
- city text,
36
- state text,
37
- postcode text,
38
- latitude float,
39
- longitude float
40
- )
41
- SQL
42
- end
43
-
44
- require 'active_record'
45
- ActiveRecord::Base.establish_connection
46
-
47
- require 'logger'
48
- require 'fileutils'
49
- FileUtils.mkdir_p 'log'
50
- logger = Logger.new 'log/test.log'
51
- ActiveRecord::Base.logger = logger
52
-
53
- class Home < ActiveRecord::Base
54
- end
55
- class GlobHome < ActiveRecord::Base
56
- end
57
-
58
3
  describe GeocodeRecords do
59
- it 'has a version number' do
60
- expect(GeocodeRecords::VERSION).not_to be nil
61
- end
62
-
63
- it "geocodes an AR::Relation" do
4
+ subject { GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes').perform }
5
+
6
+ it "geocodes" do
64
7
  home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
65
- GeocodeRecords.new(Home.all).perform
8
+ subject
66
9
  home.reload
67
10
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
68
11
  end
69
12
 
70
- it "geocodes an AR::Relation with just a glob" do
71
- home = GlobHome.create! glob: '1038 e dayton st, madison, wi 53703'
72
- GeocodeRecords.new(GlobHome.all, glob: true).perform
13
+ it "geocodes glob" do
14
+ home = Home.create! glob: '1038 e dayton st, madison, wi 53703'
15
+ subject
73
16
  home.reload
74
17
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
75
18
  expect(home.postcode).to eq('53703')
76
19
  end
77
20
 
21
+ it "geocodes by sql" do
22
+ home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703', foo: 'bar'
23
+ home_ignored = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
24
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', subquery: %{SELECT * FROM homes WHERE foo = 'bar'}).perform
25
+ home.reload
26
+ home_ignored.reload
27
+ expect(home.latitude).to be_present
28
+ expect(home_ignored.latitude).to be_nil
29
+ end
30
+
78
31
  it "doesn't break on float-format postcode" do
79
32
  home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703.0'
80
- GeocodeRecords.new(Home.all).perform
33
+ subject
81
34
  home.reload
82
35
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
83
36
  end
84
37
 
85
38
  it "doesn't break on unzeropadded postcode" do
86
39
  home = Home.create! house_number_and_street: '36 main st', postcode: '5753'
87
- GeocodeRecords.new(Home.all).perform
40
+ subject
88
41
  home.reload
89
42
  expect(home.house_number_and_street).to eq('36 Main St')
90
43
  end
91
44
 
92
45
  it "doesn't break on unzeropadded float-format postcode" do
93
46
  home = Home.create! house_number_and_street: '36 main st', postcode: '5753.0'
94
- GeocodeRecords.new(Home.all).perform
47
+ subject
95
48
  home.reload
96
49
  expect(home.house_number_and_street).to eq('36 Main St')
97
50
  end
98
51
 
99
52
  it "doesn't break on zip-4" do
100
53
  home = Home.create! house_number_and_street: '1038 e dayton st', postcode: '53703-2428'
101
- GeocodeRecords.new(Home.all).perform
54
+ subject
102
55
  home.reload
103
56
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
104
57
  end
105
58
 
106
59
  it "accepts city and state only" do
107
60
  home = Home.create! house_number_and_street: '1038 e dayton st', city: 'madison', state: 'wisconsin'
108
- GeocodeRecords.new(Home.all).perform
61
+ subject
109
62
  home.reload
110
63
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
111
64
  end
112
65
 
113
66
  it "allows invalid" do
114
67
  home = Home.create! house_number_and_street: '1039 e dayton st', city: 'madison', state: 'wisconsin'
115
- GeocodeRecords.new(Home.all, include_invalid: true).perform
68
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
116
69
  home.reload
117
70
  expect(home.house_number_and_street).to eq('1039 E Dayton St')
118
71
  end
119
72
 
120
73
  it "overwrites unit" do
121
74
  home = Home.create! house_number_and_street: '123 n blount st apt 403', city: 'madison', state: 'wisconsin'
122
- GeocodeRecords.new(Home.all, include_invalid: true).perform
75
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
123
76
  home.reload
124
77
  expect(home.house_number_and_street).to eq('123 N Blount St Unit 403')
125
78
  end
126
79
 
127
80
  it "overwrites city name with default_city_name" do
128
81
  home = Home.create! house_number_and_street: '7333 Bay Bridge Rd', city: 'eastvale', state: 'ca'
129
- GeocodeRecords.new(Home.all, include_invalid: true).perform
82
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
130
83
  home.reload
131
84
  expect(home.city).to eq('Corona')
132
85
  end
@@ -134,7 +87,7 @@ describe GeocodeRecords do
134
87
  describe 'known issues' do
135
88
  it "doesn't fix float-format postcode on records that it can't geocode" do
136
89
  home = Home.create! house_number_and_street: 'gibberish', postcode: '53703.0'
137
- GeocodeRecords.new(Home.all).perform
90
+ subject
138
91
  home.reload
139
92
  expect(home.house_number_and_street).to eq('gibberish')
140
93
  expect(home.postcode).to eq('53703.0')
@@ -142,7 +95,7 @@ describe GeocodeRecords do
142
95
 
143
96
  it "doesn't fix unzeropadded postcode on records that it can't geocode" do
144
97
  home = Home.create! house_number_and_street: 'gibberish', postcode: '5753'
145
- GeocodeRecords.new(Home.all).perform
98
+ subject
146
99
  home.reload
147
100
  expect(home.house_number_and_street).to eq('gibberish')
148
101
  expect(home.postcode).to eq('5753')
data/spec/spec_helper.rb CHANGED
@@ -2,3 +2,62 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'geocode_records'
3
3
 
4
4
  require 'pry'
5
+
6
+ dbname = 'geocode_records_test'
7
+ ENV['DATABASE_URL'] = "postgresql://127.0.0.1:#{ENV['PGPORT'] || 5432}/#{dbname}"
8
+
9
+ unless ENV['FAST'] == 'true'
10
+ GeocodeRecords.system('createdb', ENV.fetch('DATABASE_URL')) rescue nil
11
+ GeocodeRecords.run_sql(
12
+ ENV.fetch('DATABASE_URL'),
13
+ 'CREATE EXTENSION IF NOT EXISTS postgis'
14
+ )
15
+ GeocodeRecords.run_sql(
16
+ ENV.fetch('DATABASE_URL'),
17
+ 'DROP TABLE IF EXISTS homes'
18
+ )
19
+ sql = <<-SQL
20
+ CREATE TABLE homes (
21
+ id uuid primary key,
22
+ the_geom geometry(Geometry,4326),
23
+ the_geom_webmercator geometry(Geometry,3857),
24
+ glob text,
25
+ house_number_and_street text,
26
+ house_number int,
27
+ unit_number text,
28
+ city text,
29
+ state text,
30
+ postcode text,
31
+ latitude float,
32
+ longitude float,
33
+ foo text
34
+ )
35
+ SQL
36
+ GeocodeRecords.run_sql(
37
+ ENV.fetch('DATABASE_URL'),
38
+ sql
39
+ )
40
+ end
41
+
42
+ require 'active_record'
43
+ ActiveRecord::Base.establish_connection
44
+
45
+ require 'logger'
46
+ require 'fileutils'
47
+ FileUtils.mkdir_p 'log'
48
+ logger = Logger.new 'log/test.log'
49
+ ActiveRecord::Base.logger = logger
50
+
51
+ require 'securerandom'
52
+ class Home < ActiveRecord::Base
53
+ self.primary_key = 'id'
54
+ before_create do
55
+ self.id ||= SecureRandom.uuid
56
+ end
57
+ end
58
+
59
+ RSpec.configure do |config|
60
+ config.before :each do |example|
61
+ Home.delete_all
62
+ end
63
+ end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geocode_records
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-15 00:00:00.000000000 Z
11
+ date: 2018-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: activerecord
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 4.1.9
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 4.1.9
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: activesupport
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -39,75 +25,33 @@ dependencies:
39
25
  - !ruby/object:Gem::Version
40
26
  version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
- name: pg
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: attr_extras
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: zaru
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: upsert
28
+ name: activerecord
85
29
  requirement: !ruby/object:Gem::Requirement
86
30
  requirements:
87
31
  - - ">="
88
32
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :runtime
33
+ version: 4.1.9
34
+ type: :development
91
35
  prerelease: false
92
36
  version_requirements: !ruby/object:Gem::Requirement
93
37
  requirements:
94
38
  - - ">="
95
39
  - !ruby/object:Gem::Version
96
- version: '0'
40
+ version: 4.1.9
97
41
  - !ruby/object:Gem::Dependency
98
- name: pasqual
42
+ name: pg
99
43
  requirement: !ruby/object:Gem::Requirement
100
44
  requirements:
101
- - - ">="
45
+ - - "~>"
102
46
  - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :runtime
47
+ version: '0.21'
48
+ type: :development
105
49
  prerelease: false
106
50
  version_requirements: !ruby/object:Gem::Requirement
107
51
  requirements:
108
- - - ">="
52
+ - - "~>"
109
53
  - !ruby/object:Gem::Version
110
- version: '0'
54
+ version: '0.21'
111
55
  - !ruby/object:Gem::Dependency
112
56
  name: bundler
113
57
  requirement: !ruby/object:Gem::Requirement
@@ -164,7 +108,7 @@ dependencies:
164
108
  - - ">="
165
109
  - !ruby/object:Gem::Version
166
110
  version: '0'
167
- description: A quick way to re-geocode a table
111
+ description: A quick way to re-geocode a table. Requires 2 binaries, so YMMV.
168
112
  email:
169
113
  - seamus@abshere.net
170
114
  executables:
@@ -185,11 +129,9 @@ files:
185
129
  - lib/geocode_records.rb
186
130
  - lib/geocode_records/dump_sql_to_csv.rb
187
131
  - lib/geocode_records/geocode_csv.rb
188
- - lib/geocode_records/smarty_streets.rb
189
132
  - lib/geocode_records/update_table_from_csv.rb
190
133
  - lib/geocode_records/version.rb
191
134
  - package.json
192
- - spec/geocode_records/smarty_streets_spec.rb
193
135
  - spec/geocode_records_spec.rb
194
136
  - spec/spec_helper.rb
195
137
  homepage: https://github.com/seamusabshere/geocode_records
@@ -212,11 +154,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
212
154
  version: '0'
213
155
  requirements: []
214
156
  rubyforge_project:
215
- rubygems_version: 2.6.8
157
+ rubygems_version: 2.6.13
216
158
  signing_key:
217
159
  specification_version: 4
218
160
  summary: Geocode an ActiveRecord::Relation with node_smartystreets
219
161
  test_files:
220
- - spec/geocode_records/smarty_streets_spec.rb
221
162
  - spec/geocode_records_spec.rb
222
163
  - spec/spec_helper.rb
@@ -1,38 +0,0 @@
1
- class GeocodeRecords
2
-
3
- module SmartyStreets
4
-
5
- VERSION = '1.7.2'
6
-
7
- def self.bin_path
8
- @bin_path ||= if File.exist?('node_modules/.bin/smartystreets')
9
- 'node_modules/.bin/smartystreets'
10
- else
11
- 'smartystreets'
12
- end
13
- end
14
-
15
- def self.check_compatible!
16
- raise "smartystreets >= #{VERSION} is required" unless SmartyStreets.compatible?
17
- end
18
-
19
- def self.compatible?
20
- output = run_with_output('-V')
21
- current_version = Gem::Version.new output.chomp
22
- min_version = Gem::Version.new VERSION
23
- current_version >= min_version
24
- end
25
-
26
- def self.run(*args)
27
- shargs = Shellwords.join(args)
28
- system "#{bin_path} #{shargs}"
29
- end
30
-
31
- def self.run_with_output(*args)
32
- shargs = Shellwords.join(args)
33
- `#{bin_path} #{shargs}`
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- require 'geocode_records/smarty_streets'
4
-
5
- describe GeocodeRecords::SmartyStreets do
6
-
7
- describe '.bin_path' do
8
- subject { described_class.bin_path }
9
-
10
- it { is_expected.to eq 'node_modules/.bin/smartystreets' }
11
- end
12
-
13
- describe '.compatible?' do
14
- before { allow(described_class).to receive(:run_with_output).and_return("#{version}\n") }
15
-
16
- subject { described_class.compatible? }
17
-
18
- context 'v1.3.1' do
19
- let(:version) { '1.3.1' }
20
-
21
- it { is_expected.to be false }
22
- end
23
-
24
- context 'v1.7.2' do
25
- let(:version) { '1.7.2' }
26
-
27
- it { is_expected.to be true }
28
- end
29
-
30
- end
31
-
32
- describe '.run_with_output' do
33
- subject { described_class.run_with_output '-V' }
34
-
35
- it { is_expected.to match /\d+\.\d+.\d+/ }
36
- end
37
-
38
- end
39
-