geocode_records 0.1.5 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9f9481740c101b666c731e1071a7d019b3e5f580
4
- data.tar.gz: c2f36a4a3908b26fdfab3fcba6f07892a8193ae3
3
+ metadata.gz: 1cfba62c3b057b69bc8180fe3d09d7fa0a09c931
4
+ data.tar.gz: 9ab706ef30253318866feb80a9d2fa5ee947ace7
5
5
  SHA512:
6
- metadata.gz: 7a3189caf4b574054a078989e2cc5b23ebd0773525eee869c7e2da1ac6ef7a6257a8104c3d5b857d952a7cd16f2315796210474f0e45c54a5e79d554730fc503
7
- data.tar.gz: 7410d0746fbd76029a0cd573804abe6bd5a553e46c1a8d2a84415b3fb8090da7ca7bec25a3a8888b0edb248cdc99385b7bc60cf7c981e05de03d071df81edf48
6
+ metadata.gz: 2a99ba57d347697a74160599861696897b564bc8f528dcccab87eccb0c5127a9743380c769e15ef66a6d94fb9dea693a526a1cd97ac86183169d778a5b3a254b
7
+ data.tar.gz: c10936fe5170800b77bc186099666a9dadd3e909074c9af39956519b90a1d5f864d929cbac964047ddbbfc7def9ad05bb149c7cdff66f5bc5f12fcb44437f665
data/.gitignore CHANGED
@@ -14,3 +14,4 @@
14
14
  mkmf.log
15
15
  /node_modules/
16
16
  log/*
17
+ package-lock.json
data/CHANGELOG CHANGED
@@ -1,3 +1,16 @@
1
+ 1.0.0 / 2018-01-16
2
+
3
+ * Breaking changes
4
+
5
+ * Totally change the API
6
+ * Require smartystreets 1.8.2
7
+ * Require pgloader to be in your path
8
+ * Require psql to be in your path
9
+
10
+ * Enhancements
11
+
12
+ * Way faster for big files
13
+
1
14
  0.1.5 / 2017-10-15
2
15
 
3
16
  * Enhancements
data/README.md CHANGED
@@ -1,31 +1,7 @@
1
1
  # GeocodeRecords
2
2
 
3
- TODO: Write a gem description
3
+ As long as you do very specific things... quickly re-geocode tables.
4
4
 
5
- ## Installation
5
+ ## Copyright
6
6
 
7
- Add this line to your application's Gemfile:
8
-
9
- ```ruby
10
- gem 'geocode_records'
11
- ```
12
-
13
- And then execute:
14
-
15
- $ bundle
16
-
17
- Or install it yourself as:
18
-
19
- $ gem install geocode_records
20
-
21
- ## Usage
22
-
23
- TODO: Write usage instructions here
24
-
25
- ## Contributing
26
-
27
- 1. Fork it ( https://github.com/[my-github-username]/geocode_records/fork )
28
- 2. Create your feature branch (`git checkout -b my-new-feature`)
29
- 3. Commit your changes (`git commit -am 'Add some feature'`)
30
- 4. Push to the branch (`git push origin my-new-feature`)
31
- 5. Create a new Pull Request
7
+ Copyright 2018 Seamus Abshere
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Seamus Abshere"]
10
10
  spec.email = ["seamus@abshere.net"]
11
11
  spec.summary = %q{Geocode an ActiveRecord::Relation with node_smartystreets}
12
- spec.description = %q{A quick way to re-geocode a table}
12
+ spec.description = %q{A quick way to re-geocode a table. Requires 2 binaries, so YMMV.}
13
13
  spec.homepage = "https://github.com/seamusabshere/geocode_records"
14
14
  spec.license = "MIT"
15
15
 
@@ -17,18 +17,13 @@ Gem::Specification.new do |spec|
17
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
-
21
- spec.add_runtime_dependency 'activerecord', '>=4.1.9'
20
+
22
21
  spec.add_runtime_dependency 'activesupport'
23
- spec.add_runtime_dependency 'pg'
24
- spec.add_runtime_dependency 'attr_extras'
25
- spec.add_runtime_dependency 'zaru'
26
- spec.add_runtime_dependency 'upsert'
27
- spec.add_runtime_dependency 'pasqual'
28
22
 
23
+ spec.add_development_dependency 'activerecord', '>=4.1.9'
24
+ spec.add_development_dependency 'pg', '~>0.21'
29
25
  spec.add_development_dependency "bundler", "~> 1.7"
30
26
  spec.add_development_dependency "rake", "~> 10.0"
31
27
  spec.add_development_dependency "rspec"
32
28
  spec.add_development_dependency "pry"
33
-
34
29
  end
@@ -1,18 +1,38 @@
1
- class DumpSqlToCsv
2
- attr_private :sql
3
- attr_private :pasqual
1
+ class GeocodeRecords
2
+ class DumpSqlToCsv
3
+ attr_reader :database_url
4
+ attr_reader :glob
5
+ attr_reader :table_name
6
+ attr_reader :subquery
4
7
 
5
- def initialize(pasqual, sql, ignored_options = {})
6
- @pasqual = pasqual
7
- @sql = sql
8
- end
8
+ def initialize(
9
+ database_url:,
10
+ glob:,
11
+ table_name: nil,
12
+ subquery: nil)
13
+ @database_url = database_url
14
+ @glob = glob
15
+ @table_name = table_name
16
+ @subquery = subquery
17
+ end
9
18
 
10
- def path
11
- @path = Dir::Tmpname.create(sql[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
19
+ def perform
20
+ memo = GeocodeRecords.new_tmp_path(subquery || table_name)
21
+ GeocodeRecords.run_sql(
22
+ database_url,
23
+ "\\copy (#{sql}) TO '#{memo}' DELIMITER ',' CSV HEADER"
24
+ )
25
+ memo
26
+ end
12
27
 
13
- pasqual.command "\\copy (#{sql}) TO '#{@path}' DELIMITER ',' CSV HEADER"
28
+ private
14
29
 
15
- @path
30
+ def sql
31
+ @sql ||= unless glob
32
+ "SELECT id, house_number_and_street, city, state, regexp_replace(postcode, '.0$', '') AS postcode FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE city IS NOT NULL OR postcode IS NOT NULL"
33
+ else
34
+ "SELECT id, glob FROM #{subquery ? "(#{subquery}) t1" : table_name} WHERE (city IS NULL AND postcode IS NULL) AND glob IS NOT NULL"
35
+ end
36
+ end
16
37
  end
17
-
18
38
  end
@@ -1,41 +1,62 @@
1
- require 'tmpdir'
2
- require 'fileutils'
3
- require 'csv'
4
- require 'shellwords'
5
- require 'zaru'
1
+ require 'json'
6
2
 
7
- require 'geocode_records/smarty_streets'
8
-
9
- # copied from hotdog/app/services/file_geocoder.rb with seamus variations
10
3
  class GeocodeRecords
11
4
  class GeocodeCsv
5
+ attr_reader :path
12
6
  attr_reader :glob
13
7
  attr_reader :include_invalid
14
8
 
15
- def initialize(input_path, options = {})
16
- @input_path = input_path
17
- options ||= {}
18
- @glob = options[:glob]
19
- @include_invalid = options[:include_invalid]
20
- @mutex = Mutex.new
9
+ REQUIRED_SMARTYSTREETS_VERSION = Gem::Version.new('1.8.2')
10
+ COLUMN_DEFINITION = {
11
+ delivery_line_1: true,
12
+ components: {
13
+ primary_number: true,
14
+ secondary_number: true,
15
+ city_name: true,
16
+ default_city_name: true,
17
+ state_abbreviation: true,
18
+ zipcode: true
19
+ },
20
+ metadata: {
21
+ latitude: true,
22
+ longitude: true
23
+ }
24
+ }
25
+
26
+ def initialize(
27
+ path:,
28
+ glob:,
29
+ include_invalid:
30
+ )
31
+ @path = path
32
+ @glob = glob
33
+ @include_invalid = include_invalid
21
34
  end
22
35
 
23
- def path
24
- return if @path
25
- @mutex.synchronize do
26
- return if @path
27
- geocode
28
- recode
29
- @path = @recoded_path
36
+ def perform
37
+ return unless File.size(path) > 32
38
+ memo = GeocodeRecords.new_tmp_path File.basename("geocoded-#{path}")
39
+ args = [
40
+ smartystreets_bin_path,
41
+ '-i', path,
42
+ '-o', memo,
43
+ '--quiet',
44
+ '--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
45
+ '--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN'),
46
+ '--column-definition', JSON.dump(COLUMN_DEFINITION),
47
+ ]
48
+ if include_invalid
49
+ args += [ '--include-invalid' ]
30
50
  end
51
+ input_map.each do |ss, local|
52
+ args += [ "--#{ss}-col", local.to_s ]
53
+ end
54
+ GeocodeRecords.system(*args)
55
+ memo
31
56
  end
32
57
 
33
58
  private
34
59
 
35
- attr_private :input_path
36
- attr_private :geocoded_path
37
- attr_private :recoded_path
38
-
39
60
  def input_map
40
61
  @input_map ||= if glob
41
62
  { 'street' => 'glob' }
@@ -47,55 +68,18 @@ class GeocodeRecords
47
68
  end
48
69
  end
49
70
 
50
- def geocode
51
- @geocoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.geocode')) {}
52
- args = [
53
- '-i', input_path,
54
- '-o', geocoded_path,
55
- '--auth-id', ENV.fetch('SMARTY_STREETS_AUTH_ID'),
56
- '--auth-token', ENV.fetch('SMARTY_STREETS_AUTH_TOKEN')
57
- ]
58
- if include_invalid
59
- args += [ '--include-invalid' ]
60
- end
61
- input_map.each do |ss, local|
62
- args += [ "--#{ss}-col", local.to_s ]
63
- end
64
- SmartyStreets.run *args
65
- raise "Geocoding failed on #{input_path.inspect} with args #{Shellwords.join(args)}" unless $?.success?
66
- end
67
-
68
- def recode
69
- @recoded_path = Dir::Tmpname.create(Zaru.sanitize!(input_path + '.recode')) {}
70
- File.open(@recoded_path, 'w') do |f|
71
- f.write output_columns.to_csv
72
- CSV.foreach(@geocoded_path, headers: true) do |geocoded_row|
73
- f.write recode_columns.map { |k| geocoded_row[k] }.to_csv
71
+ def smartystreets_bin_path
72
+ @smartystreets_bin_path ||= begin
73
+ memo = [
74
+ 'node_modules/.bin/smartystreets',
75
+ `which smartystreets`.chomp
76
+ ].compact.detect do |path|
77
+ File.exist? path
74
78
  end
75
- end
76
- File.unlink @geocoded_path
77
- end
78
-
79
- def output_columns
80
- @output_columns ||= (File.open(input_path) { |f| CSV.parse_line(f.gets) } + RECODE_MAP.keys).uniq
81
- end
82
-
83
- # no street yet - street_name, street_suffix
84
- RECODE_MAP = {
85
- 'house_number_and_street' => 'ss_delivery_line_1',
86
- 'house_number' => 'ss_primary_number',
87
- 'unit_number' => 'ss_secondary_number',
88
- 'city' => 'ss_city_name',
89
- 'state' => 'ss_state_abbreviation',
90
- 'postcode' => 'ss_zipcode',
91
- 'latitude' => 'ss_latitude',
92
- 'longitude' => 'ss_longitude',
93
- 'default_city' => 'ss_default_city_name',
94
- }.freeze
95
-
96
- def recode_columns
97
- @recode_columns ||= output_columns.map do |output_k|
98
- RECODE_MAP[output_k] || output_k
79
+ raise "can't find smartystreets bin" unless memo
80
+ version = Gem::Version.new `#{memo} -V`.chomp
81
+ raise "smartystreets #{version} too old" unless version >= REQUIRED_SMARTYSTREETS_VERSION
82
+ memo
99
83
  end
100
84
  end
101
85
  end
@@ -1,36 +1,130 @@
1
1
  require 'csv'
2
- require 'upsert'
3
2
 
4
3
  class GeocodeRecords
5
4
  class UpdateTableFromCsv
6
- MAX_INT = 2**31 - 1
7
- attr_private :connection
8
- attr_private :table_name
9
- attr_private :csv_path
10
- attr_private :upsert
11
- def initialize(connection, table_name, csv_path, ignored_options = {})
12
- @upsert = Upsert.new connection, table_name
13
- @csv_path = csv_path
5
+ CREATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
6
+ CREATE TABLE $TMP_TABLE_NAME (
7
+ id uuid primary key,
8
+ ss_delivery_line_1 text,
9
+ ss_primary_number text,
10
+ ss_secondary_number text,
11
+ ss_city_name text,
12
+ ss_state_abbreviation text,
13
+ ss_zipcode text,
14
+ ss_latitude float,
15
+ ss_longitude float,
16
+ ss_default_city_name text
17
+ )
18
+ SQL
19
+
20
+ PGLOADER_CONFIG = <<-SQL
21
+ LOAD CSV
22
+ FROM '$PATH'
23
+ (
24
+ $INPUT_COLUMNS
25
+ )
26
+ INTO $DATABASE_URL?$TMP_TABLE_NAME
27
+ (
28
+ id,
29
+ ss_delivery_line_1,
30
+ ss_primary_number,
31
+ ss_secondary_number,
32
+ ss_city_name,
33
+ ss_state_abbreviation,
34
+ ss_zipcode,
35
+ ss_latitude,
36
+ ss_longitude,
37
+ ss_default_city_name
38
+ )
39
+ WITH
40
+ skip header = 1,
41
+ fields optionally enclosed by '"',
42
+ fields escaped by double-quote,
43
+ fields terminated by ','
44
+ SET client_encoding to 'utf8';
45
+ SQL
46
+
47
+ UPDATE_TABLE_SQL = (<<-SQL).gsub(' ', '').freeze
48
+ UPDATE $TABLE_NAME AS target
49
+ SET
50
+ house_number_and_street = src.ss_delivery_line_1,
51
+ house_number = CASE WHEN LENGTH(src.ss_primary_number) > 7 THEN NULL ELSE src.ss_primary_number::int END,
52
+ unit_number = src.ss_secondary_number,
53
+ city = COALESCE(src.ss_default_city_name, src.ss_city_name),
54
+ state = src.ss_state_abbreviation,
55
+ postcode = src.ss_zipcode,
56
+ latitude = src.ss_latitude,
57
+ longitude = src.ss_longitude
58
+ FROM $TMP_TABLE_NAME AS src
59
+ WHERE
60
+ target.id = src.id
61
+ AND src.ss_zipcode IS NOT NULL
62
+ SQL
63
+
64
+ attr_reader :database_url
65
+ attr_reader :table_name
66
+ attr_reader :path
67
+
68
+ def initialize(
69
+ database_url:,
70
+ table_name:,
71
+ path:
72
+ )
73
+ @database_url = database_url
74
+ @table_name = table_name
75
+ @path = path
14
76
  end
77
+
15
78
  def perform
16
- count = 0
17
- CSV.foreach(csv_path, headers: true) do |row|
18
- next unless row['postcode']
19
- row = row.to_hash
20
- if hn = row['house_number']
21
- row['house_number'] = hn.to_i
22
- next if row['house_number'] > MAX_INT
23
- end
24
- if default_city = row.delete('default_city')
25
- row['city'] = default_city
26
- end
27
- selector = { id: row.delete('id') }
28
- setter = row
29
- upsert.row selector, setter
30
- # $stderr.write "U#{count}..." if count % 1000 == 0
31
- count += 1
79
+ return unless File.size(path) > 32
80
+ tmp_table_name = create_tmp_table
81
+ begin
82
+ load_csv_into_tmp_table tmp_table_name
83
+ update_original_table tmp_table_name
84
+ ensure
85
+ delete_tmp_table tmp_table_name
86
+ end
87
+ end
88
+
89
+ def create_tmp_table
90
+ memo = "geocode_records_#{table_name}_#{rand(999999)}"
91
+ GeocodeRecords.run_sql(
92
+ database_url,
93
+ CREATE_TABLE_SQL.sub('$TMP_TABLE_NAME', memo)
94
+ )
95
+ memo
96
+ end
97
+
98
+ def load_csv_into_tmp_table(tmp_table_name)
99
+ pg_loader_config_path = GeocodeRecords.new_tmp_path('pgloader')
100
+ File.open(pg_loader_config_path, 'w') { |f| f.write PGLOADER_CONFIG.sub('$INPUT_COLUMNS', input_columns.join(',')).sub('$DATABASE_URL', database_url).sub('$TMP_TABLE_NAME', tmp_table_name).sub('$PATH', path) }
101
+ GeocodeRecords.system(
102
+ 'pgloader',
103
+ # '--debug',
104
+ '--quiet',
105
+ pg_loader_config_path
106
+ )
107
+ File.unlink pg_loader_config_path
108
+ end
109
+
110
+ def update_original_table(tmp_table_name)
111
+ GeocodeRecords.run_sql(
112
+ database_url,
113
+ UPDATE_TABLE_SQL.sub('$TMP_TABLE_NAME', tmp_table_name).sub('$TABLE_NAME', table_name)
114
+ )
115
+ end
116
+
117
+ def delete_tmp_table(tmp_table_name)
118
+ GeocodeRecords.run_sql(
119
+ database_url,
120
+ "DROP TABLE IF EXISTS #{tmp_table_name}"
121
+ )
122
+ end
123
+
124
+ def input_columns
125
+ CSV.parse_line(File.open(path) { |f| f.gets }).map do |col|
126
+ "#{col} [NULL IF BLANKS]"
32
127
  end
33
- count
34
128
  end
35
129
  end
36
130
  end
@@ -1,3 +1,3 @@
1
1
  class GeocodeRecords
2
- VERSION = '0.1.5'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,77 +1,95 @@
1
- require 'active_record'
2
1
  require 'active_support'
3
2
  require 'active_support/core_ext'
4
- require 'attr_extras'
5
- require 'pasqual'
3
+ require 'tmpdir'
4
+ require 'shellwords'
5
+ require 'fileutils'
6
6
 
7
7
  require_relative 'geocode_records/version'
8
8
  require_relative 'geocode_records/dump_sql_to_csv'
9
9
  require_relative 'geocode_records/geocode_csv'
10
10
  require_relative 'geocode_records/update_table_from_csv'
11
- require_relative 'geocode_records/smarty_streets'
12
11
 
13
12
  class GeocodeRecords
13
+ class << self
14
+ def new_tmp_path(hint)
15
+ Dir::Tmpname.create(hint[0,64].delete('"').gsub(/\W/,'_').squeeze) {}
16
+ end
14
17
 
15
- attr_reader :records
16
- attr_reader :options
17
- def initialize(records, options = {})
18
- records.is_a?(ActiveRecord::Relation) or raise(ArgumentError, "expected AR::Relation, got #{records.class}")
19
- @options = (options || {}).symbolize_keys
20
- @records = records
21
- end
22
-
23
- def perform
24
- SmartyStreets.check_compatible!
18
+ def system(*args)
19
+ result = Kernel.system(*args)
20
+ unless result
21
+ raise "failed command:\n#{Shellwords.join args}"
22
+ end
23
+ nil
24
+ end
25
25
 
26
- if records.count > 0
27
- # $stderr.puts "GeocodeRecords: #{records.count} to go!"
28
- ungeocoded_path = DumpSqlToCsv.new(pasqual, to_sql, options).path
29
- geocoded_path = GeocodeCsv.new(ungeocoded_path, options).path
30
- UpdateTableFromCsv.new(connection, table_name, geocoded_path, options).perform
31
- set_the_geom
32
- File.unlink geocoded_path
33
- File.unlink ungeocoded_path
26
+ def run_sql(database_url, sql)
27
+ system(
28
+ 'psql',
29
+ database_url,
30
+ '-v', 'ON_ERROR_STOP=on',
31
+ # '--echo-all',
32
+ '--quiet',
33
+ '--no-psqlrc',
34
+ '--pset', 'pager=off',
35
+ '--command', sql
36
+ )
34
37
  end
35
38
  end
36
39
 
37
- private
40
+ attr_reader :database_url
41
+ attr_reader :table_name
38
42
 
39
- def glob
40
- !!options[:glob]
41
- end
43
+ # optional
44
+ attr_reader :include_invalid
45
+ attr_reader :subquery
42
46
 
43
- def set_the_geom
44
- records.update_all <<-SQL
45
- the_geom = ST_SetSRID(ST_MakePoint(longitude, latitude), 4326),
46
- the_geom_webmercator = ST_Transform(ST_SetSRID(ST_MakePoint(longitude, latitude), 4326), 3857)
47
- SQL
47
+ def initialize(
48
+ database_url:,
49
+ table_name:,
50
+ subquery: nil,
51
+ include_invalid: nil
52
+ )
53
+ @database_url = database_url
54
+ @table_name = table_name
55
+ @subquery = subquery
56
+ @include_invalid = include_invalid
48
57
  end
49
-
50
- def to_sql
51
- c = connection
52
- c.unprepared_statement do
53
- if glob
54
- c.to_sql records.select('id', 'glob').where.not(glob: nil).arel, records.bind_values
55
- else
56
- c.to_sql records.select('id', 'house_number_and_street', 'house_number', 'unit_number', 'city', 'state', "regexp_replace(postcode, '.0$', '') AS postcode").where('city IS NOT NULL OR postcode IS NOT NULL').arel, records.bind_values
57
- end
58
- end
58
+
59
+ def perform
60
+ geocode glob: false
61
+ geocode glob: true
59
62
  end
60
63
 
61
- def connection
62
- records.connection
63
- end
64
+ private
64
65
 
65
- def table_name
66
- @table_name = begin
67
- memo = options[:table_name]
68
- memo ||= records.table_name if records.respond_to?(:table_name)
69
- memo ||= records.engine.table_name
70
- memo
66
+ def geocode(glob:)
67
+ ungeocoded_path = nil
68
+ geocoded_path = nil
69
+ begin
70
+ ungeocoded_path = DumpSqlToCsv.new(
71
+ database_url: database_url,
72
+ table_name: table_name,
73
+ subquery: subquery,
74
+ glob: glob
75
+ ).perform
76
+ unless File.size(ungeocoded_path) > 32
77
+ $stderr.puts "No records found for #{table_name} #{subquery}, skipping"
78
+ return
79
+ end
80
+ geocoded_path = GeocodeCsv.new(
81
+ path: ungeocoded_path,
82
+ glob: glob,
83
+ include_invalid: include_invalid
84
+ ).perform
85
+ UpdateTableFromCsv.new(
86
+ database_url: database_url,
87
+ table_name: table_name,
88
+ path: geocoded_path
89
+ ).perform
90
+ ensure
91
+ FileUtils.rm_f geocoded_path if geocoded_path
92
+ FileUtils.rm_f ungeocoded_path if ungeocoded_path
71
93
  end
72
94
  end
73
-
74
- def pasqual
75
- @pasqual ||= Pasqual.for ENV.fetch('DATABASE_URL')
76
- end
77
95
  end
data/package.json CHANGED
@@ -2,6 +2,6 @@
2
2
  "name": "rolodeck",
3
3
  "private": true,
4
4
  "dependencies": {
5
- "smartystreets": "^1.7.2"
5
+ "smartystreets": "^1.8.2"
6
6
  }
7
7
  }
@@ -1,132 +1,85 @@
1
1
  require 'spec_helper'
2
2
 
3
- dbname = 'geocode_records_test'
4
- ENV['DATABASE_URL'] = "postgresql://127.0.0.1/#{dbname}"
5
-
6
- unless ENV['FAST'] == 'true'
7
- psql = Pasqual.for ENV['DATABASE_URL']
8
- psql.dropdb rescue nil
9
- psql.createdb
10
- psql.command 'CREATE EXTENSION postgis'
11
- psql.command <<-SQL
12
- CREATE TABLE homes (
13
- id serial primary key,
14
- the_geom geometry(Geometry,4326),
15
- the_geom_webmercator geometry(Geometry,3857),
16
- house_number_and_street text,
17
- house_number int,
18
- unit_number text,
19
- city text,
20
- state text,
21
- postcode text,
22
- latitude float,
23
- longitude float
24
- )
25
- SQL
26
- psql.command <<-SQL
27
- CREATE TABLE glob_homes (
28
- id serial primary key,
29
- glob text,
30
- the_geom geometry(Geometry,4326),
31
- the_geom_webmercator geometry(Geometry,3857),
32
- house_number_and_street text,
33
- house_number int,
34
- unit_number text,
35
- city text,
36
- state text,
37
- postcode text,
38
- latitude float,
39
- longitude float
40
- )
41
- SQL
42
- end
43
-
44
- require 'active_record'
45
- ActiveRecord::Base.establish_connection
46
-
47
- require 'logger'
48
- require 'fileutils'
49
- FileUtils.mkdir_p 'log'
50
- logger = Logger.new 'log/test.log'
51
- ActiveRecord::Base.logger = logger
52
-
53
- class Home < ActiveRecord::Base
54
- end
55
- class GlobHome < ActiveRecord::Base
56
- end
57
-
58
3
  describe GeocodeRecords do
59
- it 'has a version number' do
60
- expect(GeocodeRecords::VERSION).not_to be nil
61
- end
62
-
63
- it "geocodes an AR::Relation" do
4
+ subject { GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes').perform }
5
+
6
+ it "geocodes" do
64
7
  home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
65
- GeocodeRecords.new(Home.all).perform
8
+ subject
66
9
  home.reload
67
10
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
68
11
  end
69
12
 
70
- it "geocodes an AR::Relation with just a glob" do
71
- home = GlobHome.create! glob: '1038 e dayton st, madison, wi 53703'
72
- GeocodeRecords.new(GlobHome.all, glob: true).perform
13
+ it "geocodes glob" do
14
+ home = Home.create! glob: '1038 e dayton st, madison, wi 53703'
15
+ subject
73
16
  home.reload
74
17
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
75
18
  expect(home.postcode).to eq('53703')
76
19
  end
77
20
 
21
+ it "geocodes by sql" do
22
+ home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703', foo: 'bar'
23
+ home_ignored = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703'
24
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', subquery: %{SELECT * FROM homes WHERE foo = 'bar'}).perform
25
+ home.reload
26
+ home_ignored.reload
27
+ expect(home.latitude).to be_present
28
+ expect(home_ignored.latitude).to be_nil
29
+ end
30
+
78
31
  it "doesn't break on float-format postcode" do
79
32
  home = Home.create! house_number_and_street: '1038 e deyton st', postcode: '53703.0'
80
- GeocodeRecords.new(Home.all).perform
33
+ subject
81
34
  home.reload
82
35
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
83
36
  end
84
37
 
85
38
  it "doesn't break on unzeropadded postcode" do
86
39
  home = Home.create! house_number_and_street: '36 main st', postcode: '5753'
87
- GeocodeRecords.new(Home.all).perform
40
+ subject
88
41
  home.reload
89
42
  expect(home.house_number_and_street).to eq('36 Main St')
90
43
  end
91
44
 
92
45
  it "doesn't break on unzeropadded float-format postcode" do
93
46
  home = Home.create! house_number_and_street: '36 main st', postcode: '5753.0'
94
- GeocodeRecords.new(Home.all).perform
47
+ subject
95
48
  home.reload
96
49
  expect(home.house_number_and_street).to eq('36 Main St')
97
50
  end
98
51
 
99
52
  it "doesn't break on zip-4" do
100
53
  home = Home.create! house_number_and_street: '1038 e dayton st', postcode: '53703-2428'
101
- GeocodeRecords.new(Home.all).perform
54
+ subject
102
55
  home.reload
103
56
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
104
57
  end
105
58
 
106
59
  it "accepts city and state only" do
107
60
  home = Home.create! house_number_and_street: '1038 e dayton st', city: 'madison', state: 'wisconsin'
108
- GeocodeRecords.new(Home.all).perform
61
+ subject
109
62
  home.reload
110
63
  expect(home.house_number_and_street).to eq('1038 E Dayton St')
111
64
  end
112
65
 
113
66
  it "allows invalid" do
114
67
  home = Home.create! house_number_and_street: '1039 e dayton st', city: 'madison', state: 'wisconsin'
115
- GeocodeRecords.new(Home.all, include_invalid: true).perform
68
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
116
69
  home.reload
117
70
  expect(home.house_number_and_street).to eq('1039 E Dayton St')
118
71
  end
119
72
 
120
73
  it "overwrites unit" do
121
74
  home = Home.create! house_number_and_street: '123 n blount st apt 403', city: 'madison', state: 'wisconsin'
122
- GeocodeRecords.new(Home.all, include_invalid: true).perform
75
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
123
76
  home.reload
124
77
  expect(home.house_number_and_street).to eq('123 N Blount St Unit 403')
125
78
  end
126
79
 
127
80
  it "overwrites city name with default_city_name" do
128
81
  home = Home.create! house_number_and_street: '7333 Bay Bridge Rd', city: 'eastvale', state: 'ca'
129
- GeocodeRecords.new(Home.all, include_invalid: true).perform
82
+ GeocodeRecords.new(database_url: ENV.fetch('DATABASE_URL'), table_name: 'homes', include_invalid: true).perform
130
83
  home.reload
131
84
  expect(home.city).to eq('Corona')
132
85
  end
@@ -134,7 +87,7 @@ describe GeocodeRecords do
134
87
  describe 'known issues' do
135
88
  it "doesn't fix float-format postcode on records that it can't geocode" do
136
89
  home = Home.create! house_number_and_street: 'gibberish', postcode: '53703.0'
137
- GeocodeRecords.new(Home.all).perform
90
+ subject
138
91
  home.reload
139
92
  expect(home.house_number_and_street).to eq('gibberish')
140
93
  expect(home.postcode).to eq('53703.0')
@@ -142,7 +95,7 @@ describe GeocodeRecords do
142
95
 
143
96
  it "doesn't fix unzeropadded postcode on records that it can't geocode" do
144
97
  home = Home.create! house_number_and_street: 'gibberish', postcode: '5753'
145
- GeocodeRecords.new(Home.all).perform
98
+ subject
146
99
  home.reload
147
100
  expect(home.house_number_and_street).to eq('gibberish')
148
101
  expect(home.postcode).to eq('5753')
data/spec/spec_helper.rb CHANGED
@@ -2,3 +2,62 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'geocode_records'
3
3
 
4
4
  require 'pry'
5
+
6
+ dbname = 'geocode_records_test'
7
+ ENV['DATABASE_URL'] = "postgresql://127.0.0.1:#{ENV['PGPORT'] || 5432}/#{dbname}"
8
+
9
+ unless ENV['FAST'] == 'true'
10
+ GeocodeRecords.system('createdb', ENV.fetch('DATABASE_URL')) rescue nil
11
+ GeocodeRecords.run_sql(
12
+ ENV.fetch('DATABASE_URL'),
13
+ 'CREATE EXTENSION IF NOT EXISTS postgis'
14
+ )
15
+ GeocodeRecords.run_sql(
16
+ ENV.fetch('DATABASE_URL'),
17
+ 'DROP TABLE IF EXISTS homes'
18
+ )
19
+ sql = <<-SQL
20
+ CREATE TABLE homes (
21
+ id uuid primary key,
22
+ the_geom geometry(Geometry,4326),
23
+ the_geom_webmercator geometry(Geometry,3857),
24
+ glob text,
25
+ house_number_and_street text,
26
+ house_number int,
27
+ unit_number text,
28
+ city text,
29
+ state text,
30
+ postcode text,
31
+ latitude float,
32
+ longitude float,
33
+ foo text
34
+ )
35
+ SQL
36
+ GeocodeRecords.run_sql(
37
+ ENV.fetch('DATABASE_URL'),
38
+ sql
39
+ )
40
+ end
41
+
42
+ require 'active_record'
43
+ ActiveRecord::Base.establish_connection
44
+
45
+ require 'logger'
46
+ require 'fileutils'
47
+ FileUtils.mkdir_p 'log'
48
+ logger = Logger.new 'log/test.log'
49
+ ActiveRecord::Base.logger = logger
50
+
51
+ require 'securerandom'
52
+ class Home < ActiveRecord::Base
53
+ self.primary_key = 'id'
54
+ before_create do
55
+ self.id ||= SecureRandom.uuid
56
+ end
57
+ end
58
+
59
+ RSpec.configure do |config|
60
+ config.before :each do |example|
61
+ Home.delete_all
62
+ end
63
+ end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geocode_records
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-15 00:00:00.000000000 Z
11
+ date: 2018-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: activerecord
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 4.1.9
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 4.1.9
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: activesupport
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -39,75 +25,33 @@ dependencies:
39
25
  - !ruby/object:Gem::Version
40
26
  version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
- name: pg
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: attr_extras
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: zaru
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: upsert
28
+ name: activerecord
85
29
  requirement: !ruby/object:Gem::Requirement
86
30
  requirements:
87
31
  - - ">="
88
32
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :runtime
33
+ version: 4.1.9
34
+ type: :development
91
35
  prerelease: false
92
36
  version_requirements: !ruby/object:Gem::Requirement
93
37
  requirements:
94
38
  - - ">="
95
39
  - !ruby/object:Gem::Version
96
- version: '0'
40
+ version: 4.1.9
97
41
  - !ruby/object:Gem::Dependency
98
- name: pasqual
42
+ name: pg
99
43
  requirement: !ruby/object:Gem::Requirement
100
44
  requirements:
101
- - - ">="
45
+ - - "~>"
102
46
  - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :runtime
47
+ version: '0.21'
48
+ type: :development
105
49
  prerelease: false
106
50
  version_requirements: !ruby/object:Gem::Requirement
107
51
  requirements:
108
- - - ">="
52
+ - - "~>"
109
53
  - !ruby/object:Gem::Version
110
- version: '0'
54
+ version: '0.21'
111
55
  - !ruby/object:Gem::Dependency
112
56
  name: bundler
113
57
  requirement: !ruby/object:Gem::Requirement
@@ -164,7 +108,7 @@ dependencies:
164
108
  - - ">="
165
109
  - !ruby/object:Gem::Version
166
110
  version: '0'
167
- description: A quick way to re-geocode a table
111
+ description: A quick way to re-geocode a table. Requires 2 binaries, so YMMV.
168
112
  email:
169
113
  - seamus@abshere.net
170
114
  executables:
@@ -185,11 +129,9 @@ files:
185
129
  - lib/geocode_records.rb
186
130
  - lib/geocode_records/dump_sql_to_csv.rb
187
131
  - lib/geocode_records/geocode_csv.rb
188
- - lib/geocode_records/smarty_streets.rb
189
132
  - lib/geocode_records/update_table_from_csv.rb
190
133
  - lib/geocode_records/version.rb
191
134
  - package.json
192
- - spec/geocode_records/smarty_streets_spec.rb
193
135
  - spec/geocode_records_spec.rb
194
136
  - spec/spec_helper.rb
195
137
  homepage: https://github.com/seamusabshere/geocode_records
@@ -212,11 +154,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
212
154
  version: '0'
213
155
  requirements: []
214
156
  rubyforge_project:
215
- rubygems_version: 2.6.8
157
+ rubygems_version: 2.6.13
216
158
  signing_key:
217
159
  specification_version: 4
218
160
  summary: Geocode an ActiveRecord::Relation with node_smartystreets
219
161
  test_files:
220
- - spec/geocode_records/smarty_streets_spec.rb
221
162
  - spec/geocode_records_spec.rb
222
163
  - spec/spec_helper.rb
@@ -1,38 +0,0 @@
1
- class GeocodeRecords
2
-
3
- module SmartyStreets
4
-
5
- VERSION = '1.7.2'
6
-
7
- def self.bin_path
8
- @bin_path ||= if File.exist?('node_modules/.bin/smartystreets')
9
- 'node_modules/.bin/smartystreets'
10
- else
11
- 'smartystreets'
12
- end
13
- end
14
-
15
- def self.check_compatible!
16
- raise "smartystreets >= #{VERSION} is required" unless SmartyStreets.compatible?
17
- end
18
-
19
- def self.compatible?
20
- output = run_with_output('-V')
21
- current_version = Gem::Version.new output.chomp
22
- min_version = Gem::Version.new VERSION
23
- current_version >= min_version
24
- end
25
-
26
- def self.run(*args)
27
- shargs = Shellwords.join(args)
28
- system "#{bin_path} #{shargs}"
29
- end
30
-
31
- def self.run_with_output(*args)
32
- shargs = Shellwords.join(args)
33
- `#{bin_path} #{shargs}`
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- require 'geocode_records/smarty_streets'
4
-
5
- describe GeocodeRecords::SmartyStreets do
6
-
7
- describe '.bin_path' do
8
- subject { described_class.bin_path }
9
-
10
- it { is_expected.to eq 'node_modules/.bin/smartystreets' }
11
- end
12
-
13
- describe '.compatible?' do
14
- before { allow(described_class).to receive(:run_with_output).and_return("#{version}\n") }
15
-
16
- subject { described_class.compatible? }
17
-
18
- context 'v1.3.1' do
19
- let(:version) { '1.3.1' }
20
-
21
- it { is_expected.to be false }
22
- end
23
-
24
- context 'v1.7.2' do
25
- let(:version) { '1.7.2' }
26
-
27
- it { is_expected.to be true }
28
- end
29
-
30
- end
31
-
32
- describe '.run_with_output' do
33
- subject { described_class.run_with_output '-V' }
34
-
35
- it { is_expected.to match /\d+\.\d+.\d+/ }
36
- end
37
-
38
- end
39
-