free_zipcode_data 1.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dockerignore +10 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +25 -16
- data/.ruby-version +1 -1
- data/CHANGELOG +17 -0
- data/CLAUDE.md +89 -0
- data/Dockerfile +21 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +50 -36
- data/README.md +38 -5
- data/Rakefile +1 -1
- data/docker-entrypoint.sh +14 -0
- data/free_zipcode_data.gemspec +8 -14
- data/lib/etl/common.rb +1 -0
- data/lib/etl/csv_source.rb +4 -4
- data/lib/free_zipcode_data/country_table.rb +10 -2
- data/lib/free_zipcode_data/county_table.rb +14 -6
- data/lib/free_zipcode_data/data_source.rb +2 -2
- data/lib/free_zipcode_data/db_table.rb +54 -7
- data/lib/free_zipcode_data/logger.rb +8 -12
- data/lib/free_zipcode_data/runner.rb +2 -2
- data/lib/free_zipcode_data/state_table.rb +37 -5
- data/lib/free_zipcode_data/version.rb +1 -1
- data/lib/free_zipcode_data/zipcode_table.rb +15 -5
- data/lib/free_zipcode_data.rb +3 -3
- data/lib/tasks/version.rake +27 -24
- data/spec/etl/csv_source_spec.rb +57 -0
- data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
- data/spec/fixtures/.free_zipcode_data.yml +1 -0
- data/spec/fixtures/US.txt +5 -0
- data/spec/fixtures/US.zip +0 -0
- data/spec/fixtures/test_data.csv +7 -0
- data/spec/fixtures/test_data.txt +5 -0
- data/spec/free_zipcode_data/country_table_spec.rb +52 -0
- data/spec/free_zipcode_data/county_table_spec.rb +84 -0
- data/spec/free_zipcode_data/data_source_spec.rb +131 -0
- data/spec/free_zipcode_data/db_table_spec.rb +164 -0
- data/spec/free_zipcode_data/logger_spec.rb +78 -0
- data/spec/free_zipcode_data/options_spec.rb +37 -0
- data/spec/free_zipcode_data/runner_spec.rb +91 -0
- data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
- data/spec/free_zipcode_data/state_table_spec.rb +112 -0
- data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
- data/spec/free_zipcode_data_spec.rb +38 -0
- data/spec/spec_helper.rb +23 -2
- data/spec/support/database_helpers.rb +48 -0
- metadata +41 -91
|
@@ -8,6 +8,7 @@ module FreeZipcodeData
|
|
|
8
8
|
ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
|
|
9
9
|
|
|
10
10
|
attr_reader :database, :tablename
|
|
11
|
+
|
|
11
12
|
@@progressbar = nil
|
|
12
13
|
|
|
13
14
|
def initialize(database:, tablename:)
|
|
@@ -23,6 +24,18 @@ module FreeZipcodeData
|
|
|
23
24
|
|
|
24
25
|
private
|
|
25
26
|
|
|
27
|
+
def logger
|
|
28
|
+
Logger.instance
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def warn_once(message)
|
|
32
|
+
@warned_messages ||= {}
|
|
33
|
+
return if @warned_messages[message]
|
|
34
|
+
|
|
35
|
+
logger.warn(message)
|
|
36
|
+
@warned_messages[message] = true
|
|
37
|
+
end
|
|
38
|
+
|
|
26
39
|
def country_lookup_table
|
|
27
40
|
@country_lookup_table ||=
|
|
28
41
|
begin
|
|
@@ -33,9 +46,9 @@ module FreeZipcodeData
|
|
|
33
46
|
|
|
34
47
|
def select_first(sql)
|
|
35
48
|
rows = database.execute(sql)
|
|
36
|
-
rows[0]
|
|
37
|
-
rescue SQLite3::SQLException =>
|
|
38
|
-
raise "Please file an issue at #{ISSUE_URL}: [#{
|
|
49
|
+
rows[0]&.first
|
|
50
|
+
rescue SQLite3::SQLException => e
|
|
51
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
39
52
|
end
|
|
40
53
|
|
|
41
54
|
def get_country_id(country)
|
|
@@ -43,20 +56,54 @@ module FreeZipcodeData
|
|
|
43
56
|
select_first(sql)
|
|
44
57
|
end
|
|
45
58
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
59
|
+
# Look up a state ID scoped to a country, trying progressively less specific
|
|
60
|
+
# criteria: (1) abbr + name + country, (2) abbr + country, (3) name + country.
|
|
61
|
+
# Returns nil if no match is found.
|
|
62
|
+
def get_state_id(country, state_abbr, state_name)
|
|
63
|
+
escaped_country = escape_single_quotes(country)
|
|
64
|
+
return nil if escaped_country.empty?
|
|
65
|
+
|
|
66
|
+
escaped_abbr = escape_single_quotes(state_abbr)
|
|
67
|
+
escaped_name = escape_single_quotes(state_name)
|
|
68
|
+
country_cond = "c.alpha2 = '#{escaped_country}'"
|
|
69
|
+
# Most specific lookup: abbr + name + country
|
|
70
|
+
res = find_state_where("s.abbr = '#{escaped_abbr}'", "s.name = '#{escaped_name}'", country_cond)
|
|
71
|
+
return res if res
|
|
72
|
+
|
|
73
|
+
# Fallback: abbr + country only
|
|
74
|
+
res = find_state_where("s.abbr = '#{escaped_abbr}'", country_cond)
|
|
75
|
+
if res
|
|
76
|
+
logger.verbose("State fallback: abbr '#{state_abbr}' + country '#{country}' (name mismatch)")
|
|
77
|
+
return res
|
|
78
|
+
end
|
|
79
|
+
# Fallback: name + country only
|
|
80
|
+
res = find_state_where("s.name = '#{escaped_name}'", country_cond)
|
|
81
|
+
if res
|
|
82
|
+
logger.verbose("State fallback: name '#{state_name}' + country '#{country}' (abbr mismatch)")
|
|
83
|
+
return res
|
|
84
|
+
end
|
|
85
|
+
logger.warn("State lookup failed: abbr='#{state_abbr}', name='#{state_name}', country='#{country}'")
|
|
86
|
+
nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def find_state_where(*conditions)
|
|
90
|
+
sql = <<-SQL
|
|
91
|
+
SELECT s.id FROM states s
|
|
92
|
+
INNER JOIN countries c ON s.country_id = c.id
|
|
93
|
+
WHERE #{conditions.join(' AND ')}
|
|
94
|
+
SQL
|
|
49
95
|
select_first(sql)
|
|
50
96
|
end
|
|
51
97
|
|
|
52
98
|
def get_county_id(county)
|
|
53
99
|
return nil if county.nil?
|
|
100
|
+
|
|
54
101
|
sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'"
|
|
55
102
|
select_first(sql)
|
|
56
103
|
end
|
|
57
104
|
|
|
58
105
|
def escape_single_quotes(string)
|
|
59
|
-
string&.gsub(
|
|
106
|
+
string&.gsub('\'', '\'\'') || ''
|
|
60
107
|
end
|
|
61
108
|
end
|
|
62
109
|
end
|
|
@@ -13,27 +13,23 @@ module FreeZipcodeData
|
|
|
13
13
|
@log_provider = provider
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def log_exception(
|
|
17
|
-
msg = "EXCEPTION : #{
|
|
16
|
+
def log_exception(error, data = {})
|
|
17
|
+
msg = "EXCEPTION : #{error.class.name} : #{error.message}"
|
|
18
18
|
msg += "\n data : #{data.inspect}" if data && !data.empty?
|
|
19
|
-
msg += "\n #{
|
|
19
|
+
msg += "\n #{error.backtrace[0, 6].join("\n ")}"
|
|
20
20
|
log_provider.error(msg)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
def method_missing(meth,
|
|
23
|
+
def method_missing(meth, *, &)
|
|
24
24
|
if log_provider.respond_to?(meth)
|
|
25
|
-
log_provider.send(meth,
|
|
25
|
+
log_provider.send(meth, *, &)
|
|
26
26
|
else
|
|
27
27
|
super
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
def
|
|
32
|
-
|
|
33
|
-
true
|
|
34
|
-
else
|
|
35
|
-
super
|
|
36
|
-
end
|
|
31
|
+
def respond_to_missing?(meth, include_private = false)
|
|
32
|
+
log_provider.respond_to?(meth) || super
|
|
37
33
|
end
|
|
38
34
|
|
|
39
35
|
def verbose(msg)
|
|
@@ -43,7 +39,7 @@ module FreeZipcodeData
|
|
|
43
39
|
private
|
|
44
40
|
|
|
45
41
|
def default_logger
|
|
46
|
-
logger = ::Logger.new(
|
|
42
|
+
logger = ::Logger.new($stdout)
|
|
47
43
|
logger.formatter = proc do |_, _, _, msg|
|
|
48
44
|
"#{msg}\n"
|
|
49
45
|
end
|
|
@@ -58,9 +58,9 @@ module FreeZipcodeData
|
|
|
58
58
|
private
|
|
59
59
|
|
|
60
60
|
def initialize_table(table_sym, database)
|
|
61
|
-
tablename = options["#{table_sym}_tablename"
|
|
61
|
+
tablename = options[:"#{table_sym}_tablename"]
|
|
62
62
|
logger.verbose("Initializing #{table_sym} table: '#{tablename}'...")
|
|
63
|
-
klass =
|
|
63
|
+
klass = FreeZipcodeData.const_get(:"#{titleize(table_sym)}Table")
|
|
64
64
|
table = klass.new(
|
|
65
65
|
database: database.conn,
|
|
66
66
|
tablename: tablename
|
|
@@ -17,21 +17,27 @@ module FreeZipcodeData
|
|
|
17
17
|
|
|
18
18
|
ndx = <<-SQL
|
|
19
19
|
CREATE UNIQUE INDEX "main"."unique_state"
|
|
20
|
-
ON #{tablename} (abbr
|
|
20
|
+
ON #{tablename} (abbr COLLATE NOCASE ASC, country_id);
|
|
21
21
|
SQL
|
|
22
22
|
database.execute_batch(ndx)
|
|
23
23
|
|
|
24
24
|
ndx = <<-SQL
|
|
25
25
|
CREATE UNIQUE INDEX "main"."state_name"
|
|
26
|
-
ON #{tablename} (name COLLATE NOCASE ASC);
|
|
26
|
+
ON #{tablename} (name COLLATE NOCASE ASC, country_id);
|
|
27
27
|
SQL
|
|
28
28
|
database.execute_batch(ndx)
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def write(row)
|
|
32
|
-
return nil unless row
|
|
32
|
+
return nil unless synthesize_state(row)
|
|
33
|
+
|
|
33
34
|
row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
|
|
34
35
|
country_id = get_country_id(row[:country])
|
|
36
|
+
unless country_id
|
|
37
|
+
warn_once("Country '#{row[:country]}' not found in countries table, skipping state")
|
|
38
|
+
return nil
|
|
39
|
+
end
|
|
40
|
+
|
|
35
41
|
sql = <<-SQL
|
|
36
42
|
INSERT INTO states (abbr, name, country_id)
|
|
37
43
|
VALUES ('#{row[:short_state]}',
|
|
@@ -41,11 +47,37 @@ module FreeZipcodeData
|
|
|
41
47
|
SQL
|
|
42
48
|
begin
|
|
43
49
|
database.execute(sql)
|
|
44
|
-
rescue SQLite3::ConstraintException
|
|
45
|
-
|
|
50
|
+
rescue SQLite3::ConstraintException => e
|
|
51
|
+
unless e.message.include?('UNIQUE')
|
|
52
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
53
|
+
end
|
|
54
|
+
rescue StandardError => e
|
|
55
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
46
56
|
end
|
|
47
57
|
|
|
48
58
|
update_progress
|
|
49
59
|
end
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
# Synthesize state from country for stateless countries.
|
|
64
|
+
# Mutates the row hash so downstream Kiba destinations (CountyTable, ZipcodeTable)
|
|
65
|
+
# see the synthesized short_state and state values.
|
|
66
|
+
def synthesize_state(row)
|
|
67
|
+
if row[:short_state].nil? || row[:short_state] == ''
|
|
68
|
+
country_entry = country_lookup_table[row[:country]]
|
|
69
|
+
unless country_entry
|
|
70
|
+
warn_once(
|
|
71
|
+
"Cannot synthesize state for country '#{row[:country]}': " \
|
|
72
|
+
'not in country_lookup_table'
|
|
73
|
+
)
|
|
74
|
+
return false
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
row[:short_state] = row[:country]
|
|
78
|
+
row[:state] = country_entry[:name]
|
|
79
|
+
end
|
|
80
|
+
row[:short_state]
|
|
81
|
+
end
|
|
50
82
|
end
|
|
51
83
|
end
|
|
@@ -29,7 +29,15 @@ module FreeZipcodeData
|
|
|
29
29
|
def write(row)
|
|
30
30
|
return nil unless row[:postal_code]
|
|
31
31
|
|
|
32
|
-
state_id = get_state_id(row[:short_state], row[:state])
|
|
32
|
+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
|
|
33
|
+
unless state_id
|
|
34
|
+
logger.verbose(
|
|
35
|
+
"Skipping zipcode '#{row[:postal_code]}': no state found for " \
|
|
36
|
+
"abbr='#{row[:short_state]}', country='#{row[:country]}'"
|
|
37
|
+
)
|
|
38
|
+
return nil
|
|
39
|
+
end
|
|
40
|
+
|
|
33
41
|
city_name = escape_single_quotes(row[:city])
|
|
34
42
|
|
|
35
43
|
sql = <<-SQL
|
|
@@ -45,10 +53,12 @@ module FreeZipcodeData
|
|
|
45
53
|
|
|
46
54
|
begin
|
|
47
55
|
database.execute(sql)
|
|
48
|
-
rescue SQLite3::ConstraintException =>
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
56
|
+
rescue SQLite3::ConstraintException => e
|
|
57
|
+
unless e.message.include?('UNIQUE')
|
|
58
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
59
|
+
end
|
|
60
|
+
rescue StandardError => e
|
|
61
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
52
62
|
end
|
|
53
63
|
|
|
54
64
|
update_progress
|
data/lib/free_zipcode_data.rb
CHANGED
|
@@ -14,16 +14,16 @@ module FreeZipcodeData
|
|
|
14
14
|
ENV.fetch('APP_ENV', 'development')
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
# :nocov:
|
|
18
18
|
def self.config_file(filename = '.free_zipcode_data.yml')
|
|
19
19
|
return root.join('spec', 'fixtures', filename) if current_environment == 'test'
|
|
20
20
|
|
|
21
|
-
home =
|
|
21
|
+
home = Dir.home
|
|
22
22
|
file = ENV.fetch('FZD_CONFIG_FILE', File.join(home, '.free_zipcode_data.yml'))
|
|
23
23
|
FileUtils.touch(file)
|
|
24
24
|
file
|
|
25
25
|
end
|
|
26
|
-
|
|
26
|
+
# :nocov:
|
|
27
27
|
|
|
28
28
|
def self.os
|
|
29
29
|
if RUBY_PLATFORM.match?(/cygwin|mswin|mingw|bccwin|wince|emx/)
|
data/lib/tasks/version.rake
CHANGED
|
@@ -6,8 +6,6 @@ require 'fileutils'
|
|
|
6
6
|
|
|
7
7
|
# rubocop:disable Metrics/BlockLength
|
|
8
8
|
namespace :version do
|
|
9
|
-
PROJECT_ROOT = File.expand_path(FileUtils.pwd).freeze
|
|
10
|
-
PROJECT_NAME = ENV['PROJECT_NAME'] || File.basename(PROJECT_ROOT)
|
|
11
9
|
|
|
12
10
|
desc 'Write changes to the CHANGELOG'
|
|
13
11
|
task :changes do
|
|
@@ -23,7 +21,7 @@ namespace :version do
|
|
|
23
21
|
|
|
24
22
|
desc 'Increment the patch version and write changes to the changelog'
|
|
25
23
|
task :bump_patch do
|
|
26
|
-
exit unless check_branch_and_warn
|
|
24
|
+
exit unless check_branch_and_warn?
|
|
27
25
|
major, minor, patch = read_version
|
|
28
26
|
patch = patch.to_i + 1
|
|
29
27
|
write_version_file([major, minor, patch])
|
|
@@ -36,7 +34,7 @@ namespace :version do
|
|
|
36
34
|
|
|
37
35
|
desc 'Increment the minor version and write changes to the changelog'
|
|
38
36
|
task :bump_minor do
|
|
39
|
-
exit unless check_branch_and_warn
|
|
37
|
+
exit unless check_branch_and_warn?
|
|
40
38
|
major, minor, _patch = read_version
|
|
41
39
|
minor = minor.to_i + 1
|
|
42
40
|
patch = 0
|
|
@@ -47,7 +45,7 @@ namespace :version do
|
|
|
47
45
|
|
|
48
46
|
desc 'Increment the major version and write changes to the changelog'
|
|
49
47
|
task :bump_major do
|
|
50
|
-
exit unless check_branch_and_warn
|
|
48
|
+
exit unless check_branch_and_warn?
|
|
51
49
|
major, _minor, _patch = read_version
|
|
52
50
|
major = major.to_i + 1
|
|
53
51
|
minor = 0
|
|
@@ -59,19 +57,27 @@ namespace :version do
|
|
|
59
57
|
|
|
60
58
|
private
|
|
61
59
|
|
|
60
|
+
def project_root
|
|
61
|
+
@project_root ||= File.expand_path(FileUtils.pwd).freeze
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def project_name
|
|
65
|
+
@project_name ||= ENV['PROJECT_NAME'] || File.basename(project_root)
|
|
66
|
+
end
|
|
67
|
+
|
|
62
68
|
def version_file_path
|
|
63
|
-
split =
|
|
64
|
-
"#{
|
|
69
|
+
split = project_name.split('-')
|
|
70
|
+
"#{project_root}/lib/#{split.join('/')}/version.rb"
|
|
65
71
|
end
|
|
66
72
|
|
|
67
73
|
def module_name
|
|
68
|
-
case
|
|
74
|
+
case project_name
|
|
69
75
|
when /-/
|
|
70
|
-
|
|
76
|
+
project_name.split('-').map(&:capitalize).join('::')
|
|
71
77
|
when /_/
|
|
72
|
-
|
|
78
|
+
project_name.split('_').map(&:capitalize).join
|
|
73
79
|
else
|
|
74
|
-
|
|
80
|
+
project_name.capitalize
|
|
75
81
|
end
|
|
76
82
|
end
|
|
77
83
|
|
|
@@ -79,13 +85,13 @@ namespace :version do
|
|
|
79
85
|
silence_warnings do
|
|
80
86
|
load version_file_path
|
|
81
87
|
end
|
|
82
|
-
text =
|
|
88
|
+
text = module_name.split('::').inject(Object) { |mod, name| mod.const_get(name) }::VERSION
|
|
83
89
|
text.split('.')
|
|
84
90
|
end
|
|
85
91
|
|
|
86
92
|
def write_version_file(version_array)
|
|
87
93
|
version = version_array.join('.')
|
|
88
|
-
new_version = %( VERSION = '#{version}'
|
|
94
|
+
new_version = %( VERSION = '#{version}')
|
|
89
95
|
lines = File.readlines(version_file_path)
|
|
90
96
|
File.open(version_file_path, 'w') do |f|
|
|
91
97
|
lines.each do |line|
|
|
@@ -100,19 +106,17 @@ namespace :version do
|
|
|
100
106
|
|
|
101
107
|
def update_readme_version_strings
|
|
102
108
|
version_string = read_version.join('.')
|
|
103
|
-
readme =
|
|
109
|
+
readme = File.read('README.md')
|
|
104
110
|
regex = /^\*\*Version: [0-9.]+\*\*$/i
|
|
105
111
|
return nil unless readme =~ regex
|
|
106
112
|
|
|
107
|
-
File.
|
|
108
|
-
f.write(readme.gsub(regex, "**Version: #{version_string}**"))
|
|
109
|
-
end
|
|
113
|
+
File.write('README.md', readme.gsub(regex, "**Version: #{version_string}**"))
|
|
110
114
|
end
|
|
111
115
|
|
|
112
116
|
def changelog
|
|
113
117
|
return @changelog_path if @changelog_path
|
|
114
118
|
|
|
115
|
-
@changelog_path = File.join(
|
|
119
|
+
@changelog_path = File.join(project_root, 'CHANGELOG')
|
|
116
120
|
FileUtils.touch(@changelog_path)
|
|
117
121
|
@changelog_path
|
|
118
122
|
end
|
|
@@ -159,16 +163,15 @@ namespace :version do
|
|
|
159
163
|
STRING
|
|
160
164
|
end
|
|
161
165
|
|
|
162
|
-
def check_branch_and_warn
|
|
166
|
+
def check_branch_and_warn?
|
|
163
167
|
return true unless current_branch == 'master'
|
|
164
168
|
|
|
165
169
|
puts(branch_warning_message)
|
|
166
|
-
|
|
167
|
-
|
|
170
|
+
line = $stdin.gets.chomp
|
|
171
|
+
return true if line =~ /[yY]/
|
|
168
172
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
end
|
|
173
|
+
puts 'Aborting version bump.'
|
|
174
|
+
false
|
|
172
175
|
end
|
|
173
176
|
|
|
174
177
|
def launch_editor(file)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'etl/csv_source'
|
|
4
|
+
|
|
5
|
+
RSpec.describe CsvSource do
|
|
6
|
+
let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') }
|
|
7
|
+
|
|
8
|
+
describe '#initialize' do
|
|
9
|
+
it 'stores the filename and options' do
|
|
10
|
+
source = described_class.new(filename: fixture_csv)
|
|
11
|
+
expect(source.filename).to eq(fixture_csv)
|
|
12
|
+
expect(source.headers).to be true
|
|
13
|
+
expect(source.delimeter).to eq("\t")
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it 'accepts custom delimiter and quote char' do
|
|
17
|
+
source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
|
|
18
|
+
expect(source.delimeter).to eq(',')
|
|
19
|
+
expect(source.quote_char).to eq('"')
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# CsvSource implements only #each (Kiba source protocol), not Enumerable
|
|
24
|
+
# rubocop:disable Style/MapIntoArray
|
|
25
|
+
describe '#each' do
|
|
26
|
+
it 'yields each row as a hash with symbolized keys' do
|
|
27
|
+
source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
|
|
28
|
+
rows = []
|
|
29
|
+
source.each { |row| rows << row }
|
|
30
|
+
|
|
31
|
+
expect(rows.length).to eq(6)
|
|
32
|
+
expect(rows.first).to be_a(Hash)
|
|
33
|
+
expect(rows.first.keys).to include(:country, :postal_code, :city)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it 'parses the correct data from each row' do
|
|
37
|
+
source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
|
|
38
|
+
rows = []
|
|
39
|
+
source.each { |row| rows << row }
|
|
40
|
+
|
|
41
|
+
first = rows.first
|
|
42
|
+
expect(first[:country]).to eq('US')
|
|
43
|
+
expect(first[:postal_code]).to eq('10001')
|
|
44
|
+
expect(first[:city]).to eq('New York')
|
|
45
|
+
expect(first[:short_state]).to eq('NY')
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it 'handles rows from multiple countries' do
|
|
49
|
+
source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
|
|
50
|
+
countries = []
|
|
51
|
+
source.each { |row| countries << row[:country] }
|
|
52
|
+
|
|
53
|
+
expect(countries.uniq.sort).to eq(%w[CA GB US])
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
# rubocop:enable Style/MapIntoArray
|
|
57
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'kiba'
|
|
4
|
+
require 'etl/free_zipcode_data_job'
|
|
5
|
+
|
|
6
|
+
RSpec.describe ETL::FreeZipcodeDataJob do
|
|
7
|
+
let(:db) { create_test_database(line_count: 6) }
|
|
8
|
+
let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') }
|
|
9
|
+
let(:logger) { FreeZipcodeData::Logger.instance }
|
|
10
|
+
let(:string_io) { StringIO.new }
|
|
11
|
+
let(:options) do
|
|
12
|
+
OpenStruct.new(
|
|
13
|
+
country_tablename: 'countries',
|
|
14
|
+
state_tablename: 'states',
|
|
15
|
+
county_tablename: 'counties',
|
|
16
|
+
zipcode_tablename: 'zipcodes',
|
|
17
|
+
verbose: false
|
|
18
|
+
)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
before do
|
|
22
|
+
FreeZipcodeData::Options.instance.initialize_hash(options)
|
|
23
|
+
logger.log_provider = Logger.new(string_io)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
describe '.setup' do
|
|
27
|
+
it 'returns a Kiba job definition' do
|
|
28
|
+
job = described_class.setup(fixture_csv, db, logger, options)
|
|
29
|
+
expect(job).not_to be_nil
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
describe 'full ETL pipeline' do
|
|
34
|
+
before do
|
|
35
|
+
# Build all tables
|
|
36
|
+
FreeZipcodeData::CountryTable.new(database: db, tablename: 'countries').build
|
|
37
|
+
FreeZipcodeData::StateTable.new(database: db, tablename: 'states').build
|
|
38
|
+
FreeZipcodeData::CountyTable.new(database: db, tablename: 'counties').build
|
|
39
|
+
FreeZipcodeData::ZipcodeTable.new(database: db, tablename: 'zipcodes').build
|
|
40
|
+
|
|
41
|
+
job = described_class.setup(fixture_csv, db, logger, options)
|
|
42
|
+
Kiba.run(job)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it 'populates the countries table' do
|
|
46
|
+
rows = db.execute('SELECT alpha2 FROM countries ORDER BY alpha2')
|
|
47
|
+
expect(rows.flatten).to include('CA', 'GB', 'US')
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it 'populates the states table' do
|
|
51
|
+
rows = db.execute('SELECT abbr FROM states ORDER BY abbr')
|
|
52
|
+
abbrs = rows.flatten
|
|
53
|
+
expect(abbrs).to include('CA', 'IL', 'NY')
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it 'populates the counties table' do
|
|
57
|
+
rows = db.execute('SELECT name FROM counties ORDER BY name')
|
|
58
|
+
names = rows.flatten
|
|
59
|
+
expect(names).to include('Cook', 'Los Angeles', 'New York')
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it 'populates the zipcodes table' do
|
|
63
|
+
rows = db.execute('SELECT code FROM zipcodes ORDER BY code')
|
|
64
|
+
codes = rows.flatten
|
|
65
|
+
expect(codes).to include('10001', '60601', '90210')
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it 'links zipcodes to states' do
|
|
69
|
+
rows = db.execute(<<-SQL)
|
|
70
|
+
SELECT z.code, s.abbr
|
|
71
|
+
FROM zipcodes z
|
|
72
|
+
JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
|
|
73
|
+
WHERE z.code = '60601'
|
|
74
|
+
SQL
|
|
75
|
+
expect(rows[0]).to eq(%w[60601 IL])
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it 'links states to countries' do
|
|
79
|
+
rows = db.execute(<<-SQL)
|
|
80
|
+
SELECT s.abbr, c.alpha2
|
|
81
|
+
FROM states s
|
|
82
|
+
JOIN countries c ON s.country_id = c.id
|
|
83
|
+
WHERE s.abbr = 'NY'
|
|
84
|
+
SQL
|
|
85
|
+
expect(rows[0]).to eq(%w[NY US])
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it 'stores geocode data for zipcodes' do
|
|
89
|
+
rows = db.execute("SELECT lat, lon FROM zipcodes WHERE code = '10001'")
|
|
90
|
+
lat = rows[0][0].to_f
|
|
91
|
+
lon = rows[0][1].to_f
|
|
92
|
+
expect(lat).to be_within(0.01).of(40.7484)
|
|
93
|
+
expect(lon).to be_within(0.01).of(-73.9967)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it 'scopes duplicate state abbreviations by country' do
|
|
97
|
+
us_ny = db.execute(<<-SQL)
|
|
98
|
+
SELECT s.id, s.name, c.alpha2
|
|
99
|
+
FROM states s
|
|
100
|
+
JOIN countries c ON s.country_id = c.id
|
|
101
|
+
WHERE s.abbr = 'NY' AND c.alpha2 = 'US'
|
|
102
|
+
SQL
|
|
103
|
+
ca_ny = db.execute(<<-SQL)
|
|
104
|
+
SELECT s.id, s.name, c.alpha2
|
|
105
|
+
FROM states s
|
|
106
|
+
JOIN countries c ON s.country_id = c.id
|
|
107
|
+
WHERE s.abbr = 'NY' AND c.alpha2 = 'CA'
|
|
108
|
+
SQL
|
|
109
|
+
expect(us_ny.length).to eq(1)
|
|
110
|
+
expect(ca_ny.length).to eq(1)
|
|
111
|
+
expect(us_ny[0][0]).not_to eq(ca_ny[0][0])
|
|
112
|
+
expect(us_ny[0][1]).to eq('New York')
|
|
113
|
+
expect(ca_ny[0][1]).to eq('Northern York')
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it 'links cross-country zipcodes to the correct state' do
|
|
117
|
+
us_zip = db.execute(<<-SQL)
|
|
118
|
+
SELECT z.code, s.name, c.alpha2
|
|
119
|
+
FROM zipcodes z
|
|
120
|
+
JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
|
|
121
|
+
JOIN countries c ON s.country_id = c.id
|
|
122
|
+
WHERE z.code = '10001'
|
|
123
|
+
SQL
|
|
124
|
+
ca_zip = db.execute(<<-SQL)
|
|
125
|
+
SELECT z.code, s.name, c.alpha2
|
|
126
|
+
FROM zipcodes z
|
|
127
|
+
JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
|
|
128
|
+
JOIN countries c ON s.country_id = c.id
|
|
129
|
+
WHERE z.code = 'K0A'
|
|
130
|
+
SQL
|
|
131
|
+
expect(us_zip[0]).to eq(['10001', 'New York', 'US'])
|
|
132
|
+
expect(ca_zip[0]).to eq(['K0A', 'Northern York', 'CA'])
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
---
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4
|
|
2
|
+
US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4
|
|
3
|
+
US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4
|
|
4
|
+
CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4
|
|
5
|
+
GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1
|
|
Binary file
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
COUNTRY,POSTAL_CODE,CITY,STATE,SHORT_STATE,COUNTY,SHORT_COUNTY,COMMUNITY,SHORT_COMMUNITY,LATITUDE,LONGITUDE,ACCURACY
|
|
2
|
+
US,10001,New York,New York,NY,New York,061,Manhattan,MN,40.7484,-73.9967,4
|
|
3
|
+
US,90210,Beverly Hills,California,CA,Los Angeles,037,,LA,34.0901,-118.4065,4
|
|
4
|
+
US,60601,Chicago,Illinois,IL,Cook,031,,CK,41.8819,-87.6278,4
|
|
5
|
+
CA,H2X,Montreal,Quebec,QC,,,Montreal,,45.5088,-73.5878,4
|
|
6
|
+
CA,K0A,Almonte,Northern York,NY,Lanark,LNK,,,45.2260,-76.1840,4
|
|
7
|
+
GB,SW1A,London,England,ENG,Westminster,,City of Westminster,,51.5014,-0.1419,1
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4
|
|
2
|
+
US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4
|
|
3
|
+
US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4
|
|
4
|
+
CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4
|
|
5
|
+
GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1
|