free_zipcode_data 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.rspec +1 -0
- data/.rubocop.yml +88 -0
- data/CHANGELOG +42 -0
- data/CODE_OF_CONDUCT.md +42 -0
- data/CONTRIBUTING.md +25 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +82 -0
- data/ISSUE_TEMPLATE.md +9 -0
- data/LICENSE.md +21 -0
- data/PULL_REQUEST_TEMPLATE.md +5 -0
- data/README.md +115 -0
- data/Rakefile +10 -0
- data/SUPPORT.md +6 -0
- data/all_us_counties.csv +3143 -0
- data/all_us_states.csv +52 -0
- data/all_us_zipcodes.csv +42367 -0
- data/bin/free_zipcode_data +9 -0
- data/counties_states_zipcodes.sql +46899 -0
- data/country_lookup_table.yml +989 -0
- data/free_zipcode_data.gemspec +43 -0
- data/lib/etl/common.rb +24 -0
- data/lib/etl/csv_source.rb +26 -0
- data/lib/etl/free_zipcode_data_job.rb +39 -0
- data/lib/free_zipcode_data/country_table.rb +46 -0
- data/lib/free_zipcode_data/county_table.rb +50 -0
- data/lib/free_zipcode_data/data_source.rb +88 -0
- data/lib/free_zipcode_data/db_table.rb +58 -0
- data/lib/free_zipcode_data/logger.rb +57 -0
- data/lib/free_zipcode_data/options.rb +21 -0
- data/lib/free_zipcode_data/runner.rb +172 -0
- data/lib/free_zipcode_data/sqlite_ram.rb +39 -0
- data/lib/free_zipcode_data/state_table.rb +51 -0
- data/lib/free_zipcode_data/version.rb +5 -0
- data/lib/free_zipcode_data/zipcode_table.rb +57 -0
- data/lib/free_zipcode_data.rb +43 -0
- data/lib/tasks/version.rake +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +270 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
# Make the gem's lib/ directory requirable so the version constant can be
# loaded straight from the source tree.
gem_lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(gem_lib_dir) || $LOAD_PATH.unshift(gem_lib_dir)
require 'free_zipcode_data/version'

# rubocop:disable Metrics/BlockLength
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'free_zipcode_data'
  spec.version = FreeZipcodeData::VERSION
  spec.authors = ['Chris Blackburn', 'Chris McKnight']
  spec.email = ['87a1779b@opayq.com', 'fixme@mcknight.bogus']
  spec.summary = 'Free US and world-wide postal codes in SQLite and CSV format'
  spec.description = <<~DESCRIPTION
    Free US and world-wide postal codes in SQLite and CSV format.
    Automated zipcode/postal code aggregation and processing for any needs.
  DESCRIPTION
  spec.homepage = 'https://github.com/midwire/free_zipcode_data'
  spec.license = 'MIT'

  # Packaging: every git-tracked file ships with the gem; executables and
  # test files are derived from that list by path prefix.
  spec.required_ruby_version = '>= 2.3.0'
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Tooling needed only while working on the gem itself.
  {
    'bundler' => '~> 1.16',
    'pry-nav' => '~> 0.2',
    'rake' => '~> 12.0',
    'rspec' => '~> 3.7',
    'rubocop' => '~> 0.55',
    'ruby-prof' => '~> 0.17',
    'simplecov' => '~> 0.16'
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end

  # Libraries required at run time.
  {
    'colored' => '~> 1.2',
    'kiba' => '~> 2.0',
    'ruby-progressbar' => '~> 1.9',
    'rubyzip' => '~> 1.2',
    'sqlite3' => '~> 1.3',
    'trollop' => '~> 2.1'
  }.each do |gem_name, constraint|
    spec.add_runtime_dependency gem_name, constraint
  end
end
# rubocop:enable Metrics/BlockLength
|
data/lib/etl/common.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'csv_source'
|
4
|
+
require_relative '../free_zipcode_data/country_table'
|
5
|
+
require_relative '../free_zipcode_data/state_table'
|
6
|
+
require_relative '../free_zipcode_data/county_table'
|
7
|
+
require_relative '../free_zipcode_data/zipcode_table'
|
8
|
+
|
9
|
+
# Debugging aid for a job definition: registers a transform that prints every
# row with `ap` and then hands the very same row on to the next step.
def show_me
  transform { |row| row.tap { |current| ap current } }
end
|
15
|
+
|
16
|
+
# Registers a transform that lets only the first +count+ rows through; every
# later row is replaced by nil.
#
# count - anything Integer() accepts, or nil. nil and -1 both mean "no limit",
#         in which case no transform is registered and nil is returned.
def limit(count)
  max_rows = Integer(count || -1)
  return if max_rows == -1

  transform do |row|
    # The counter lives in an instance variable of the object the block runs
    # against, so it persists across rows.
    @counter = (@counter || 0) + 1
    row if @counter <= max_rows
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
# Row source that reads a delimited text file and yields each row as a Hash.
#
# Header names are run through CSV's :symbol header converter, so the yielded
# hashes are keyed by symbols.
#
# NOTE: the misspelled `delimeter` keyword/reader is part of the public
# interface and is kept as-is for backward compatibility.
class CsvSource
  attr_reader :filename, :delimeter, :quote_char, :headers

  # filename   - path of the file to read
  # headers    - passed straight to CSV's `headers:` option (default: true)
  # delimeter  - column separator (default: tab)
  # quote_char - quote character (default: double quote)
  def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"')
    @filename = filename
    @headers = headers
    @delimeter = delimeter
    @quote_char = quote_char
  end

  # Yields every row of the file as a Hash (via `row.to_hash`).
  #
  # Without a block an Enumerator over the same rows is returned instead of
  # raising LocalJumpError.
  def each
    return to_enum(:each) unless block_given?

    CSV.open(filename,
             col_sep: delimeter,
             headers: headers,
             header_converters: :symbol,
             quote_char: quote_char) do |csv|
      csv.each { |row| yield(row.to_hash) }
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'common'
|
4
|
+
|
5
|
+
module ETL
  # Builds the Kiba job definition that loads one prepared CSV file into the
  # country, state, county and zipcode tables.
  module FreeZipcodeDataJob
    module_function

    # country_file - path of the comma-separated input file
    # database     - handle passed to every destination as `database:`
    # logger       - must respond to #info and #verbose
    # options      - looked up by the *_tablename keys below
    #
    # Returns whatever Kiba.parse returns.
    def setup(country_file, database, logger, options)
      # Destination class => option key holding its table name. Hash order is
      # the registration order: country, state, county, zipcode.
      table_destinations = {
        FreeZipcodeData::CountryTable => :country_tablename,
        FreeZipcodeData::StateTable => :state_tablename,
        FreeZipcodeData::CountyTable => :county_tablename,
        FreeZipcodeData::ZipcodeTable => :zipcode_tablename
      }

      Kiba.parse do
        pre_process { logger.info("Processing '#{country_file}' data, please be patient...") }

        source CsvSource, filename: country_file, quote_char: '"', delimeter: ','

        table_destinations.each do |table_class, option_key|
          destination table_class, database: database, tablename: options[option_key]
        end

        post_process { logger.verbose('Finished generating table data...') }
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'db_table'
|
4
|
+
|
5
|
+
module FreeZipcodeData
  # Destination that creates and fills the country table.
  class CountryTable < DbTable
    # Creates the table named by +tablename+ together with a unique,
    # case-insensitive index on its alpha2 column.
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          alpha2 varchar(2) not null,
          alpha3 varchar(3),
          iso varchar(3),
          name varchar(255) not null
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_country_alpha2"
        ON #{tablename} (alpha2 COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the country named by row[:country], enriched with the alpha3,
    # iso and name values from the country lookup table. Rows whose country
    # is already stored are ignored. Always advances the progress bar.
    #
    # Raises RuntimeError when the lookup table has no entry for the country.
    def write(row)
      country = row[:country]
      country_hash = country_lookup_table[country]
      if country_hash.nil?
        raise "Please file an issue at #{ISSUE_URL}: " \
              "[no country lookup entry for #{country.inspect}]"
      end

      # Insert into the same table #build created, and bind the values
      # instead of interpolating them so that names containing an apostrophe
      # cannot break the statement.
      sql = "INSERT INTO #{tablename} (alpha2, alpha3, iso, name) VALUES (?, ?, ?, ?)"
      # to_s keeps the previous behaviour of storing '' (not NULL) for
      # missing lookup values.
      values = [country,
                country_hash[:alpha3],
                country_hash[:iso],
                country_hash[:name]].map(&:to_s)

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # Swallow duplicates
      end

      update_progress
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'db_table'
|
4
|
+
|
5
|
+
module FreeZipcodeData
  # Destination that creates and fills the county table.
  class CountyTable < DbTable
    # Creates the table named by +tablename+ together with a unique index
    # over (state_id, abbr, name).
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          state_id integer,
          abbr varchar(255),
          name varchar(255),
          county_seat varchar(255)
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_county"
        ON #{tablename} (state_id, abbr, name COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the county described by +row+.
    #
    # Returns nil without inserting (and without advancing the progress bar)
    # when the row has no :county or its state cannot be resolved. Duplicate
    # counties are ignored; any other database error is re-raised as a
    # RuntimeError pointing at the issue tracker.
    def write(row)
      return nil unless row[:county]
      state_id = get_state_id(row[:short_state], row[:state])
      return nil unless state_id

      # Insert into the same table #build created, with bound values so that
      # no column value can break the statement.
      sql = "INSERT INTO #{tablename} (state_id, abbr, name) VALUES (?, ?, ?)"
      # to_s stores '' rather than NULL for a missing abbreviation, as
      # before; NULLs would never collide in the unique index and duplicates
      # would slip through.
      values = [state_id, row[:short_county].to_s, row[:county].to_s]

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # swallow duplicates
      rescue StandardError => err
        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] -> VALUES: #{values.inspect}"
      end

      update_progress
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'zip'
|
6
|
+
|
7
|
+
module FreeZipcodeData
  # Fetches a GeoNames postal-code archive into the work directory, extracts
  # its data file and produces a comma-separated copy with a header row.
  class DataSource
    BASE_URL = 'http://download.geonames.org/export/zip'

    attr_reader :country, :options

    # country - country code whose archive should be used (it is upcased to
    #           form the archive name); nil selects 'allCountries'.
    def initialize(country = nil)
      @country = country
      @options = Options.instance.hash
      @logger = Logger.instance
    end

    # Downloads the archive into options.work_dir.
    #
    # Returns nil without downloading when the archive already exists and
    # options.clobber is not set; otherwise the number of bytes written.
    def download
      return nil if !options.clobber && File.exist?(zipfile_path)

      FileUtils.mkdir_p(options.work_dir)
      @logger.info("Downloading: #{zipfile} from GeoNames...")
      # URI#open (open-uri) instead of Kernel#open: Kernel#open no longer
      # accepts URLs on Ruby 3.0+. The remote file is fetched before the
      # local one is created, so a failed download does not leave an empty
      # archive behind that later runs would mistake for a finished one.
      URI.parse("#{BASE_URL}/#{zipfile}").open do |remote|
        File.open(zipfile_path, 'wb') { |file| IO.copy_stream(remote, file) }
      end
    end

    # Returns the path of the CSV file with headers, creating it on first use.
    def datafile
      @datafile ||= datafile_with_headers
    end

    private

    # Archive file name: the upcased country code (or 'allCountries') with a
    # '.zip' suffix appended unless it already ends in '.zip'.
    def zipfile
      @zipfile ||= begin
        basename = country.nil? ? 'allCountries' : country.upcase
        basename.end_with?('.zip') ? basename : "#{basename}.zip"
      end
    end

    def zipfile_path
      @zipfile_path ||= File.join(options.work_dir, zipfile)
    end

    # Path of the first archive entry whose name does not contain "readme",
    # extracted into the work directory if necessary. Returns nil when the
    # archive has no such entry.
    #
    # NOTE(review): the entry name is joined to the work directory as-is;
    # confirm the archives never contain path-traversing entry names.
    def unzipped_datafile
      @unzipped_datafile ||= begin
        country_file = nil
        Zip::File.open(zipfile_path) do |zip|
          zip.each do |entry|
            next if entry.name =~ /readme/i
            country_file = File.join(options.work_dir, entry.name)
            extract_entry(entry, country_file)
            break
          end
        end
        country_file
      end
    end

    # Extracts +entry+ to +target+. An existing target is left untouched
    # unless options[:clobber] is set, in which case overwriting is enabled
    # through the global Zip.on_exists_proc setting.
    def extract_entry(entry, target)
      already_there = File.exist?(target)
      return if already_there && !options[:clobber]

      Zip.on_exists_proc = true if already_there
      @logger.verbose("Extracting: #{zipfile}...")
      entry.extract(target)
    end

    # Writes "<unzipped file>.csv": a header row followed by every row of the
    # tab-separated data file. An existing CSV is reused unless
    # options[:clobber] is set. Returns the CSV path.
    def datafile_with_headers
      filename = "#{unzipped_datafile}.csv"
      if File.exist?(filename) && !options[:clobber]
        @logger.verbose("File: #{filename} already exists, skipping...")
        return filename
      end
      @logger.verbose("Preparing: #{filename} for processing...")
      CSV.open(filename, 'w') do |outfile|
        outfile << %w[COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY]
        # quote_char '|' so that double quotes inside the data are read as
        # ordinary characters.
        CSV.foreach(unzipped_datafile, headers: false, col_sep: "\t", quote_char: '|') do |row|
          outfile << row
        end
      end
      filename
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'ruby-progressbar'
|
5
|
+
|
6
|
+
module FreeZipcodeData
  # Base class of the table destinations: holds the database handle and table
  # name, drives the shared progress bar and offers lookup helpers.
  class DbTable
    ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'

    attr_reader :database, :tablename
    # Class variable, hence shared by DbTable and all of its subclasses; it is
    # replaced every time any table object is created.
    @@progressbar = nil

    # database  - handle responding to #execute (and, for subclasses,
    #             #execute_batch)
    # tablename - name of the table this object manages
    #
    # Reads the `line_count` entry of the `meta` table and sizes the progress
    # bar to four times that value (presumably one tick per row for each of
    # the four table destinations — confirm against the job definition).
    def initialize(database:, tablename:)
      @database = database
      @tablename = tablename
      # 'line_count' is a string literal and therefore single-quoted; SQL
      # reserves double quotes for identifiers.
      lc = select_first("SELECT value FROM meta where name = 'line_count'")
      @@progressbar = ProgressBar.create(total: lc.to_i * 4, format: '%t: |%B| %e')
    end

    # Advances the shared progress bar by one step.
    def update_progress
      @@progressbar.increment
    end

    private

    # Parsed contents of country_lookup_table.yml, loaded once per object.
    #
    # NOTE(review): if the YAML file uses symbol keys, YAML.load_file may
    # refuse to load it on Psych 4 (Ruby 3.1+) — verify against the file.
    def country_lookup_table
      @country_lookup_table ||= YAML.load_file(country_lookup_path)
    end

    # A country_lookup_table.yml in the current directory wins (the original
    # behaviour); otherwise the copy at the gem root, two directories above
    # this file, is used so the lookup also works from other directories.
    def country_lookup_path
      local_copy = 'country_lookup_table.yml'
      return local_copy if File.exist?(local_copy)

      File.expand_path('../../country_lookup_table.yml', __dir__)
    end

    # Runs +sql+ and returns the first element of the first result row, or
    # nil when there are no rows.
    #
    # bind_vars - optional values for `?` placeholders in +sql+
    #
    # Raises RuntimeError pointing at the issue tracker when the statement
    # fails with SQLite3::SQLException.
    def select_first(sql, bind_vars = [])
      rows = database.execute(sql, bind_vars)
      rows[0]&.first
    rescue SQLite3::SQLException => err
      raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
    end

    # The lookups below bind their values instead of interpolating them, so
    # values containing quotes cannot break the statement. to_s keeps the
    # previous behaviour of comparing against '' when a value is nil.

    def get_country_id(country)
      select_first('SELECT id FROM countries WHERE alpha2 = ?', [country.to_s])
    end

    def get_state_id(state_abbr, state_name)
      select_first('SELECT id FROM states WHERE abbr = ? OR name = ?',
                   [state_abbr.to_s, state_name.to_s])
    end

    def get_county_id(county)
      return nil if county.nil?
      select_first('SELECT id FROM counties WHERE name = ?', [county.to_s])
    end

    # Doubles every single quote in +string+; returns '' for nil. Kept for
    # subclasses that still build SQL by interpolation.
    def escape_single_quotes(string)
      string&.gsub("'", "''") || ''
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module FreeZipcodeData
  # Singleton facade over a logger object (`log_provider`). Any message the
  # provider responds to is forwarded to it.
  class Logger
    include Singleton

    attr_accessor :log_provider

    # provider - object to forward log calls to; defaults to a ::Logger that
    #            writes bare messages to STDOUT.
    def initialize(provider = default_logger)
      @log_provider = provider
    end

    # Logs +e+ at error level: class, message, optional +data+ and the first
    # six backtrace lines.
    def log_exception(e, data = {})
      msg = "EXCEPTION : #{e.class.name} : #{e.message}"
      msg += "\n data : #{data.inspect}" if data && !data.empty?
      # An exception that was never raised has a nil backtrace.
      msg += "\n #{Array(e.backtrace).first(6).join("\n ")}"
      log_provider.error(msg)
    end

    # Forwards anything the provider responds to (info, warn, ...).
    def method_missing(meth, *args, &block)
      return log_provider.public_send(meth, *args, &block) if log_provider.respond_to?(meth)

      super
    end

    # Companion of #method_missing so that #respond_to? and #method see the
    # forwarded methods as well.
    def respond_to_missing?(meth, include_private = false)
      log_provider.respond_to?(meth) || super
    end

    # Logs +msg+ at info level, but only when the verbose option is set.
    def verbose(msg)
      info(msg) if options&.verbose
    end

    private

    def default_logger
      logger = ::Logger.new(STDOUT)
      # Print the message only: no severity, timestamp or program name.
      logger.formatter = proc do |_, _, _, msg|
        "#{msg}\n"
      end
      logger
    end

    def options
      Options.instance.hash
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module FreeZipcodeData
  # Process-wide holder of the parsed options.
  #
  # The options live in an instance variable of the singleton rather than in
  # a class variable, so reading them before #initialize_hash was called
  # yields nil instead of raising NameError.
  class Options
    include Singleton

    # Stores +hash+ as the current options and returns it.
    def initialize_hash(hash)
      @options = hash
    end

    # Returns the option stored under +key+, or nil when no options have been
    # stored yet.
    def [](key)
      @options && @options[key]
    end

    # Returns the stored options object (nil before #initialize_hash).
    #
    # NOTE: this deliberately shadows Object#hash; callers rely on
    # `Options.instance.hash` to obtain the options.
    def hash
      @options
    end
  end
end
|