free_zipcode_data 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.rspec +1 -0
- data/.rubocop.yml +88 -0
- data/CHANGELOG +42 -0
- data/CODE_OF_CONDUCT.md +42 -0
- data/CONTRIBUTING.md +25 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +82 -0
- data/ISSUE_TEMPLATE.md +9 -0
- data/LICENSE.md +21 -0
- data/PULL_REQUEST_TEMPLATE.md +5 -0
- data/README.md +115 -0
- data/Rakefile +10 -0
- data/SUPPORT.md +6 -0
- data/all_us_counties.csv +3143 -0
- data/all_us_states.csv +52 -0
- data/all_us_zipcodes.csv +42367 -0
- data/bin/free_zipcode_data +9 -0
- data/counties_states_zipcodes.sql +46899 -0
- data/country_lookup_table.yml +989 -0
- data/free_zipcode_data.gemspec +43 -0
- data/lib/etl/common.rb +24 -0
- data/lib/etl/csv_source.rb +26 -0
- data/lib/etl/free_zipcode_data_job.rb +39 -0
- data/lib/free_zipcode_data/country_table.rb +46 -0
- data/lib/free_zipcode_data/county_table.rb +50 -0
- data/lib/free_zipcode_data/data_source.rb +88 -0
- data/lib/free_zipcode_data/db_table.rb +58 -0
- data/lib/free_zipcode_data/logger.rb +57 -0
- data/lib/free_zipcode_data/options.rb +21 -0
- data/lib/free_zipcode_data/runner.rb +172 -0
- data/lib/free_zipcode_data/sqlite_ram.rb +39 -0
- data/lib/free_zipcode_data/state_table.rb +51 -0
- data/lib/free_zipcode_data/version.rb +5 -0
- data/lib/free_zipcode_data/zipcode_table.rb +57 -0
- data/lib/free_zipcode_data.rb +43 -0
- data/lib/tasks/version.rake +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +270 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
|
|
4
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
5
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
6
|
+
require 'free_zipcode_data/version'
|
|
7
|
+
|
|
8
|
+
# rubocop:disable Metrics/BlockLength
|
|
9
|
+
# Gem specification for free_zipcode_data: package metadata, the packaged file
# list (taken from `git ls-files`), and the development/runtime dependencies.
Gem::Specification.new do |spec|
  spec.name     = 'free_zipcode_data'
  spec.version  = FreeZipcodeData::VERSION
  spec.authors  = ['Chris Blackburn', 'Chris McKnight']
  spec.email    = ['87a1779b@opayq.com', 'fixme@mcknight.bogus']
  spec.summary  = 'Free US and world-wide postal codes in SQLite and CSV format'
  spec.homepage = 'https://github.com/midwire/free_zipcode_data'
  spec.license  = 'MIT'
  spec.description = <<~STRING
    Free US and world-wide postal codes in SQLite and CSV format.
    Automated zipcode/postal code aggregation and processing for any needs.
  STRING

  spec.required_ruby_version = '>= 2.3.0'

  # Everything tracked by git is packaged; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Tooling needed only while developing the gem.
  {
    'bundler'   => '~> 1.16',
    'pry-nav'   => '~> 0.2',
    'rake'      => '~> 12.0',
    'rspec'     => '~> 3.7',
    'rubocop'   => '~> 0.55',
    'ruby-prof' => '~> 0.17',
    'simplecov' => '~> 0.16'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end

  # Libraries required at runtime.
  {
    'colored'          => '~> 1.2',
    'kiba'             => '~> 2.0',
    'ruby-progressbar' => '~> 1.9',
    'rubyzip'          => '~> 1.2',
    'sqlite3'          => '~> 1.3',
    'trollop'          => '~> 2.1'
  }.each do |gem_name, requirement|
    spec.add_runtime_dependency gem_name, requirement
  end
end
|
|
43
|
+
# rubocop:enable Metrics/BlockLength
|
data/lib/etl/common.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'csv_source'
|
|
4
|
+
require_relative '../free_zipcode_data/country_table'
|
|
5
|
+
require_relative '../free_zipcode_data/state_table'
|
|
6
|
+
require_relative '../free_zipcode_data/county_table'
|
|
7
|
+
require_relative '../free_zipcode_data/zipcode_table'
|
|
8
|
+
|
|
9
|
+
# ETL debugging helper: registers a pass-through transform that prints every
# row with `ap` and then hands the very same row on unchanged.
def show_me
  transform do |row|
    row.tap { |current| ap current }
  end
end
|
|
15
|
+
|
|
16
|
+
# ETL helper: registers a transform that lets only the first +count+ rows
# through and replaces every later row with nil.
#
# @param count [Integer, String, nil] maximum number of rows to pass on;
#   nil or -1 means "no limit", in which case no transform is registered.
# @return [Object, nil] whatever `transform` returns, or nil when unlimited.
# @raise [ArgumentError] when +count+ cannot be converted by Integer().
def limit(count)
  max_rows = Integer(count || -1)
  return if max_rows == -1

  # The counter is a closure-local variable rather than an instance variable,
  # so every `limit` transform counts its own rows and nothing leaks onto the
  # object the block happens to run in.
  seen = 0
  transform do |row|
    seen += 1
    seen > max_rows ? nil : row
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'csv'
|
|
4
|
+
|
|
5
|
+
# Row source that reads a delimited text file and yields each record as a
# Hash (via CSV::Row#to_hash), with header names converted to symbols.
#
# NOTE: the keyword is spelled `delimeter` (sic); it is part of the public
# interface and is kept as-is for callers.
class CsvSource
  attr_reader :filename, :delimeter, :quote_char, :headers

  # @param filename [String] path of the file to read
  # @param headers [Boolean] passed to CSV as its `headers:` option
  # @param delimeter [String] column separator (defaults to a tab)
  # @param quote_char [String] quoting character
  def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"')
    @filename = filename
    @headers = headers
    @delimeter = delimeter
    @quote_char = quote_char
  end

  # Yields every record of the file as a Hash.
  #
  # @yieldparam row [Hash] one record
  # @return [Enumerator] when called without a block
  def each
    # Without a block `yield` would raise LocalJumpError; hand back an
    # Enumerator instead, as Ruby collections conventionally do.
    return enum_for(:each) unless block_given?

    CSV.open(filename, **csv_options) do |csv|
      csv.each { |row| yield(row.to_hash) }
    end
  end

  private

  # Options handed to CSV.open for this source.
  def csv_options
    {
      col_sep: delimeter,
      headers: headers,
      header_converters: :symbol,
      quote_char: quote_char
    }
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'common'
|
|
4
|
+
|
|
5
|
+
module ETL
  # Kiba job definition that reads one country CSV file and feeds every row
  # to the country, state, county and zipcode table destinations, in that
  # order.
  module FreeZipcodeDataJob
    module_function

    # Builds the Kiba job.
    #
    # @param country_file [String] path of the comma-separated input file
    # @param database [Object] database handle handed to every destination
    # @param logger [#info, #verbose] receives progress messages
    # @param options [#[]] provides the four `*_tablename` settings
    # @return [Object] whatever Kiba.parse returns
    def setup(country_file, database, logger, options)
      Kiba.parse do
        pre_process { logger.info("Processing '#{country_file}' data, please be patient...") }

        source CsvSource, filename: country_file, quote_char: '"', delimeter: ','

        # Destinations are registered in this order: country, state, county,
        # zipcode.
        [
          [FreeZipcodeData::CountryTable, :country_tablename],
          [FreeZipcodeData::StateTable,   :state_tablename],
          [FreeZipcodeData::CountyTable,  :county_tablename],
          [FreeZipcodeData::ZipcodeTable, :zipcode_tablename]
        ].each do |table_class, name_key|
          destination table_class, database: database, tablename: options[name_key]
        end

        post_process { logger.verbose('Finished generating table data...') }
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'db_table'
|
|
4
|
+
|
|
5
|
+
module FreeZipcodeData
  # Destination table holding one row per country (alpha2, alpha3, iso, name).
  class CountryTable < DbTable
    # Creates the table named +tablename+ and a case-insensitive unique index
    # on its alpha2 column.
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          alpha2 varchar(2) not null,
          alpha3 varchar(3),
          iso varchar(3),
          name varchar(255) not null
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_country_alpha2"
        ON #{tablename} (alpha2 COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the country referenced by +row+ and advances the progress bar.
    # A country that is already present is skipped silently.
    #
    # @param row [Hash] input record; only row[:country] is read
    def write(row)
      # NOTE(review): a country code missing from the lookup table makes
      # country_hash nil and the [] calls below raise NoMethodError — confirm
      # whether that should be reported more gracefully.
      country_hash = country_lookup_table[row[:country]]

      # Insert into the table this instance built (not a hard-coded name) and
      # bind the values, so names containing quotes cannot break the statement.
      sql = <<-SQL
        INSERT INTO #{tablename} (alpha2, alpha3, iso, name)
        VALUES (?, ?, ?, ?)
      SQL
      # to_s keeps the earlier behaviour of storing '' for missing values.
      values = [
        row[:country],
        country_hash[:alpha3],
        country_hash[:iso],
        country_hash[:name]
      ].map(&:to_s)

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # Swallow duplicates
      end

      update_progress
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'db_table'
|
|
4
|
+
|
|
5
|
+
module FreeZipcodeData
  # Destination table holding one row per county, linked to its state.
  class CountyTable < DbTable
    # Creates the table named +tablename+ and a unique index over
    # (state_id, abbr, name).
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          state_id integer,
          abbr varchar(255),
          name varchar(255),
          county_seat varchar(255)
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_county"
        ON #{tablename} (state_id, abbr, name COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the county referenced by +row+ and advances the progress bar.
    # Rows without a county, or whose state cannot be found, are skipped.
    #
    # @param row [Hash] input record; reads :county, :short_county,
    #   :short_state and :state
    # @return [nil] when the row is skipped
    # @raise [RuntimeError] on any database error other than a duplicate
    def write(row)
      return nil unless row[:county]

      state_id = get_state_id(row[:short_state], row[:state])
      return nil unless state_id

      # Insert into the table this instance built (not a hard-coded name) and
      # bind the values so quotes in any field cannot break the statement.
      sql = <<-SQL
        INSERT INTO #{tablename} (state_id, abbr, name)
        VALUES (?, ?, ?)
      SQL
      # to_s keeps the earlier behaviour of storing '' for a missing abbr.
      values = [state_id, row[:short_county].to_s, row[:county]]

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # swallow duplicates
      rescue StandardError => err
        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] VALUES: #{values.inspect}"
      end

      update_progress
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'csv'
|
|
4
|
+
require 'open-uri'
|
|
5
|
+
require 'zip'
|
|
6
|
+
|
|
7
|
+
module FreeZipcodeData
  # Downloads a GeoNames postal-code archive for one country (or for all
  # countries), extracts its data file into the work directory and produces a
  # comma-separated copy with a header row.
  class DataSource
    BASE_URL = 'http://download.geonames.org/export/zip'

    # Header row written at the top of the generated CSV file.
    CSV_HEADERS = %w[
      COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY
      COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY
    ].freeze

    attr_reader :country, :options

    # @param country [String, nil] country code; nil selects 'allCountries'
    def initialize(country = nil)
      @country = country
      @options = Options.instance.hash
      @logger = Logger.instance
    end

    # Fetches the zip archive into the work directory.
    #
    # @return [nil] when the archive already exists and :clobber is not set
    def download
      return nil if !options[:clobber] && File.exist?(zipfile_path)

      FileUtils.mkdir_p(options[:work_dir])
      @logger.info("Downloading: #{zipfile} from GeoNames...")
      # File.open / URI#open instead of Kernel#open: Kernel#open no longer
      # opens URLs on Ruby 3 and treats a leading '|' as a command. The block
      # forms also close both the local file and the remote stream.
      File.open(zipfile_path, 'wb') do |file|
        URI.parse("#{BASE_URL}/#{zipfile}").open do |remote|
          IO.copy_stream(remote, file)
        end
      end
    end

    # @return [String] path of the CSV file with headers (memoized)
    def datafile
      @datafile ||= datafile_with_headers
    end

    private

    # Archive name: the upper-cased country code (or 'allCountries') plus
    # '.zip'. A '.zip' suffix the caller already supplied is removed first so
    # it is neither upper-cased nor duplicated.
    def zipfile
      @zipfile ||= begin
        basename = country.nil? ? 'allCountries' : country.sub(/\.zip\z/i, '').upcase
        "#{basename}.zip"
      end
    end

    def zipfile_path
      @zipfile_path ||= File.join(options[:work_dir], zipfile)
    end

    # Extracts the first archive entry whose name does not contain "readme"
    # and returns its path inside the work directory (nil if there is none).
    def unzipped_datafile
      @unzipped_datafile ||= begin
        country_file = nil
        Zip::File.open(zipfile_path) do |zip|
          zip.each do |entry|
            next if entry.name =~ /readme/i

            country_file = File.join(options[:work_dir], entry.name)
            extract_entry(entry, country_file)
            break
          end
        end
        country_file
      end
    end

    # Extracts +entry+ to +destination+; an existing file is only replaced
    # when the :clobber option is set.
    def extract_entry(entry, destination)
      if File.exist?(destination)
        return unless options[:clobber]

        Zip.on_exists_proc = true
      end
      @logger.verbose("Extracting: #{zipfile}...")
      entry.extract(destination)
    end

    # Rewrites the tab-separated data file as "<datafile>.csv" with a header
    # row, unless that file already exists and :clobber is not set.
    def datafile_with_headers
      filename = "#{unzipped_datafile}.csv"
      if File.exist?(filename) && !options[:clobber]
        @logger.verbose("File: #{filename} already exists, skipping...")
        return filename
      end

      @logger.verbose("Preparing: #{filename} for processing...")
      CSV.open(filename, 'w') do |outfile|
        outfile << CSV_HEADERS
        CSV.foreach(unzipped_datafile, headers: false, col_sep: "\t", quote_char: '|') do |row|
          outfile << row
        end
      end
      filename
    end
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
require 'ruby-progressbar'
|
|
5
|
+
|
|
6
|
+
module FreeZipcodeData
  # Base class for the table destinations: keeps the database handle and
  # table name, drives the shared progress bar and offers small lookup
  # helpers to subclasses.
  class DbTable
    ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
    LOOKUP_FILE = 'country_lookup_table.yml'

    attr_reader :database, :tablename

    # One progress bar shared by this class and all of its subclasses.
    @@progressbar = nil

    # @param database [Object] handle responding to execute(sql, binds)
    # @param tablename [String] name of the table this instance manages
    def initialize(database:, tablename:)
      @database = database
      @tablename = tablename
      # Single quotes: a double-quoted "line_count" is an identifier in SQL
      # and only works as a string through a SQLite compatibility quirk.
      lc = select_first("SELECT value FROM meta where name = 'line_count'")
      # Total is four times the line count recorded in the meta table.
      @@progressbar = ProgressBar.create(total: lc.to_i * 4, format: '%t: |%B| %e')
    end

    # Advances the shared progress bar by one step.
    def update_progress
      @@progressbar.increment
    end

    private

    # Loads (once) the country lookup table. The file is looked up in the
    # current directory first, as before, then two directories above this
    # source file — presumably the gem root, where the package listing shows
    # the YAML file.
    def country_lookup_table
      @country_lookup_table ||= begin
        path = LOOKUP_FILE
        path = File.expand_path("../../#{LOOKUP_FILE}", __dir__) unless File.exist?(path)
        YAML.load_file(path)
      end
    end

    # Runs +sql+ with the optional bind values and returns the first column
    # of the first row, or nil when nothing matched.
    #
    # @raise [RuntimeError] wrapping any SQLite3::SQLException
    def select_first(sql, binds = [])
      database.execute(sql, binds).first&.first
    rescue SQLite3::SQLException => err
      raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] VALUES: #{binds.inspect}"
    end

    # The lookups below bind their values instead of interpolating them, so
    # quotes in the data cannot alter the statement. to_s keeps the earlier
    # behaviour of comparing against '' when a value is nil.

    def get_country_id(country)
      select_first('SELECT id FROM countries WHERE alpha2 = ?', [country.to_s])
    end

    def get_state_id(state_abbr, state_name)
      select_first('SELECT id FROM states WHERE abbr = ? OR name = ?',
                   [state_abbr.to_s, state_name.to_s])
    end

    def get_county_id(county)
      return nil if county.nil?

      select_first('SELECT id FROM counties WHERE name = ?', [county])
    end

    # Doubles single quotes for use inside a SQL string literal; nil becomes
    # ''. Kept for subclasses that still build SQL text.
    def escape_single_quotes(string)
      string&.gsub("'", "''") || ''
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
require 'logger'
|
|
5
|
+
|
|
6
|
+
module FreeZipcodeData
|
|
7
|
+
class Logger
|
|
8
|
+
include Singleton
|
|
9
|
+
|
|
10
|
+
attr_accessor :log_provider
|
|
11
|
+
|
|
12
|
+
def initialize(provider = default_logger)
|
|
13
|
+
@log_provider = provider
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def log_exception(e, data = {})
|
|
17
|
+
msg = "EXCEPTION : #{e.class.name} : #{e.message}"
|
|
18
|
+
msg += "\n data : #{data.inspect}" if data && !data.empty?
|
|
19
|
+
msg += "\n #{e.backtrace[0, 6].join("\n ")}"
|
|
20
|
+
log_provider.error(msg)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def method_missing(meth, *args, &block)
|
|
24
|
+
if log_provider.respond_to?(meth)
|
|
25
|
+
log_provider.send(meth, *args, &block)
|
|
26
|
+
else
|
|
27
|
+
super
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def respond_to?(meth, include_private = false)
|
|
32
|
+
if log_provider.respond_to?(meth)
|
|
33
|
+
true
|
|
34
|
+
else
|
|
35
|
+
super
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def verbose(msg)
|
|
40
|
+
info(msg) if options&.verbose
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
def default_logger
|
|
46
|
+
logger = ::Logger.new(STDOUT)
|
|
47
|
+
logger.formatter = proc do |_, _, _, msg|
|
|
48
|
+
"#{msg}\n"
|
|
49
|
+
end
|
|
50
|
+
logger
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def options
|
|
54
|
+
Options.instance.hash
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
|
|
5
|
+
module FreeZipcodeData
  # Singleton holder for the option hash shared across the application.
  class Options
    include Singleton

    def initialize
      # Nothing stored yet: readers return nil until initialize_hash runs,
      # instead of failing on an uninitialized variable.
      @options = nil
    end

    # Stores the option hash.
    #
    # @param hash [Hash] the options to share
    # @return [Hash] the stored hash
    def initialize_hash(hash)
      @options = hash
    end

    # @param key [Object] option name
    # @return [Object, nil] the option value; nil when unset or when no hash
    #   has been stored yet
    def [](key)
      @options && @options[key]
    end

    # NOTE: this shadows Object#hash; the name is part of the public
    # interface and is kept for callers.
    #
    # @return [Hash, nil] the stored option hash, nil before initialize_hash
    def hash
      @options
    end
  end
end
|