free_zipcode_data 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+
4
+ lib = File.expand_path('../lib', __FILE__)
5
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
+ require 'free_zipcode_data/version'
7
+
8
+ # rubocop:disable Metrics/BlockLength
9
# Gem specification for free_zipcode_data: identity metadata, the packaged
# file list (taken from git), and development/runtime dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name = 'free_zipcode_data'
  spec.version = FreeZipcodeData::VERSION
  spec.authors = ['Chris Blackburn', 'Chris McKnight']
  spec.email = ['87a1779b@opayq.com', 'fixme@mcknight.bogus']
  spec.homepage = 'https://github.com/midwire/free_zipcode_data'
  spec.license = 'MIT'
  spec.summary = 'Free US and world-wide postal codes in SQLite and CSV format'
  spec.description = <<~STRING
    Free US and world-wide postal codes in SQLite and CSV format.
    Automated zipcode/postal code aggregation and processing for any needs.
  STRING

  spec.required_ruby_version = '>= 2.3.0'

  # Packaging: every git-tracked file ships with the gem; executables and
  # test files are derived from that list by path prefix.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only dependencies (gem name => version requirement).
  {
    'bundler' => '~> 1.16',
    'pry-nav' => '~> 0.2',
    'rake' => '~> 12.0',
    'rspec' => '~> 3.7',
    'rubocop' => '~> 0.55',
    'ruby-prof' => '~> 0.17',
    'simplecov' => '~> 0.16'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end

  # Runtime dependencies (gem name => version requirement).
  {
    'colored' => '~> 1.2',
    'kiba' => '~> 2.0',
    'ruby-progressbar' => '~> 1.9',
    'rubyzip' => '~> 1.2',
    'sqlite3' => '~> 1.3',
    'trollop' => '~> 2.1'
  }.each do |gem_name, requirement|
    spec.add_runtime_dependency gem_name, requirement
  end
end
43
+ # rubocop:enable Metrics/BlockLength
data/lib/etl/common.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'csv_source'
4
+ require_relative '../free_zipcode_data/country_table'
5
+ require_relative '../free_zipcode_data/state_table'
6
+ require_relative '../free_zipcode_data/county_table'
7
+ require_relative '../free_zipcode_data/zipcode_table'
8
+
9
# Debugging helper for ETL job definitions: registers a transform that prints
# each row with `ap` and then hands the same row on unchanged.
#
# NOTE(review): `ap` is not required anywhere in this file; presumably it is
# provided by awesome_print loaded elsewhere — confirm before relying on it.
def show_me
  transform { |record| record.tap { |current| ap current } }
end
15
+
16
# Registers a transform that lets only the first `count` rows through and
# drops (returns nil for) every row after that.
#
# @param count [Integer, String, nil] maximum number of rows to keep; nil or
#   -1 means "no limit", in which case no transform is registered at all.
# @return whatever `transform` returns, or nil when no limit applies.
# @raise [ArgumentError, TypeError] if `count` cannot be converted by Integer().
def limit(count)
  max_rows = Integer(count || -1)
  return if max_rows == -1

  # The running total lives in a closure-local variable rather than an
  # instance variable, so each `limit` call counts its own rows and does not
  # depend on (or leak state into) whatever object evaluates the DSL.
  rows_seen = 0
  transform do |row|
    rows_seen += 1
    rows_seen > max_rows ? nil : row
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
# Row source that reads a delimited text file with the CSV library and yields
# every record as a Hash.
#
# The keyword is spelled `delimeter` (sic); it is part of the public interface
# and is kept as-is for callers.
class CsvSource
  attr_reader :filename, :delimeter, :quote_char, :headers

  # @param filename [String] path of the file to read
  # @param headers [Boolean] passed to CSV as its `headers:` option
  # @param delimeter [String] column separator (defaults to a tab)
  # @param quote_char [String] quoting character
  def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"')
    @filename, @headers = filename, headers
    @delimeter, @quote_char = delimeter, quote_char
  end

  # Opens the file and yields each record converted with `to_hash`; header
  # names are converted to symbols by the CSV `:symbol` header converter.
  def each
    CSV.open(filename, **reader_options) do |csv|
      csv.each { |record| yield(record.to_hash) }
    end
  end

  private

  # Options handed to CSV.open, derived from this source's settings.
  def reader_options
    {
      col_sep: delimeter,
      headers: headers,
      header_converters: :symbol,
      quote_char: quote_char
    }
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common'
4
+
5
module ETL
  # Kiba job definition that feeds one country CSV file into the country,
  # state, county and zipcode table destinations.
  module FreeZipcodeDataJob
    module_function

    # Builds the job definition via Kiba.parse.
    #
    # @param country_file [String] path of the comma-separated input file
    # @param database handed to every destination as `database:`
    # @param logger object responding to `info` and `verbose`
    # @param options hash-like object holding the `*_tablename` entries
    # @return whatever Kiba.parse returns
    def setup(country_file, database, logger, options)
      # Destination classes paired with the option key naming their table,
      # listed in the order in which they are registered.
      table_destinations = [
        [FreeZipcodeData::CountryTable, :country_tablename],
        [FreeZipcodeData::StateTable, :state_tablename],
        [FreeZipcodeData::CountyTable, :county_tablename],
        [FreeZipcodeData::ZipcodeTable, :zipcode_tablename]
      ]

      Kiba.parse do
        pre_process { logger.info("Processing '#{country_file}' data, please be patient...") }

        source CsvSource, filename: country_file, quote_char: '"', delimeter: ','

        table_destinations.each do |table_class, option_key|
          destination table_class, database: database, tablename: options[option_key]
        end

        post_process { logger.verbose('Finished generating table data...') }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Destination table for countries: creates the table plus a unique index on
  # alpha2, and inserts one row per distinct country code.
  class CountryTable < DbTable
    # Creates the table named `tablename` and its case-insensitive unique
    # index on `alpha2`.
    def build
      database.execute_batch(<<-SQL)
        create table #{tablename} (
          id integer not null primary key,
          alpha2 varchar(2) not null,
          alpha3 varchar(3),
          iso varchar(3),
          name varchar(255) not null
        )
      SQL

      database.execute_batch(<<-SQL)
        CREATE UNIQUE INDEX "main"."unique_country_alpha2"
        ON #{tablename} (alpha2 COLLATE NOCASE ASC);
      SQL
    end

    # Inserts the country referenced by `row[:country]`, using the alpha3, iso
    # and name values found in the country lookup table. Duplicate countries
    # (unique-index violations) are silently skipped.
    #
    # @param row [Hash] input row; only `:country` is read
    # @raise [RuntimeError] when the lookup table has no entry for the country
    def write(row)
      alpha2 = row[:country]
      country_hash = country_lookup_table[alpha2]
      if country_hash.nil?
        raise "Please file an issue at #{ISSUE_URL}: " \
              "no country lookup entry for [#{alpha2.inspect}]"
      end

      # Values are bound rather than interpolated so that names containing a
      # single quote cannot break the statement. The target is `tablename`,
      # i.e. the same table that #build created.
      sql = "INSERT INTO #{tablename} (alpha2, alpha3, iso, name) VALUES (?, ?, ?, ?)"
      # `to_s` keeps the previous behaviour of storing '' for missing values.
      values = [alpha2, country_hash[:alpha3], country_hash[:iso], country_hash[:name]].map(&:to_s)

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # Swallow duplicates
      end

      update_progress
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Destination table for counties: creates the table plus a unique index on
  # (state_id, abbr, name), and inserts one row per distinct county.
  class CountyTable < DbTable
    # Creates the table named `tablename` and its unique index.
    def build
      database.execute_batch(<<-SQL)
        create table #{tablename} (
          id integer not null primary key,
          state_id integer,
          abbr varchar(255),
          name varchar(255),
          county_seat varchar(255)
        )
      SQL

      database.execute_batch(<<-SQL)
        CREATE UNIQUE INDEX "main"."unique_county"
        ON #{tablename} (state_id, abbr, name COLLATE NOCASE ASC);
      SQL
    end

    # Inserts the county described by `row`. Rows without a county, or whose
    # state cannot be resolved to an id, are skipped (nil is returned and the
    # progress bar is not advanced). Duplicate counties are silently ignored.
    #
    # @param row [Hash] reads :county, :short_county, :short_state and :state
    # @return [nil] when the row is skipped
    # @raise [RuntimeError] wrapping any other error raised by the insert
    def write(row)
      return nil unless row[:county]

      state_id = get_state_id(row[:short_state], row[:state])
      return nil unless state_id

      # Bound parameters replace string interpolation, so neither the county
      # name nor its abbreviation can break the statement. The target is
      # `tablename`, i.e. the same table that #build created.
      sql = "INSERT INTO #{tablename} (state_id, abbr, name) VALUES (?, ?, ?)"
      # `to_s` keeps the previous behaviour of storing '' for a missing abbr.
      values = [state_id, row[:short_county].to_s, row[:county].to_s]

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # swallow duplicates
      rescue StandardError => err
        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] " \
              "VALUES: #{values.inspect}"
      end

      update_progress
    end
  end
end
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'open-uri'
5
+ require 'zip'
6
+
7
module FreeZipcodeData
  # Fetches a GeoNames postal-code zip archive into the work directory,
  # extracts its data file and produces a comma-separated copy with a header
  # row prepended.
  class DataSource
    BASE_URL = 'http://download.geonames.org/export/zip'

    # Header row written at the top of the generated CSV file.
    HEADERS = %w[
      COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY
      COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY
    ].freeze

    attr_reader :country, :options

    # @param country [String, nil] country code; nil selects 'allCountries'
    def initialize(country = nil)
      @country = country
      @options = Options.instance.hash
      @logger = Logger.instance
    end

    # Downloads the zip archive into the work directory.
    #
    # @return [nil] when the archive already exists and :clobber is not set
    def download
      return nil if !options[:clobber] && File.exist?(zipfile_path)

      FileUtils.mkdir_p(options[:work_dir])
      @logger.info("Downloading: #{zipfile} from GeoNames...")
      # URI#open (open-uri) is used because Kernel#open no longer accepts URLs
      # on Ruby 3. The local file is only created once the remote content has
      # been fetched, so a failed request does not leave an empty archive that
      # later runs would treat as "already downloaded".
      URI.parse("#{BASE_URL}/#{zipfile}").open do |remote|
        File.open(zipfile_path, 'wb') { |local| IO.copy_stream(remote, local) }
      end
    end

    # @return [String] path of the CSV file with headers (memoized)
    def datafile
      @datafile ||= datafile_with_headers
    end

    private

    # Archive file name: the upper-cased country code (or 'allCountries')
    # with a single '.zip' suffix, even if the caller already supplied one.
    def zipfile
      @zipfile ||= begin
        base = country.nil? ? 'allCountries' : country.upcase.sub(/\.ZIP\z/, '')
        "#{base}.zip"
      end
    end

    def zipfile_path
      @zipfile_path ||= File.join(options[:work_dir], zipfile)
    end

    # Extracts the first archive entry whose name does not contain "readme"
    # into the work directory and returns its path (nil if there is none).
    # An existing file is only overwritten when :clobber is set.
    def unzipped_datafile
      @unzipped_datafile ||= begin
        country_file = nil
        Zip::File.open(zipfile_path) do |zip|
          zip.each do |entry|
            next if entry.name =~ /readme/i

            country_file = File.join(options[:work_dir], entry.name)
            already_extracted = File.exist?(country_file)
            if !already_extracted || options[:clobber]
              # Allow rubyzip to overwrite the existing file.
              Zip.on_exists_proc = true if already_extracted
              @logger.verbose("Extracting: #{zipfile}...")
              entry.extract(country_file)
            end
            break
          end
        end
        country_file
      end
    end

    # Writes "<unzipped file>.csv": the header row followed by every row of
    # the tab-separated data file. An existing CSV is reused unless :clobber
    # is set.
    #
    # @raise [RuntimeError] when the archive contained no data file
    def datafile_with_headers
      source_path = unzipped_datafile
      raise "No data file found in #{zipfile_path}" if source_path.nil?

      filename = "#{source_path}.csv"
      if File.exist?(filename) && !options[:clobber]
        @logger.verbose("File: #{filename} already exists, skipping...")
        return filename
      end

      @logger.verbose("Preparing: #{filename} for processing...")
      CSV.open(filename, 'w') do |outfile|
        outfile << HEADERS
        # quote_char '|' keeps double quotes in the raw data from being
        # interpreted as CSV quoting.
        CSV.foreach(source_path, headers: false, col_sep: "\t", quote_char: '|') do |row|
          outfile << row
        end
      end
      filename
    end
  end
end
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+ require 'ruby-progressbar'
5
+
6
module FreeZipcodeData
  # Base class for the table destinations: holds the database handle and the
  # table name, drives a shared progress bar, and provides id-lookup helpers
  # for subclasses.
  class DbTable
    ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'

    attr_reader :database, :tablename
    @@progressbar = nil

    # @param database object responding to `execute(sql, bind_vars)`
    # @param tablename [String] name of the table this destination manages
    def initialize(database:, tablename:)
      @database = database
      @tablename = tablename
      # Single quotes make 'line_count' a string literal; SQLite treats a
      # double-quoted token as an identifier first.
      lc = select_first("SELECT value FROM meta WHERE name = 'line_count'")
      # NOTE(review): the factor 4 presumably matches the number of table
      # destinations sharing this bar — confirm against the job definition.
      @@progressbar = ProgressBar.create(total: lc.to_i * 4, format: '%t: |%B| %e')
    end

    # Advances the shared progress bar by one step.
    def update_progress
      @@progressbar.increment
    end

    private

    # Lazily loads the country lookup table.
    # NOTE(review): the path is relative, so it is resolved against the
    # current working directory — confirm that is intended.
    def country_lookup_table
      @country_lookup_table ||= YAML.load_file('country_lookup_table.yml')
    end

    # Runs `sql` and returns the first column of the first row, or nil when
    # the query yields no rows.
    #
    # @param sql [String] statement, optionally containing `?` placeholders
    # @param binds [Array] values bound to the placeholders
    # @raise [RuntimeError] wrapping SQLite3::SQLException
    def select_first(sql, binds = [])
      first_row = database.execute(sql, binds).first
      first_row&.first
    rescue SQLite3::SQLException => err
      raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
    end

    # The lookups below bind their values instead of interpolating them, so
    # input containing quotes cannot alter the statement. `to_s` preserves the
    # previous behaviour of comparing against '' when a value is nil.

    def get_country_id(country)
      select_first('SELECT id FROM countries WHERE alpha2 = ?', [country.to_s])
    end

    def get_state_id(state_abbr, state_name)
      select_first('SELECT id FROM states WHERE abbr = ? OR name = ?',
                   [state_abbr.to_s, state_name.to_s])
    end

    def get_county_id(county)
      return nil if county.nil?

      select_first('SELECT id FROM counties WHERE name = ?', [county.to_s])
    end

    # Doubles single quotes for use inside an SQL string literal; returns ''
    # for nil. Kept for subclasses that still build SQL by hand.
    def escape_single_quotes(string)
      string&.gsub("'", "''") || ''
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+ require 'logger'
5
+
6
module FreeZipcodeData
  # Process-wide logger. Wraps a provider (by default a stdlib ::Logger that
  # writes bare messages to STDOUT) and forwards any method the provider
  # understands to it.
  class Logger
    include Singleton

    attr_accessor :log_provider

    # @param provider object receiving the forwarded logging calls
    def initialize(provider = default_logger)
      @log_provider = provider
    end

    # Logs an exception at error level, with optional context data and up to
    # six backtrace lines.
    #
    # @param e [Exception] the exception to report
    # @param data [Hash, nil] extra context, included when non-empty
    def log_exception(e, data = {})
      msg = "EXCEPTION : #{e.class.name} : #{e.message}"
      msg += "\n data : #{data.inspect}" if data && !data.empty?
      # An exception that was never raised has no backtrace (nil).
      frames = Array(e.backtrace).first(6)
      msg += "\n #{frames.join("\n ")}" unless frames.empty?
      log_provider.error(msg)
    end

    # Forwards unknown methods (info, warn, error, ...) to the provider.
    def method_missing(meth, *args, &block)
      if log_provider.respond_to?(meth)
        log_provider.send(meth, *args, &block)
      else
        super
      end
    end

    # Companion to method_missing: keeps `respond_to?` and `method` accurate
    # for the forwarded methods without overriding `respond_to?` itself.
    def respond_to_missing?(meth, include_private = false)
      log_provider.respond_to?(meth) || super
    end

    # Logs `msg` at info level, but only when the verbose option is set.
    def verbose(msg)
      info(msg) if options&.verbose
    end

    private

    # ::Logger writing to STDOUT whose formatter prints only the message.
    def default_logger
      logger = ::Logger.new(STDOUT)
      logger.formatter = proc do |_, _, _, msg|
        "#{msg}\n"
      end
      logger
    end

    def options
      Options.instance.hash
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
module FreeZipcodeData
  # Process-wide holder for the parsed option hash.
  class Options
    include Singleton

    # Starts with no options, so reading before #initialize_hash yields nil
    # instead of raising on uninitialized state.
    def initialize
      @options = nil
    end

    # Stores the option hash.
    #
    # @param hash the options object to expose through #[] and #hash
    # @return the stored object
    def initialize_hash(hash)
      @options = hash
    end

    # @return the option stored under `key`, or nil when nothing is stored
    def [](key)
      @options && @options[key]
    end

    # Returns the stored options object (nil until #initialize_hash is called).
    # NOTE: this deliberately shadows Object#hash; callers throughout the gem
    # use `Options.instance.hash` to reach the option hash.
    def hash
      @options
    end
  end
end
21
+ end