free_zipcode_data 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+
4
# Put this gem's own lib/ directory on the load path so the version constant
# can be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'free_zipcode_data/version'

# rubocop:disable Metrics/BlockLength
Gem::Specification.new do |spec|
  # --- Identity -------------------------------------------------------------
  spec.name = 'free_zipcode_data'
  spec.version = FreeZipcodeData::VERSION
  spec.authors = ['Chris Blackburn', 'Chris McKnight']
  spec.email = ['87a1779b@opayq.com', 'fixme@mcknight.bogus']
  spec.summary = 'Free US and world-wide postal codes in SQLite and CSV format'
  spec.description = <<~STRING
    Free US and world-wide postal codes in SQLite and CSV format.
    Automated zipcode/postal code aggregation and processing for any needs.
  STRING
  spec.homepage = 'https://github.com/midwire/free_zipcode_data'
  spec.license = 'MIT'

  # --- Packaging ------------------------------------------------------------
  spec.required_ruby_version = '>= 2.3.0'
  # The packaged file list is whatever git tracks (NUL-separated output).
  spec.files = `git ls-files -z`.split("\x0")
  # Everything under bin/ is exposed as an executable, by base name.
  spec.executables = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # --- Development-only dependencies ---------------------------------------
  {
    'bundler' => '~> 1.16',
    'pry-nav' => '~> 0.2',
    'rake' => '~> 12.0',
    'rspec' => '~> 3.7',
    'rubocop' => '~> 0.55',
    'ruby-prof' => '~> 0.17',
    'simplecov' => '~> 0.16'
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end

  # --- Runtime dependencies -------------------------------------------------
  {
    'colored' => '~> 1.2',
    'kiba' => '~> 2.0',
    'ruby-progressbar' => '~> 1.9',
    'rubyzip' => '~> 1.2',
    'sqlite3' => '~> 1.3',
    'trollop' => '~> 2.1'
  }.each do |gem_name, constraint|
    spec.add_runtime_dependency gem_name, constraint
  end
end
# rubocop:enable Metrics/BlockLength
data/lib/etl/common.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'csv_source'
4
+ require_relative '../free_zipcode_data/country_table'
5
+ require_relative '../free_zipcode_data/state_table'
6
+ require_relative '../free_zipcode_data/county_table'
7
+ require_relative '../free_zipcode_data/zipcode_table'
8
+
9
# Debugging aid: registers a transform that prints every row with `ap` and
# then hands the very same row on unchanged.
#
# NOTE(review): `transform` and `ap` are not defined in this file; presumably
# they come from the Kiba DSL context and awesome_print respectively - confirm.
def show_me
  transform { |row| row.tap { |current| ap current } }
end
15
+
16
# Registers a transform that lets only the first +count+ rows through and
# replaces every later row with nil.
#
# The running total is kept in a local captured by the block, not in an
# instance variable on the surrounding context. Each `limit` step therefore
# counts independently; two `limit` calls on the same context no longer share
# (and double-increment) a single `@counter`.
#
# @param count [Integer, String, nil] maximum number of rows to keep; nil or
#   -1 means "no limit" and registers nothing
# @return [nil] when no limit applies; otherwise whatever `transform` returns
# @raise [ArgumentError, TypeError] when +count+ cannot be converted by Integer()
def limit(count)
  max_rows = Integer(count || -1)
  return if max_rows == -1

  rows_seen = 0
  transform do |row|
    rows_seen += 1
    rows_seen > max_rows ? nil : row
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
# Row source that reads a delimited text file and yields one record per line.
#
# The keyword is spelled +delimeter+ (sic); that spelling is part of the
# public interface and is kept for compatibility with existing callers.
class CsvSource
  attr_reader :filename, :delimeter, :quote_char, :headers

  # @param filename [String] path of the file to read
  # @param headers [Boolean] passed to CSV as its +headers+ option
  # @param delimeter [String] column separator (tab by default)
  # @param quote_char [String] quoting character
  def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"')
    @filename = filename
    @headers = headers
    @delimeter = delimeter
    @quote_char = quote_char
  end

  # Yields every record of the file in order.
  #
  # With headers enabled each record is a Hash keyed by symbolized header.
  # Without headers CSV produces plain Arrays, which have no +to_hash+, so
  # those are yielded as-is instead of raising NoMethodError.
  #
  # @yieldparam row [Hash, Array] one record
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    CSV.open(filename,
             col_sep: delimeter,
             headers: headers,
             header_converters: :symbol,
             quote_char: quote_char) do |csv|
      csv.each do |row|
        yield(row.respond_to?(:to_hash) ? row.to_hash : row)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common'
4
+
5
module ETL
  # Builds the job definition that loads one country CSV into the four tables.
  module FreeZipcodeDataJob
    module_function

    # Describes the job inside a `Kiba.parse` block: one CSV source, four table
    # destinations (country, state, county, zipcode - in that order) and a log
    # line before and after.
    #
    # @param country_file [String] path of the comma-separated input file
    # @param database [Object] handle passed to every destination as +database:+
    # @param logger [#info, #verbose] receives the start/finish messages
    # @param options [#[]] supplies the four +*_tablename+ values
    # @return [Object] whatever `Kiba.parse` returns
    def setup(country_file, database, logger, options)
      Kiba.parse do
        pre_process { logger.info("Processing '#{country_file}' data, please be patient...") }

        source CsvSource, filename: country_file, quote_char: '"', delimeter: ','

        # Order matters: destinations are registered exactly in this sequence.
        [
          [FreeZipcodeData::CountryTable, :country_tablename],
          [FreeZipcodeData::StateTable, :state_tablename],
          [FreeZipcodeData::CountyTable, :county_tablename],
          [FreeZipcodeData::ZipcodeTable, :zipcode_tablename]
        ].each do |table_class, option_key|
          destination table_class, database: database, tablename: options[option_key]
        end

        post_process { logger.verbose('Finished generating table data...') }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Destination that creates and fills the country table.
  class CountryTable < DbTable
    # Creates the table named by +tablename+ plus a case-insensitive unique
    # index on +alpha2+.
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          alpha2 varchar(2) not null,
          alpha3 varchar(3),
          iso varchar(3),
          name varchar(255) not null
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_country_alpha2"
        ON #{tablename} (alpha2 COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the country referenced by +row+ and advances the progress bar.
    #
    # Values are sent as bound parameters rather than interpolated into the
    # statement, so a name containing an apostrophe can no longer break (or
    # alter) the SQL. The insert targets +tablename+, the same table that
    # +build+ creates, instead of a hard-coded "countries".
    #
    # @param row [Hash] must carry the two-letter code under +:country+
    # @return whatever +update_progress+ returns
    def write(row)
      country_hash = country_lookup_table[row[:country]]

      sql = <<-SQL
        INSERT INTO #{tablename} (alpha2, alpha3, iso, name)
        VALUES (?, ?, ?, ?)
      SQL

      # to_s mirrors the old string interpolation: a missing attribute is
      # stored as an empty string, not NULL.
      values = [
        row[:country],
        country_hash[:alpha3],
        country_hash[:iso],
        country_hash[:name]
      ].map(&:to_s)

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # Swallow duplicates: the same country shows up on many input rows.
      end

      update_progress
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Destination that creates and fills the county table.
  class CountyTable < DbTable
    # Creates the table named by +tablename+ plus a unique index over
    # (state_id, abbr, name).
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          state_id integer,
          abbr varchar(255),
          name varchar(255),
          county_seat varchar(255)
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_county"
        ON #{tablename} (state_id, abbr, name COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the county referenced by +row+ and advances the progress bar.
    #
    # Rows without a county, or whose state cannot be resolved, are skipped
    # (returning nil without touching the progress bar). Values are sent as
    # bound parameters, so neither the county name nor its abbreviation needs
    # manual quote escaping, and the insert targets +tablename+ rather than a
    # hard-coded "counties".
    #
    # @param row [Hash] reads +:county+, +:short_county+, +:state+, +:short_state+
    # @return [nil] when the row is skipped; otherwise whatever
    #   +update_progress+ returns
    # @raise [RuntimeError] on any non-constraint database error
    def write(row)
      return nil unless row[:county]

      state_id = get_state_id(row[:short_state], row[:state])
      return nil unless state_id

      sql = <<-SQL
        INSERT INTO #{tablename} (state_id, abbr, name)
        VALUES (?, ?, ?)
      SQL

      # to_s keeps a missing abbreviation as '' (as the interpolated SQL did);
      # a NULL would never collide in the unique index and let duplicates in.
      values = [state_id, row[:short_county].to_s, row[:county].to_s]

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # swallow duplicates
      rescue StandardError => err
        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
      end

      update_progress
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'open-uri'
5
+ require 'zip'
6
+
7
module FreeZipcodeData
  # Fetches a GeoNames postal-code archive, unpacks it into the work directory
  # and produces a comma-separated copy with a header row.
  class DataSource
    BASE_URL = 'http://download.geonames.org/export/zip'

    # Column names written as the first line of the generated CSV.
    HEADERS = %w[
      COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY
      COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY
    ].freeze

    attr_reader :country, :options

    # @param country [String, nil] country code; nil selects the
    #   "allCountries" archive
    def initialize(country = nil)
      @country = country
      @options = Options.instance.hash
      @logger = Logger.instance
    end

    # Downloads the archive into the work directory.
    #
    # The remote stream is opened through the URI object (Kernel#open no
    # longer accepts URLs on Ruby 3.0+) and copied to disk in chunks instead
    # of being read into memory first. The local file is only created once the
    # request has succeeded, so a failed request does not leave behind an
    # empty archive that later runs would mistake for a finished download.
    #
    # @return [nil] when the archive already exists and +:clobber+ is off
    def download
      return nil if !options[:clobber] && File.exist?(zipfile_path)

      FileUtils.mkdir_p(options[:work_dir])
      @logger.info("Downloading: #{zipfile} from GeoNames...")
      URI.parse("#{BASE_URL}/#{zipfile}").open do |remote|
        File.open(zipfile_path, 'wb') { |local| IO.copy_stream(remote, local) }
      end
    end

    # @return [String] path of the header-prefixed CSV (built on first call)
    def datafile
      @datafile ||= datafile_with_headers
    end

    private

    # Archive file name: upper-cased country code (or "allCountries") + ".zip".
    def zipfile
      @zipfile ||= begin
        filename = country.nil? ? 'allCountries' : country.upcase
        filename += '.zip' unless filename =~ /\.zip$/
        filename
      end
    end

    def zipfile_path
      @zipfile_path ||= File.join(options[:work_dir], zipfile)
    end

    # Extracts the first archive entry whose name does not contain "readme"
    # and returns its path inside the work directory (nil if there is none).
    def unzipped_datafile
      @unzipped_datafile ||= begin
        country_file = nil
        Zip::File.open(zipfile_path) do |zip|
          zip.each do |entry|
            next if entry.name =~ /readme/i

            country_file = File.join(options[:work_dir], entry.name)
            extract_entry(entry, country_file)
            break
          end
        end
        country_file
      end
    end

    # Writes +entry+ to +target+ unless the file is already there and
    # +:clobber+ is off.
    def extract_entry(entry, target)
      already_there = File.exist?(target)
      return if already_there && !options[:clobber]

      # Global rubyzip switch: allow overwriting the existing file.
      Zip.on_exists_proc = true if already_there
      @logger.verbose("Extracting: #{zipfile}...")
      entry.extract(target)
    end

    # Converts the tab-separated GeoNames file into "<file>.csv" with a header
    # line, unless that file already exists and +:clobber+ is off.
    def datafile_with_headers
      filename = "#{unzipped_datafile}.csv"
      if File.exist?(filename) && !options[:clobber]
        @logger.verbose("File: #{filename} already exists, skipping...")
        return filename
      end

      @logger.verbose("Preparing: #{filename} for processing...")
      CSV.open(filename, 'w') do |outfile|
        outfile << HEADERS
        # '|' as quote char makes CSV treat double quotes in the data literally.
        CSV.foreach(unzipped_datafile, headers: false, col_sep: "\t", quote_char: '|') do |row|
          outfile << row
        end
      end
      filename
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+ require 'ruby-progressbar'
5
+
6
module FreeZipcodeData
  # Base class of the table destinations: holds the database handle and table
  # name, owns the shared progress bar and offers id-lookup helpers.
  class DbTable
    ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'

    attr_reader :database, :tablename
    # Class variable, hence shared by every subclass; each new table instance
    # replaces it with a fresh bar.
    @@progressbar = nil

    # @param database [#execute] database handle
    # @param tablename [String] name of the table this instance manages
    def initialize(database:, tablename:)
      @database = database
      @tablename = tablename
      # Single quotes: 'line_count' is a string value, not an identifier.
      lc = select_first("SELECT value FROM meta where name = 'line_count'")
      # The total is four times the line count.
      # NOTE(review): presumably one tick per row per table (4 tables) - confirm.
      @@progressbar = ProgressBar.create(total: lc.to_i * 4, format: '%t: |%B| %e')
    end

    # Advances the shared progress bar by one step.
    def update_progress
      @@progressbar.increment
    end

    private

    # Lazily loads the country lookup data.
    # NOTE(review): the path is relative, so it resolves against the current
    # working directory - confirm that is intended.
    def country_lookup_table
      @country_lookup_table ||= YAML.load_file('country_lookup_table.yml')
    end

    # Runs +sql+ and returns the first column of the first row, or nil when
    # the query yields no rows.
    #
    # @param sql [String] statement, optionally containing +?+ placeholders
    # @param binds [Array] values for the placeholders
    # @raise [RuntimeError] wrapping any SQLite3::SQLException
    def select_first(sql, binds = [])
      rows = database.execute(sql, binds)
      rows.first&.first
    rescue SQLite3::SQLException => err
      raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
    end

    # The lookups below pass their values as bound parameters, so quotes in
    # the input cannot break or change the statement. +to_s+ keeps the old
    # behaviour of matching a missing value against the empty string.

    def get_country_id(country)
      select_first('SELECT id FROM countries WHERE alpha2 = ?', [country.to_s])
    end

    def get_state_id(state_abbr, state_name)
      sql = 'SELECT id FROM states WHERE abbr = ? OR name = ?'
      select_first(sql, [state_abbr.to_s, state_name.to_s])
    end

    def get_county_id(county)
      return nil if county.nil?

      select_first('SELECT id FROM counties WHERE name = ?', [county])
    end

    # Doubles single quotes for embedding in a SQL string literal; nil
    # becomes ''. Not called inside this class.
    # NOTE(review): presumably kept for subclasses that still build SQL by
    # hand - confirm.
    def escape_single_quotes(string)
      string&.gsub(/[']/, '\'\'') || ''
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+ require 'logger'
5
+
6
module FreeZipcodeData
  # Process-wide logging facade. Anything the wrapped +log_provider+ responds
  # to (info, error, ...) is forwarded to it.
  class Logger
    include Singleton

    attr_accessor :log_provider

    # @param provider [Object] logger to delegate to; defaults to a
    #   message-only logger writing to STDOUT
    def initialize(provider = default_logger)
      @log_provider = provider
    end

    # Logs an exception (class, message, optional data, first six backtrace
    # lines) at error level.
    #
    # @param e [Exception] exception to report; its backtrace may be nil
    #   (an exception that was built but never raised)
    # @param data [Hash, nil] extra context, printed only when non-empty
    def log_exception(e, data = {})
      msg = "EXCEPTION : #{e.class.name} : #{e.message}"
      msg += "\n data : #{data.inspect}" if data && !data.empty?
      msg += "\n #{Array(e.backtrace).first(6).join("\n ")}"
      log_provider.error(msg)
    end

    # Forwards unknown messages to the provider when it understands them.
    def method_missing(meth, *args, &block)
      if log_provider.respond_to?(meth)
        log_provider.public_send(meth, *args, &block)
      else
        super
      end
    end

    # Companion of +method_missing+: makes +respond_to?+ and +method+ agree
    # with what is actually delegated.
    def respond_to_missing?(meth, include_private = false)
      log_provider.respond_to?(meth) || super
    end

    # Logs +msg+ at info level, but only when the +:verbose+ option is set.
    def verbose(msg)
      info(msg) if options && options[:verbose]
    end

    private

    # STDOUT logger that prints the bare message followed by a newline.
    def default_logger
      logger = ::Logger.new(STDOUT)
      logger.formatter = proc do |_, _, _, msg|
        "#{msg}\n"
      end
      logger
    end

    def options
      Options.instance.hash
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
module FreeZipcodeData
  # Process-wide holder of the option set.
  #
  # The options live in an instance variable of the singleton and start out
  # as nil, so reading them before +initialize_hash+ has been called yields
  # nil instead of raising NameError for an uninitialised class variable.
  class Options
    include Singleton

    def initialize
      @options = nil
    end

    # Stores the option set.
    #
    # @param hash [#[]] the option set
    # @return the stored object
    def initialize_hash(hash)
      @options = hash
    end

    # @param key [Object] option name
    # @return the option value, or nil when unknown or nothing is stored yet
    def [](key)
      @options && @options[key]
    end

    # Returns the stored option set (nil until +initialize_hash+ is called).
    # Note that this replaces Object#hash for the singleton instance.
    def hash
      @options
    end
  end
end
+ end