free_zipcode_data 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'colored'
4
+ require 'trollop'
5
+ require 'kiba'
6
+
7
+ require_relative '../etl/free_zipcode_data_job'
8
+
9
+ require 'pry' if ENV.fetch('APP_ENV', '') == 'development'
10
+
11
module FreeZipcodeData
  # Command-line driver: parses the options, downloads the data source,
  # builds the SQLite database in memory, runs the ETL job, saves the
  # database to disk and optionally dumps every table to a .csv file.
  #
  # rubocop:disable Metrics/ClassLength
  class Runner
    attr_accessor :logger, :options

    # Returns the shared Runner, creating and remembering it on first use.
    # The class can still be instantiated directly with +new+ for testing.
    def self.instance
      @instance ||= new
    end

    def initialize
      @logger = Logger.instance
    end

    # Runs the whole pipeline once, using the command-line arguments
    # collected by #collect_args.
    def start
      started_at = monotonic_now
      opt = FreeZipcodeData::Options.instance
      opt.initialize_hash(collect_args)
      @options = opt.hash

      logger.info("Starting FreeZipcodeData v#{VERSION}...".green)

      datasource = DataSource.new(options.country)
      datasource.download

      db_file = File.join(options.work_dir, 'free_zipcode_data.sqlite3')
      database = SqliteRam.new(db_file)
      configure_meta(database.conn, datasource.datafile)

      %i[country state county zipcode].each { |t| initialize_table(t, database) }

      extract_transform_load(datasource, database)

      logger.info("Saving database to disk '#{db_file}'...")
      database.save_to_disk

      if options.generate_files
        logger.info('Generating .csv files...')
        database.dump_tables(options.work_dir)
      end

      elapsed = Time.at(monotonic_now - started_at).utc.strftime('%H:%M:%S')
      # The line count is memoized by configure_meta; the file name is passed
      # again because the method requires it.
      line_count = datasource_line_count(datasource.datafile)
      logger.info("Processed #{line_count} zipcodes in [#{elapsed}].".yellow)
    end

    private

    # Seconds on the monotonic clock; unaffected by wall-clock adjustments.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Builds the table for +table_sym+ (:country, :state, :county, :zipcode)
    # using the table name configured in the "<table_sym>_tablename" option.
    def initialize_table(table_sym, database)
      tablename = options["#{table_sym}_tablename".to_sym]
      logger.verbose("Initializing #{table_sym} table: '#{tablename}'...")
      # Resolve e.g. :state -> FreeZipcodeData::StateTable without eval.
      klass = FreeZipcodeData.const_get("#{titleize(table_sym)}Table")
      table = klass.new(
        database: database.conn,
        tablename: tablename
      )
      table.build
    end

    # Counts the lines of +filename+. The result is memoized on the first
    # call, so later calls return that count regardless of the argument.
    def datasource_line_count(filename)
      @datasource_line_count ||= begin
        count = File.foreach(filename).count
        logger.verbose("Processing #{count} zipcodes in '#{filename}'...")
        count
      end
    end

    # Creates the +meta+ table and records the data file's line count in it.
    #
    # database   - the connection the statements are executed on
    # datasource - path of the data file whose lines are counted
    def configure_meta(database, datasource)
      schema = <<-SQL
        create table meta (
          id integer not null primary key,
          name varchar(255),
          value varchar(255)
        )
      SQL
      database.execute_batch(schema)

      sql = <<-SQL
        INSERT INTO meta (name, value)
        VALUES ('line_count', #{datasource_line_count(datasource)})
      SQL
      database.execute(sql)
    end

    # Sets up the Kiba ETL job for the data file and runs it.
    def extract_transform_load(datasource, database)
      job = ETL::FreeZipcodeDataJob.setup(
        datasource.datafile,
        database.conn,
        logger,
        options
      )
      Kiba.run(job)
    end

    # Declares and parses the command-line options with Trollop.
    #
    # rubocop:disable Metrics/BlockLength
    # rubocop:disable Metrics/MethodLength
    def collect_args
      Trollop.options do
        opt(
          :work_dir,
          'REQUIRED: Specify your work/build directory, where the SQLite and .csv files will be built',
          type: :string, required: true, short: '-w'
        )
        opt(
          :country,
          'Specify the country code for processing, or all countries if not specified',
          type: :string, required: false, short: '-f'
        )
        opt(
          :generate_files,
          'Generate CSV files: [counties.csv, states.csv, countries.csv, zipcodes.csv]',
          type: :boolean, required: false, short: '-g', default: false
        )
        opt(
          :country_tablename,
          'Specify the name for the `countries` table',
          type: :string, required: false, default: 'countries'
        )
        opt(
          :state_tablename,
          'Specify the name for the `states` table',
          type: :string, required: false, default: 'states'
        )
        opt(
          :county_tablename,
          'Specify the name for the `counties` table',
          type: :string, required: false, default: 'counties'
        )
        opt(
          :zipcode_tablename,
          'Specify the name for the `zipcodes` table',
          type: :string, required: false, default: 'zipcodes'
        )
        opt(
          :clobber,
          'Overwrite existing files',
          type: :boolean, required: false, short: '-c', default: false
        )
        opt(
          :dry_run,
          'Do not actually move or copy files',
          type: :boolean, required: false, short: '-d',
          default: false
        )
        opt(
          :verbose,
          'Be verbose with output',
          type: :boolean, required: false, short: '-v',
          default: false
        )
      end
    end
    # rubocop:enable Metrics/MethodLength
    # rubocop:enable Metrics/BlockLength

    # Upper-cases only the first character ("zipcode" -> "Zipcode"); the
    # rest of the string is left untouched. An empty string stays empty.
    def titleize(string)
      string.to_s.sub(/\A./, &:capitalize)
    end
  end
  # rubocop:enable Metrics/ClassLength
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sqlite3'
4
+ require 'csv'
5
+
6
+ # Open a SQlite DB, work with it in-memory and save back to disk
7
# Keeps two SQLite handles: an in-memory database that all work is done on
# (exposed as +conn+) and a file database that the in-memory contents are
# copied to by #save_to_disk.
class SqliteRam
  attr_reader :filename, :conn

  # sqlite_filename - path of the on-disk database file.
  def initialize(sqlite_filename)
    @filename = sqlite_filename
    @ram_db = SQLite3::Database.new(':memory:')
    @file_db = SQLite3::Database.new(sqlite_filename)
    @conn = @ram_db
  end

  # Copies the whole in-memory database into the file database.
  def save_to_disk
    backup = SQLite3::Backup.new(@file_db, 'main', @ram_db, 'main')
    begin
      backup.step(-1) # a negative page count copies everything in one step
    ensure
      # Release the backup handle even when the copy raises.
      backup.finish
    end
  end

  # Writes one "<table>.csv" file per table into the directory +path+.
  # Each file starts with a header row of column names, followed by every
  # row of the table.
  def dump_tables(path)
    # Single quotes: 'table' is a string literal. Double quotes denote
    # identifiers in SQL and only work as strings through a legacy quirk.
    tables = conn.execute("select name from sqlite_master where type = 'table'")
    tables.each do |(table)|
      quoted = quote_identifier(table)
      # Column 1 of each table_info row is the column name.
      header = conn.execute("pragma table_info(#{quoted})").map { |column| column[1] }
      CSV.open(File.join(path, "#{table}.csv"), 'w') do |csv|
        csv << header
        # Block form yields row by row instead of materialising the table.
        conn.execute("select * from #{quoted}") { |row| csv << row }
      end
    end
  end

  private

  # Wraps +name+ in double quotes (doubling embedded ones) so it is always
  # read as an identifier, whatever characters it contains.
  def quote_identifier(name)
    %("#{name.to_s.gsub('"', '""')}")
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Creates and fills the states table. The table name comes from
  # +tablename+, so #build and #write always address the same table.
  class StateTable < DbTable
    # Creates the table and its two unique indexes.
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          country_id integer not null,
          abbr varchar(2) not null,
          name varchar(255)
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_state"
        ON #{tablename} (abbr, country_id COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."state_name"
        ON #{tablename} (name COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the state described by +row+ (reads :short_state, :state and
    # :country). Rows without :short_state are skipped and return nil.
    # Constraint violations (duplicates) are ignored.
    def write(row)
      return nil unless row[:short_state]

      row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
      country_id = get_country_id(row[:country])

      # Use the configured table name (this used to be hard-coded to
      # "states") and bind the values instead of splicing them into the SQL.
      sql = "INSERT INTO #{tablename} (abbr, name, country_id) VALUES (?, ?, ?)"
      # to_s keeps a nil name as '' like the interpolated statement did
      # (assuming escape_single_quotes passed nil through - TODO confirm).
      values = [row[:short_state].to_s, row[:state].to_s, country_id]
      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # Swallow duplicates
      end

      update_progress
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module FreeZipcodeData
  # Release number of the gem as "major.minor.patch". The version rake tasks
  # rewrite this line by matching on the constant name, so keep the whole
  # assignment on a single line.
  VERSION = '1.0.1'.freeze
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'db_table'
4
+
5
module FreeZipcodeData
  # Creates and fills the zipcodes table. The table name comes from
  # +tablename+, so #build and #write always address the same table.
  class ZipcodeTable < DbTable
    # Creates the table and its unique (state_id, code, city) index.
    def build
      schema = <<-SQL
        create table #{tablename} (
          id integer not null primary key,
          code varchar(10) not null,
          state_id integer,
          city varchar(255),
          area_code varchar(3),
          lat float,
          lon float,
          accuracy varchar(8)
        )
      SQL
      database.execute_batch(schema)

      ndx = <<-SQL
        CREATE UNIQUE INDEX "main"."unique_zipcode"
        ON #{tablename} (state_id, code, city COLLATE NOCASE ASC);
      SQL
      database.execute_batch(ndx)
    end

    # Inserts the zipcode described by +row+ (reads :postal_code,
    # :short_state, :state, :city, :latitude, :longitude and :accuracy).
    # Rows without :postal_code are skipped and return nil. Constraint
    # violations (duplicates) are ignored; any other failure is re-raised
    # with the statement and its values attached.
    def write(row)
      return nil unless row[:postal_code]

      state_id = get_state_id(row[:short_state], row[:state])

      # Use the configured table name (this used to be hard-coded to
      # "zipcodes") and bind the values instead of splicing them into the SQL.
      sql = "INSERT INTO #{tablename} (code, state_id, city, lat, lon, accuracy) " \
            'VALUES (?, ?, ?, ?, ?, ?)'
      # Every value was previously written as a quoted SQL string; to_s keeps
      # that, so a missing value is still stored as '' rather than NULL.
      values = [
        row[:postal_code], state_id, row[:city],
        row[:latitude], row[:longitude], row[:accuracy]
      ].map(&:to_s)

      begin
        database.execute(sql, values)
      rescue SQLite3::ConstraintException
        # there are some duplicates - swallow them
      rescue StandardError => err
        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] VALUES: #{values.inspect}"
      end

      update_progress
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'readline'
4
+
5
+ require 'free_zipcode_data/version'
6
+
7
# Pathname and FileUtils are used below, so load them here rather than
# relying on another file having required them first.
require 'fileutils'
require 'pathname'

# Top-level namespace: environment/path helpers plus lazy loading of the
# gem's classes.
module FreeZipcodeData
  # Returns the parent of the directory containing this file, as a Pathname.
  def self.root
    Pathname.new(__dir__).parent
  end

  # Returns the value of APP_ENV, or 'development' when it is not set.
  def self.current_environment
    ENV.fetch('APP_ENV', 'development')
  end

  # :nocov:

  # Returns the path of the configuration file.
  #
  # In the 'test' environment this is spec/fixtures/<filename> under .root
  # and nothing is created. Otherwise it is FZD_CONFIG_FILE when that is
  # set, else <home>/<filename>; the file is touched so that it exists.
  def self.config_file(filename = '.free_zipcode_data.yml')
    return root.join('spec', 'fixtures', filename) if current_environment == 'test'

    # The block form only looks up the home directory when FZD_CONFIG_FILE is
    # absent, and honours +filename+, which used to be ignored here.
    file = ENV.fetch('FZD_CONFIG_FILE') { File.join(Dir.home, filename) }
    FileUtils.touch(file)
    file
  end
  # :nocov:

  # Classifies the platform from RUBY_PLATFORM.
  #
  # NOTE(review): the symbol returned for Windows-like platforms is
  # offensive. It is kept only because callers may compare against it;
  # rename it together with those callers.
  def self.os
    if RUBY_PLATFORM.match?(/cygwin|mswin|mingw|bccwin|wince|emx/)
      :retarded
    else
      :normal
    end
  end

  autoload :CountryTable, 'free_zipcode_data/country_table'
  autoload :StateTable, 'free_zipcode_data/state_table'
  autoload :CountyTable, 'free_zipcode_data/county_table'
  autoload :ZipcodeTable, 'free_zipcode_data/zipcode_table'
  autoload :DataSource, 'free_zipcode_data/data_source'
  autoload :Logger, 'free_zipcode_data/logger'
  autoload :Options, 'free_zipcode_data/options'
  autoload :Settings, 'free_zipcode_data/settings'
  autoload :SqliteRam, 'free_zipcode_data/sqlite_ram'
end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake'
4
+ require 'readline'
5
+ require 'fileutils'
6
+
7
+ # rubocop:disable Metrics/BlockLength
8
require 'shellwords'

# Rake tasks that bump the gem version, keep README.md in sync and prepend
# an entry to the CHANGELOG.
namespace :version do
  PROJECT_ROOT = File.expand_path(FileUtils.pwd).freeze
  PROJECT_NAME = ENV['PROJECT_NAME'] || File.basename(PROJECT_ROOT)

  desc 'Write changes to the CHANGELOG'
  task :changes do
    heading = "*#{read_version.join('.')}* (#{Time.now.strftime('%B %d, %Y')})\n\n"
    prepend_changelog([heading, *ask('CHANGELOG Entry:'), "\n"])
    launch_editor(changelog)
  end

  desc 'Increment the patch version and write changes to the changelog'
  task :bump_patch do
    bump_version(:patch)
  end

  desc 'Alias for :bump_patch'
  task bump: :bump_patch

  desc 'Increment the minor version and write changes to the changelog'
  task :bump_minor do
    bump_version(:minor)
  end

  desc 'Increment the major version and write changes to the changelog'
  task :bump_major do
    bump_version(:major)
  end

  private

  # Shared body of the bump tasks: confirm the branch, write the new
  # version, update the README and record the changelog entry.
  def bump_version(segment)
    exit unless check_branch_and_warn
    write_version_file(next_version(read_version, segment))
    update_readme_version_strings
    Rake::Task['version:changes'].invoke
  end

  # Returns [major, minor, patch] after bumping +segment+ (:major, :minor or
  # :patch). Segments below the bumped one are reset to 0; missing segments
  # count as 0.
  def next_version(current, segment)
    major, minor, patch = Array.new(3) { |index| current[index].to_i }
    case segment
    when :major then [major + 1, 0, 0]
    when :minor then [major, minor + 1, 0]
    else [major, minor, patch + 1]
    end
  end

  # Path of the version file: lib/<project name, dashes as directories>/version.rb
  def version_file_path
    "#{PROJECT_ROOT}/lib/#{PROJECT_NAME.split('-').join('/')}/version.rb"
  end

  # Name of the module that holds VERSION, derived from PROJECT_NAME.
  def module_name
    camelize_project_name(PROJECT_NAME)
  end

  # "foo-bar" -> "Foo::Bar", "foo_bar" -> "FooBar", "foo-bar_baz" -> "Foo::BarBaz".
  def camelize_project_name(name)
    name.split('-').map { |part| part.split('_').map(&:capitalize).join }.join('::')
  end

  # Reloads the version file and returns the version split on ".".
  def read_version
    silence_warnings { load version_file_path }
    # const_get resolves "A::B::VERSION" without evaluating a string as code.
    Object.const_get("#{module_name}::VERSION").split('.')
  end

  # Rewrites the VERSION assignment in the version file to +version_array+.
  def write_version_file(version_array)
    lines = File.readlines(version_file_path)
    File.write(version_file_path, rewrite_version_lines(lines, version_array.join('.')).join)
  end

  # Returns +lines+ with each VERSION assignment replaced, keeping the line's
  # own indentation. Lines that merely mention VERSION are left untouched.
  def rewrite_version_lines(lines, version)
    lines.map do |line|
      next line unless line.match?(/\A[ \t]*VERSION\s*=/)

      "#{line[/\A[ \t]*/]}VERSION = '#{version}'.freeze\n"
    end
  end

  # Replaces the "**Version: x.y.z**" line of README.md with the current
  # version. Returns nil when the file or such a line does not exist.
  def update_readme_version_strings
    return nil unless File.exist?('README.md')

    version_string = read_version.join('.')
    readme = File.read('README.md')
    regex = /^\*\*Version: [0-9\.]+\*\*$/i
    return nil unless readme.match?(regex)

    File.write('README.md', readme.gsub(regex, "**Version: #{version_string}**"))
  end

  # Path of the CHANGELOG file; the file is created on first access.
  def changelog
    @changelog_path ||= File.join(PROJECT_ROOT, 'CHANGELOG').tap do |path|
      FileUtils.touch(path)
    end
  end

  # Writes +text_array+ followed by the previous contents to the CHANGELOG.
  # The argument itself is not modified.
  def prepend_changelog(text_array)
    old = File.read(changelog).to_s.chomp
    File.open(changelog, 'w') do |f|
      [*text_array, old].flatten.each { |line| f.puts(line) }
    end
  end

  # Prompts for lines until end-of-input (Control-D) and returns them as an
  # array of "* <line>" bullet strings.
  #
  # rubocop:disable Lint/AssignmentInCondition
  def ask(message)
    puts message
    puts 'Hit <Control>-D when finished:'
    response = []
    while (line = Readline.readline('* ', false))
      response << "* #{line.chomp}"
    end
    response
  end
  # rubocop:enable Lint/AssignmentInCondition

  def current_branch
    `git symbolic-ref --short HEAD`.chomp
  end

  def branch_warning_message
    <<~STRING
      You typically do not want to bump versions on the 'master' branch
      unless you plan to rebase or back-merge into 'develop'.

      If you don't care or don't know what I'm talking about just enter 'y'
      and continue.

      Optionally, you can hit 'n' to abort and switch your branch to 'develop'
      or whatever branch you use for development, bump the version, merge to
      'master' then 'rake release'.

      Do you really want to bump the version on your 'master' branch? (y/n)
    STRING
  end

  # Returns true when the bump may proceed: always off 'master', and on
  # 'master' only after an affirmative answer on stdin.
  def check_branch_and_warn
    return true unless current_branch == 'master'

    puts(branch_warning_message)
    return true if affirmative?($stdin.gets)

    puts 'Aborting version bump.'
    false
  end

  # True when +answer+ starts with "y"/"Y". nil (end of input) and answers
  # that only contain a "y" somewhere else count as "no".
  def affirmative?(answer)
    answer.to_s.strip.match?(/\Ay/i)
  end

  # Opens +file+ in $EDITOR, if one is configured.
  def launch_editor(file)
    editor = ENV['EDITOR'].to_s.strip
    return if editor.empty?

    # Argument-list form: EDITOR may carry flags ("code -w"), and the file
    # path is passed as-is without going through a shell.
    system(*Shellwords.split(editor), file)
  end

  # Runs the block with Ruby warnings disabled, restoring them afterwards.
  def silence_warnings
    original_verbosity = $VERBOSE
    $VERBOSE = nil
    yield
  ensure
    $VERBOSE = original_verbosity
  end
end
181
+ # rubocop:enable Metrics/BlockLength