free_zipcode_data 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +25 -16
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG +11 -0
  6. data/CLAUDE.md +89 -0
  7. data/Gemfile +10 -0
  8. data/Gemfile.lock +50 -36
  9. data/README.md +3 -5
  10. data/Rakefile +1 -1
  11. data/free_zipcode_data.gemspec +8 -14
  12. data/lib/etl/common.rb +1 -0
  13. data/lib/etl/csv_source.rb +4 -4
  14. data/lib/free_zipcode_data/country_table.rb +10 -2
  15. data/lib/free_zipcode_data/county_table.rb +14 -6
  16. data/lib/free_zipcode_data/data_source.rb +2 -2
  17. data/lib/free_zipcode_data/db_table.rb +54 -7
  18. data/lib/free_zipcode_data/logger.rb +8 -12
  19. data/lib/free_zipcode_data/runner.rb +2 -2
  20. data/lib/free_zipcode_data/state_table.rb +37 -5
  21. data/lib/free_zipcode_data/version.rb +1 -1
  22. data/lib/free_zipcode_data/zipcode_table.rb +15 -5
  23. data/lib/free_zipcode_data.rb +3 -3
  24. data/lib/tasks/version.rake +27 -24
  25. data/spec/etl/csv_source_spec.rb +57 -0
  26. data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
  27. data/spec/fixtures/.free_zipcode_data.yml +1 -0
  28. data/spec/fixtures/US.txt +5 -0
  29. data/spec/fixtures/US.zip +0 -0
  30. data/spec/fixtures/test_data.csv +7 -0
  31. data/spec/fixtures/test_data.txt +5 -0
  32. data/spec/free_zipcode_data/country_table_spec.rb +52 -0
  33. data/spec/free_zipcode_data/county_table_spec.rb +84 -0
  34. data/spec/free_zipcode_data/data_source_spec.rb +131 -0
  35. data/spec/free_zipcode_data/db_table_spec.rb +164 -0
  36. data/spec/free_zipcode_data/logger_spec.rb +78 -0
  37. data/spec/free_zipcode_data/options_spec.rb +37 -0
  38. data/spec/free_zipcode_data/runner_spec.rb +91 -0
  39. data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
  40. data/spec/free_zipcode_data/state_table_spec.rb +112 -0
  41. data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
  42. data/spec/free_zipcode_data_spec.rb +38 -0
  43. data/spec/spec_helper.rb +23 -2
  44. data/spec/support/database_helpers.rb +48 -0
  45. metadata +38 -91
@@ -58,9 +58,9 @@ module FreeZipcodeData
58
58
  private
59
59
 
60
60
  def initialize_table(table_sym, database)
61
- tablename = options["#{table_sym}_tablename".to_sym]
61
+ tablename = options[:"#{table_sym}_tablename"]
62
62
  logger.verbose("Initializing #{table_sym} table: '#{tablename}'...")
63
- klass = instance_eval("#{titleize(table_sym)}Table", __FILE__, __LINE__)
63
+ klass = FreeZipcodeData.const_get(:"#{titleize(table_sym)}Table")
64
64
  table = klass.new(
65
65
  database: database.conn,
66
66
  tablename: tablename
@@ -17,21 +17,27 @@ module FreeZipcodeData
17
17
 
18
18
  ndx = <<-SQL
19
19
  CREATE UNIQUE INDEX "main"."unique_state"
20
- ON #{tablename} (abbr, country_id COLLATE NOCASE ASC);
20
+ ON #{tablename} (abbr COLLATE NOCASE ASC, country_id);
21
21
  SQL
22
22
  database.execute_batch(ndx)
23
23
 
24
24
  ndx = <<-SQL
25
25
  CREATE UNIQUE INDEX "main"."state_name"
26
- ON #{tablename} (name COLLATE NOCASE ASC);
26
+ ON #{tablename} (name COLLATE NOCASE ASC, country_id);
27
27
  SQL
28
28
  database.execute_batch(ndx)
29
29
  end
30
30
 
31
31
  def write(row)
32
- return nil unless row[:short_state]
32
+ return nil unless synthesize_state(row)
33
+
33
34
  row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
34
35
  country_id = get_country_id(row[:country])
36
+ unless country_id
37
+ warn_once("Country '#{row[:country]}' not found in countries table, skipping state")
38
+ return nil
39
+ end
40
+
35
41
  sql = <<-SQL
36
42
  INSERT INTO states (abbr, name, country_id)
37
43
  VALUES ('#{row[:short_state]}',
@@ -41,11 +47,37 @@ module FreeZipcodeData
41
47
  SQL
42
48
  begin
43
49
  database.execute(sql)
44
- rescue SQLite3::ConstraintException
45
- # Swallow duplicates
50
+ rescue SQLite3::ConstraintException => e
51
+ unless e.message.include?('UNIQUE')
52
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
53
+ end
54
+ rescue StandardError => e
55
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46
56
  end
47
57
 
48
58
  update_progress
49
59
  end
60
+
61
+ private
62
+
63
+ # Synthesize state from country for stateless countries.
64
+ # Mutates the row hash so downstream Kiba destinations (CountyTable, ZipcodeTable)
65
+ # see the synthesized short_state and state values.
66
+ def synthesize_state(row)
67
+ if row[:short_state].nil? || row[:short_state] == ''
68
+ country_entry = country_lookup_table[row[:country]]
69
+ unless country_entry
70
+ warn_once(
71
+ "Cannot synthesize state for country '#{row[:country]}': " \
72
+ 'not in country_lookup_table'
73
+ )
74
+ return false
75
+ end
76
+
77
+ row[:short_state] = row[:country]
78
+ row[:state] = country_entry[:name]
79
+ end
80
+ row[:short_state]
81
+ end
50
82
  end
51
83
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FreeZipcodeData
4
- VERSION = '1.0.6'.freeze
4
+ VERSION = '1.1.0'
5
5
  end
@@ -29,7 +29,15 @@ module FreeZipcodeData
29
29
  def write(row)
30
30
  return nil unless row[:postal_code]
31
31
 
32
- state_id = get_state_id(row[:short_state], row[:state])
32
+ state_id = get_state_id(row[:country], row[:short_state], row[:state])
33
+ unless state_id
34
+ logger.verbose(
35
+ "Skipping zipcode '#{row[:postal_code]}': no state found for " \
36
+ "abbr='#{row[:short_state]}', country='#{row[:country]}'"
37
+ )
38
+ return nil
39
+ end
40
+
33
41
  city_name = escape_single_quotes(row[:city])
34
42
 
35
43
  sql = <<-SQL
@@ -45,10 +53,12 @@ module FreeZipcodeData
45
53
 
46
54
  begin
47
55
  database.execute(sql)
48
- rescue SQLite3::ConstraintException => _err
49
- # there are some duplicates - swallow them
50
- rescue StandardError => err
51
- raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
56
+ rescue SQLite3::ConstraintException => e
57
+ unless e.message.include?('UNIQUE')
58
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
59
+ end
60
+ rescue StandardError => e
61
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
52
62
  end
53
63
 
54
64
  update_progress
@@ -14,16 +14,16 @@ module FreeZipcodeData
14
14
  ENV.fetch('APP_ENV', 'development')
15
15
  end
16
16
 
17
- #:nocov:
17
+ # :nocov:
18
18
  def self.config_file(filename = '.free_zipcode_data.yml')
19
19
  return root.join('spec', 'fixtures', filename) if current_environment == 'test'
20
20
 
21
- home = ENV.fetch('HOME')
21
+ home = Dir.home
22
22
  file = ENV.fetch('FZD_CONFIG_FILE', File.join(home, '.free_zipcode_data.yml'))
23
23
  FileUtils.touch(file)
24
24
  file
25
25
  end
26
- #:nocov:
26
+ # :nocov:
27
27
 
28
28
  def self.os
29
29
  if RUBY_PLATFORM.match?(/cygwin|mswin|mingw|bccwin|wince|emx/)
@@ -6,8 +6,6 @@ require 'fileutils'
6
6
 
7
7
  # rubocop:disable Metrics/BlockLength
8
8
  namespace :version do
9
- PROJECT_ROOT = File.expand_path(FileUtils.pwd).freeze
10
- PROJECT_NAME = ENV['PROJECT_NAME'] || File.basename(PROJECT_ROOT)
11
9
 
12
10
  desc 'Write changes to the CHANGELOG'
13
11
  task :changes do
@@ -23,7 +21,7 @@ namespace :version do
23
21
 
24
22
  desc 'Increment the patch version and write changes to the changelog'
25
23
  task :bump_patch do
26
- exit unless check_branch_and_warn
24
+ exit unless check_branch_and_warn?
27
25
  major, minor, patch = read_version
28
26
  patch = patch.to_i + 1
29
27
  write_version_file([major, minor, patch])
@@ -36,7 +34,7 @@ namespace :version do
36
34
 
37
35
  desc 'Increment the minor version and write changes to the changelog'
38
36
  task :bump_minor do
39
- exit unless check_branch_and_warn
37
+ exit unless check_branch_and_warn?
40
38
  major, minor, _patch = read_version
41
39
  minor = minor.to_i + 1
42
40
  patch = 0
@@ -47,7 +45,7 @@ namespace :version do
47
45
 
48
46
  desc 'Increment the major version and write changes to the changelog'
49
47
  task :bump_major do
50
- exit unless check_branch_and_warn
48
+ exit unless check_branch_and_warn?
51
49
  major, _minor, _patch = read_version
52
50
  major = major.to_i + 1
53
51
  minor = 0
@@ -59,19 +57,27 @@ namespace :version do
59
57
 
60
58
  private
61
59
 
60
+ def project_root
61
+ @project_root ||= File.expand_path(FileUtils.pwd).freeze
62
+ end
63
+
64
+ def project_name
65
+ @project_name ||= ENV['PROJECT_NAME'] || File.basename(project_root)
66
+ end
67
+
62
68
  def version_file_path
63
- split = PROJECT_NAME.split('-')
64
- "#{PROJECT_ROOT}/lib/#{split.join('/')}/version.rb"
69
+ split = project_name.split('-')
70
+ "#{project_root}/lib/#{split.join('/')}/version.rb"
65
71
  end
66
72
 
67
73
  def module_name
68
- case PROJECT_NAME
74
+ case project_name
69
75
  when /-/
70
- PROJECT_NAME.split('-').map(&:capitalize).join('::')
76
+ project_name.split('-').map(&:capitalize).join('::')
71
77
  when /_/
72
- PROJECT_NAME.split('_').map(&:capitalize).join
78
+ project_name.split('_').map(&:capitalize).join
73
79
  else
74
- PROJECT_NAME.capitalize
80
+ project_name.capitalize
75
81
  end
76
82
  end
77
83
 
@@ -79,13 +85,13 @@ namespace :version do
79
85
  silence_warnings do
80
86
  load version_file_path
81
87
  end
82
- text = eval("#{module_name}::VERSION")
88
+ text = module_name.split('::').inject(Object) { |mod, name| mod.const_get(name) }::VERSION
83
89
  text.split('.')
84
90
  end
85
91
 
86
92
  def write_version_file(version_array)
87
93
  version = version_array.join('.')
88
- new_version = %( VERSION = '#{version}'.freeze)
94
+ new_version = %( VERSION = '#{version}')
89
95
  lines = File.readlines(version_file_path)
90
96
  File.open(version_file_path, 'w') do |f|
91
97
  lines.each do |line|
@@ -100,19 +106,17 @@ namespace :version do
100
106
 
101
107
  def update_readme_version_strings
102
108
  version_string = read_version.join('.')
103
- readme = open('README.md').read
109
+ readme = File.read('README.md')
104
110
  regex = /^\*\*Version: [0-9.]+\*\*$/i
105
111
  return nil unless readme =~ regex
106
112
 
107
- File.open('README.md', 'w') do |f|
108
- f.write(readme.gsub(regex, "**Version: #{version_string}**"))
109
- end
113
+ File.write('README.md', readme.gsub(regex, "**Version: #{version_string}**"))
110
114
  end
111
115
 
112
116
  def changelog
113
117
  return @changelog_path if @changelog_path
114
118
 
115
- @changelog_path = File.join(PROJECT_ROOT, 'CHANGELOG')
119
+ @changelog_path = File.join(project_root, 'CHANGELOG')
116
120
  FileUtils.touch(@changelog_path)
117
121
  @changelog_path
118
122
  end
@@ -159,16 +163,15 @@ namespace :version do
159
163
  STRING
160
164
  end
161
165
 
162
- def check_branch_and_warn
166
+ def check_branch_and_warn?
163
167
  return true unless current_branch == 'master'
164
168
 
165
169
  puts(branch_warning_message)
166
- while (line = $stdin.gets.chomp)
167
- return true if line =~ /[yY]/
170
+ line = $stdin.gets.chomp
171
+ return true if line =~ /[yY]/
168
172
 
169
- puts 'Aborting version bump.'
170
- return false
171
- end
173
+ puts 'Aborting version bump.'
174
+ false
172
175
  end
173
176
 
174
177
  def launch_editor(file)
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'etl/csv_source'
4
+
5
+ RSpec.describe CsvSource do
6
+ let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') }
7
+
8
+ describe '#initialize' do
9
+ it 'stores the filename and options' do
10
+ source = described_class.new(filename: fixture_csv)
11
+ expect(source.filename).to eq(fixture_csv)
12
+ expect(source.headers).to be true
13
+ expect(source.delimeter).to eq("\t")
14
+ end
15
+
16
+ it 'accepts custom delimiter and quote char' do
17
+ source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
18
+ expect(source.delimeter).to eq(',')
19
+ expect(source.quote_char).to eq('"')
20
+ end
21
+ end
22
+
23
+ # CsvSource implements only #each (Kiba source protocol), not Enumerable
24
+ # rubocop:disable Style/MapIntoArray
25
+ describe '#each' do
26
+ it 'yields each row as a hash with symbolized keys' do
27
+ source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
28
+ rows = []
29
+ source.each { |row| rows << row }
30
+
31
+ expect(rows.length).to eq(6)
32
+ expect(rows.first).to be_a(Hash)
33
+ expect(rows.first.keys).to include(:country, :postal_code, :city)
34
+ end
35
+
36
+ it 'parses the correct data from each row' do
37
+ source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
38
+ rows = []
39
+ source.each { |row| rows << row }
40
+
41
+ first = rows.first
42
+ expect(first[:country]).to eq('US')
43
+ expect(first[:postal_code]).to eq('10001')
44
+ expect(first[:city]).to eq('New York')
45
+ expect(first[:short_state]).to eq('NY')
46
+ end
47
+
48
+ it 'handles rows from multiple countries' do
49
+ source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"')
50
+ countries = []
51
+ source.each { |row| countries << row[:country] }
52
+
53
+ expect(countries.uniq.sort).to eq(%w[CA GB US])
54
+ end
55
+ end
56
+ # rubocop:enable Style/MapIntoArray
57
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kiba'
4
+ require 'etl/free_zipcode_data_job'
5
+
6
+ RSpec.describe ETL::FreeZipcodeDataJob do
7
+ let(:db) { create_test_database(line_count: 6) }
8
+ let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') }
9
+ let(:logger) { FreeZipcodeData::Logger.instance }
10
+ let(:string_io) { StringIO.new }
11
+ let(:options) do
12
+ OpenStruct.new(
13
+ country_tablename: 'countries',
14
+ state_tablename: 'states',
15
+ county_tablename: 'counties',
16
+ zipcode_tablename: 'zipcodes',
17
+ verbose: false
18
+ )
19
+ end
20
+
21
+ before do
22
+ FreeZipcodeData::Options.instance.initialize_hash(options)
23
+ logger.log_provider = Logger.new(string_io)
24
+ end
25
+
26
+ describe '.setup' do
27
+ it 'returns a Kiba job definition' do
28
+ job = described_class.setup(fixture_csv, db, logger, options)
29
+ expect(job).not_to be_nil
30
+ end
31
+ end
32
+
33
+ describe 'full ETL pipeline' do
34
+ before do
35
+ # Build all tables
36
+ FreeZipcodeData::CountryTable.new(database: db, tablename: 'countries').build
37
+ FreeZipcodeData::StateTable.new(database: db, tablename: 'states').build
38
+ FreeZipcodeData::CountyTable.new(database: db, tablename: 'counties').build
39
+ FreeZipcodeData::ZipcodeTable.new(database: db, tablename: 'zipcodes').build
40
+
41
+ job = described_class.setup(fixture_csv, db, logger, options)
42
+ Kiba.run(job)
43
+ end
44
+
45
+ it 'populates the countries table' do
46
+ rows = db.execute('SELECT alpha2 FROM countries ORDER BY alpha2')
47
+ expect(rows.flatten).to include('CA', 'GB', 'US')
48
+ end
49
+
50
+ it 'populates the states table' do
51
+ rows = db.execute('SELECT abbr FROM states ORDER BY abbr')
52
+ abbrs = rows.flatten
53
+ expect(abbrs).to include('CA', 'IL', 'NY')
54
+ end
55
+
56
+ it 'populates the counties table' do
57
+ rows = db.execute('SELECT name FROM counties ORDER BY name')
58
+ names = rows.flatten
59
+ expect(names).to include('Cook', 'Los Angeles', 'New York')
60
+ end
61
+
62
+ it 'populates the zipcodes table' do
63
+ rows = db.execute('SELECT code FROM zipcodes ORDER BY code')
64
+ codes = rows.flatten
65
+ expect(codes).to include('10001', '60601', '90210')
66
+ end
67
+
68
+ it 'links zipcodes to states' do
69
+ rows = db.execute(<<-SQL)
70
+ SELECT z.code, s.abbr
71
+ FROM zipcodes z
72
+ JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
73
+ WHERE z.code = '60601'
74
+ SQL
75
+ expect(rows[0]).to eq(%w[60601 IL])
76
+ end
77
+
78
+ it 'links states to countries' do
79
+ rows = db.execute(<<-SQL)
80
+ SELECT s.abbr, c.alpha2
81
+ FROM states s
82
+ JOIN countries c ON s.country_id = c.id
83
+ WHERE s.abbr = 'NY'
84
+ SQL
85
+ expect(rows[0]).to eq(%w[NY US])
86
+ end
87
+
88
+ it 'stores geocode data for zipcodes' do
89
+ rows = db.execute("SELECT lat, lon FROM zipcodes WHERE code = '10001'")
90
+ lat = rows[0][0].to_f
91
+ lon = rows[0][1].to_f
92
+ expect(lat).to be_within(0.01).of(40.7484)
93
+ expect(lon).to be_within(0.01).of(-73.9967)
94
+ end
95
+
96
+ it 'scopes duplicate state abbreviations by country' do
97
+ us_ny = db.execute(<<-SQL)
98
+ SELECT s.id, s.name, c.alpha2
99
+ FROM states s
100
+ JOIN countries c ON s.country_id = c.id
101
+ WHERE s.abbr = 'NY' AND c.alpha2 = 'US'
102
+ SQL
103
+ ca_ny = db.execute(<<-SQL)
104
+ SELECT s.id, s.name, c.alpha2
105
+ FROM states s
106
+ JOIN countries c ON s.country_id = c.id
107
+ WHERE s.abbr = 'NY' AND c.alpha2 = 'CA'
108
+ SQL
109
+ expect(us_ny.length).to eq(1)
110
+ expect(ca_ny.length).to eq(1)
111
+ expect(us_ny[0][0]).not_to eq(ca_ny[0][0])
112
+ expect(us_ny[0][1]).to eq('New York')
113
+ expect(ca_ny[0][1]).to eq('Northern York')
114
+ end
115
+
116
+ it 'links cross-country zipcodes to the correct state' do
117
+ us_zip = db.execute(<<-SQL)
118
+ SELECT z.code, s.name, c.alpha2
119
+ FROM zipcodes z
120
+ JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
121
+ JOIN countries c ON s.country_id = c.id
122
+ WHERE z.code = '10001'
123
+ SQL
124
+ ca_zip = db.execute(<<-SQL)
125
+ SELECT z.code, s.name, c.alpha2
126
+ FROM zipcodes z
127
+ JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
128
+ JOIN countries c ON s.country_id = c.id
129
+ WHERE z.code = 'K0A'
130
+ SQL
131
+ expect(us_zip[0]).to eq(['10001', 'New York', 'US'])
132
+ expect(ca_zip[0]).to eq(['K0A', 'Northern York', 'CA'])
133
+ end
134
+ end
135
+ end
@@ -0,0 +1 @@
1
+ ---
@@ -0,0 +1,5 @@
1
+ US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4
2
+ US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4
3
+ US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4
4
+ CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4
5
+ GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1
Binary file (data/spec/fixtures/US.zip): no textual diff shown
@@ -0,0 +1,7 @@
1
+ COUNTRY,POSTAL_CODE,CITY,STATE,SHORT_STATE,COUNTY,SHORT_COUNTY,COMMUNITY,SHORT_COMMUNITY,LATITUDE,LONGITUDE,ACCURACY
2
+ US,10001,New York,New York,NY,New York,061,Manhattan,MN,40.7484,-73.9967,4
3
+ US,90210,Beverly Hills,California,CA,Los Angeles,037,,LA,34.0901,-118.4065,4
4
+ US,60601,Chicago,Illinois,IL,Cook,031,,CK,41.8819,-87.6278,4
5
+ CA,H2X,Montreal,Quebec,QC,,,Montreal,,45.5088,-73.5878,4
6
+ CA,K0A,Almonte,Northern York,NY,Lanark,LNK,,,45.2260,-76.1840,4
7
+ GB,SW1A,London,England,ENG,Westminster,,City of Westminster,,51.5014,-0.1419,1
@@ -0,0 +1,5 @@
1
+ US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4
2
+ US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4
3
+ US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4
4
+ CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4
5
+ GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'free_zipcode_data/country_table'
4
+
5
+ RSpec.describe FreeZipcodeData::CountryTable do
6
+ let(:db) { create_test_database(line_count: 5) }
7
+ let(:table) { described_class.new(database: db, tablename: 'countries') }
8
+
9
+ before { table.build }
10
+
11
+ describe '#build' do
12
+ it 'creates the countries table' do
13
+ tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='countries'")
14
+ expect(tables.length).to eq(1)
15
+ end
16
+
17
+ it 'creates the unique alpha2 index' do
18
+ indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='countries'")
19
+ index_names = indexes.map(&:first)
20
+ expect(index_names).to include('unique_country_alpha2')
21
+ end
22
+
23
+ it 'creates columns for alpha2, alpha3, iso, and name' do
24
+ columns = db.execute("PRAGMA table_info('countries')").map { |c| c[1] }
25
+ expect(columns).to include('alpha2', 'alpha3', 'iso', 'name')
26
+ end
27
+ end
28
+
29
+ describe '#write' do
30
+ it 'inserts a country row using the lookup table' do
31
+ table.write({ country: 'US' })
32
+ rows = db.execute('SELECT alpha2, alpha3, name FROM countries')
33
+ expect(rows.length).to eq(1)
34
+ expect(rows[0]).to eq(['US', 'USA', 'United States of America'])
35
+ end
36
+
37
+ it 'inserts multiple different countries' do
38
+ table.write({ country: 'US' })
39
+ table.write({ country: 'CA' })
40
+ table.write({ country: 'GB' })
41
+ rows = db.execute('SELECT alpha2 FROM countries ORDER BY alpha2')
42
+ expect(rows.flatten).to eq(%w[CA GB US])
43
+ end
44
+
45
+ it 'silently ignores duplicate country codes' do
46
+ table.write({ country: 'US' })
47
+ expect { table.write({ country: 'US' }) }.not_to raise_error
48
+ rows = db.execute('SELECT COUNT(*) FROM countries')
49
+ expect(rows[0][0]).to eq(1)
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'free_zipcode_data/county_table'
4
+
5
+ RSpec.describe FreeZipcodeData::CountyTable do
6
+ let(:db) { create_test_database(line_count: 5) }
7
+ let(:table) { described_class.new(database: db, tablename: 'counties') }
8
+
9
+ before do
10
+ seed_countries(db)
11
+ seed_states(db)
12
+ table.build
13
+ end
14
+
15
+ describe '#build' do
16
+ it 'creates the counties table' do
17
+ tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='counties'")
18
+ expect(tables.length).to eq(1)
19
+ end
20
+
21
+ it 'creates the unique_county index' do
22
+ indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='counties'")
23
+ index_names = indexes.map(&:first)
24
+ expect(index_names).to include('unique_county')
25
+ end
26
+
27
+ it 'creates columns for state_id, abbr, name, and county_seat' do
28
+ columns = db.execute("PRAGMA table_info('counties')").map { |c| c[1] }
29
+ expect(columns).to include('state_id', 'abbr', 'name', 'county_seat')
30
+ end
31
+ end
32
+
33
+ describe '#write' do
34
+ it 'inserts a county row' do
35
+ table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
36
+ state: 'Illinois' })
37
+ rows = db.execute('SELECT name, abbr FROM counties')
38
+ expect(rows.length).to eq(1)
39
+ expect(rows[0]).to eq(%w[Cook 031])
40
+ end
41
+
42
+ it 'links the county to its state' do
43
+ table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
44
+ state: 'Illinois' })
45
+ state_id = db.execute("SELECT id FROM states WHERE abbr = 'IL'")[0][0]
46
+ county_state_id = db.execute('SELECT state_id FROM counties')[0][0]
47
+ expect(county_state_id).to eq(state_id)
48
+ end
49
+
50
+ it 'returns nil and skips when county is nil' do
51
+ result = table.write({ country: 'US', county: nil, short_county: nil, short_state: 'IL',
52
+ state: 'Illinois' })
53
+ expect(result).to be_nil
54
+ rows = db.execute('SELECT COUNT(*) FROM counties')
55
+ expect(rows[0][0]).to eq(0)
56
+ end
57
+
58
+ it 'returns nil when state cannot be found' do
59
+ result = table.write({ country: 'US', county: 'Unknown', short_county: '999', short_state: 'ZZ',
60
+ state: 'Nonexistent' })
61
+ expect(result).to be_nil
62
+ rows = db.execute('SELECT COUNT(*) FROM counties')
63
+ expect(rows[0][0]).to eq(0)
64
+ end
65
+
66
+ it 'silently ignores duplicate county entries' do
67
+ table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
68
+ state: 'Illinois' })
69
+ expect do
70
+ table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
71
+ state: 'Illinois' })
72
+ end.not_to raise_error
73
+ rows = db.execute('SELECT COUNT(*) FROM counties')
74
+ expect(rows[0][0]).to eq(1)
75
+ end
76
+
77
+ it 'handles county names with single quotes' do
78
+ table.write({ country: 'US', county: "Prince George's", short_county: '033', short_state: 'NY',
79
+ state: 'New York' })
80
+ rows = db.execute('SELECT name FROM counties')
81
+ expect(rows[0][0]).to eq("Prince George's")
82
+ end
83
+ end
84
+ end