free_zipcode_data 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +25 -16
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG +11 -0
  6. data/CLAUDE.md +89 -0
  7. data/Gemfile +10 -0
  8. data/Gemfile.lock +50 -36
  9. data/README.md +3 -5
  10. data/Rakefile +1 -1
  11. data/free_zipcode_data.gemspec +8 -14
  12. data/lib/etl/common.rb +1 -0
  13. data/lib/etl/csv_source.rb +4 -4
  14. data/lib/free_zipcode_data/country_table.rb +10 -2
  15. data/lib/free_zipcode_data/county_table.rb +14 -6
  16. data/lib/free_zipcode_data/data_source.rb +2 -2
  17. data/lib/free_zipcode_data/db_table.rb +54 -7
  18. data/lib/free_zipcode_data/logger.rb +8 -12
  19. data/lib/free_zipcode_data/runner.rb +2 -2
  20. data/lib/free_zipcode_data/state_table.rb +37 -5
  21. data/lib/free_zipcode_data/version.rb +1 -1
  22. data/lib/free_zipcode_data/zipcode_table.rb +15 -5
  23. data/lib/free_zipcode_data.rb +3 -3
  24. data/lib/tasks/version.rake +27 -24
  25. data/spec/etl/csv_source_spec.rb +57 -0
  26. data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
  27. data/spec/fixtures/.free_zipcode_data.yml +1 -0
  28. data/spec/fixtures/US.txt +5 -0
  29. data/spec/fixtures/US.zip +0 -0
  30. data/spec/fixtures/test_data.csv +7 -0
  31. data/spec/fixtures/test_data.txt +5 -0
  32. data/spec/free_zipcode_data/country_table_spec.rb +52 -0
  33. data/spec/free_zipcode_data/county_table_spec.rb +84 -0
  34. data/spec/free_zipcode_data/data_source_spec.rb +131 -0
  35. data/spec/free_zipcode_data/db_table_spec.rb +164 -0
  36. data/spec/free_zipcode_data/logger_spec.rb +78 -0
  37. data/spec/free_zipcode_data/options_spec.rb +37 -0
  38. data/spec/free_zipcode_data/runner_spec.rb +91 -0
  39. data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
  40. data/spec/free_zipcode_data/state_table_spec.rb +112 -0
  41. data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
  42. data/spec/free_zipcode_data_spec.rb +38 -0
  43. data/spec/spec_helper.rb +23 -2
  44. data/spec/support/database_helpers.rb +48 -0
  45. metadata +38 -91
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c86afee1e7972351d5d9f827348d4a0cd5cd3607e5ee5b2b7b4bc73db723bd2c
4
- data.tar.gz: be4ee76892a7ebd900f9182360fd0b6263e9e523d35c50bafba4cf962ed74e8c
3
+ metadata.gz: 7ec6a6653ed5f1da585ef025e5150b5595b6600db3acdb4aa1aea4274213fee4
4
+ data.tar.gz: 72c8b8636e2b7cd3cfe927d6f0cd3a27d22ae33c94ebc063fb6bb1b7acf22dcb
5
5
  SHA512:
6
- metadata.gz: 067d3c28ae188431f5d6418fa195fe72cbba273b266be6cae5aade8156e52d6c99cbe5767b31c3e6ce4214dd70a5969e93b85ea3f66a5eb3fa1434c1af7a2ffc
7
- data.tar.gz: 2e80bb40c1e6b9863548a2f87630ce9e6ca0201d1520703b1b6f63d1b3020eaef8f6ce570df1d73365be3e4d7b7cd79cd5901a4677bf02956b20bbb2ce56e1c0
6
+ metadata.gz: 0307eb9d96805a1ff9510f9b8ccc8d64c14c7d1acd9a47199304d65d403f9c39d27656547fe93f5f1be7615fbb31c2ed0f50762d88801e0bbf00239dbdbce22a
7
+ data.tar.gz: 4bfc6e97652ee64d75d2b68b1c1dd84824658bf7abe9bf6611b7b2b2446febd16ea9439d3da682ae762bf5d77659b83ac9349d0e5ae537448a0679829b240147
data/.gitignore CHANGED
@@ -5,3 +5,4 @@
5
5
  /stubs
6
6
  /vendor/bundle/
7
7
  /pkg
8
+ .claude/
data/.rubocop.yml CHANGED
@@ -1,23 +1,24 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.7
2
+ TargetRubyVersion: 3.4
3
3
 
4
4
  # Include gemspec and Rakefile
5
5
  Include:
6
- - '**/*.gemspec'
7
- - '**/*.podspec'
8
- - '**/*.jbuilder'
9
- - '**/*.rake'
10
- - '**/Gemfile'
11
- - '**/Rakefile'
12
- - '**/Capfile'
13
- - '**/Guardfile'
14
- - '**/Podfile'
15
- - '**/Thorfile'
16
- - '**/Vagrantfile'
6
+ - "**/*.rb"
7
+ - "**/*.gemspec"
8
+ - "**/*.podspec"
9
+ - "**/*.jbuilder"
10
+ - "**/*.rake"
11
+ - "**/Gemfile"
12
+ - "**/Rakefile"
13
+ - "**/Capfile"
14
+ - "**/Guardfile"
15
+ - "**/Podfile"
16
+ - "**/Thorfile"
17
+ - "**/Vagrantfile"
17
18
  Exclude:
18
- - 'vendor/**/*'
19
- - 'stubs/**/*'
20
- - 'spec/support/shared_contexts/*'
19
+ - "vendor/**/*"
20
+ - "stubs/**/*"
21
+ - "spec/support/shared_contexts/*"
21
22
 
22
23
  NewCops: enable
23
24
 
@@ -51,6 +52,10 @@ Style/DoubleNegation:
51
52
  Style/PerlBackrefs:
52
53
  Enabled: false
53
54
 
55
+ Style/OpenStructUse:
56
+ Exclude:
57
+ - "spec/**/*"
58
+
54
59
  ########################################
55
60
  # Lint Cops
56
61
 
@@ -66,6 +71,10 @@ Security/Eval:
66
71
  ########################################
67
72
  # Metrics Cops
68
73
 
74
+ Metrics/BlockLength:
75
+ Exclude:
76
+ - "spec/**/*"
77
+
69
78
  Metrics/MethodLength:
70
79
  CountComments: false # count full line comments?
71
80
  Max: 30
@@ -77,7 +86,7 @@ Metrics/AbcSize:
77
86
  Enabled: false
78
87
 
79
88
  ########################################
80
- # Metrics Cops
89
+ # Naming Cops
81
90
 
82
91
  Naming/FileName:
83
92
  Enabled: false
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.4.8
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ *1.1.0* (February 14, 2026)
2
+
3
+ * Fix state abbreviation uniqueness: state lookups are now scoped by country, allowing the same abbreviation (e.g., "NY") in different countries
4
+ * Update state and state_name indexes to include country_id for cross-country uniqueness
5
+ * Synthesize state entries from country data for countries without state/province subdivisions
6
+ * Add logging for silent failure cascade: warn when countries, states, counties, or zipcodes are skipped due to missing lookups
7
+ * Narrow rescue SQLite3::ConstraintException to only swallow UNIQUE violations; re-raise NOT NULL, FOREIGN KEY, and CHECK constraint errors
8
+ * Add comprehensive RSpec test suite with cross-country integration tests
9
+ * Upgrade to Ruby 3.4.8 and fix rubyzip 3.x API compatibility
10
+ * Update README to remove stale rake task references
11
+
1
12
  *1.0.6* (September 30, 2025)
2
13
 
3
14
  * Bump rexml from 3.3.9 to 3.4.2
data/CLAUDE.md ADDED
@@ -0,0 +1,89 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ A Ruby gem that downloads postal/zipcode data from GeoNames.org, processes it via an ETL pipeline, and outputs an SQLite3 database and optional CSV files. Supports single-country or all-countries processing.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ # Install dependencies (vendored to vendor/bundle, binstubs in stubs/)
13
+ bundle install
14
+
15
+ # Run all tests
16
+ bundle exec rspec
17
+
18
+ # Run a single test file
19
+ bundle exec rspec spec/path/to/file_spec.rb
20
+
21
+ # Run a specific test by line number
22
+ bundle exec rspec spec/path/to/file_spec.rb:42
23
+
24
+ # Lint
25
+ bundle exec rubocop
26
+
27
+ # Lint with auto-correct
28
+ bundle exec rubocop -a
29
+
30
+ # Version bumping (do on develop branch, not master)
31
+ bundle exec rake version:bump_patch
32
+ bundle exec rake version:bump_minor
33
+ bundle exec rake version:bump_major
34
+
35
+ # Build and install gem
36
+ bundle exec rake build
37
+ bundle exec rake install
38
+
39
+ # Release gem
40
+ bundle exec rake release
41
+ ```
42
+
43
+ ## Architecture
44
+
45
+ The gem follows an ETL (Extract, Transform, Load) pattern using the Kiba gem:
46
+
47
+ 1. **Extract**: `DataSource` downloads zip files from GeoNames.org, extracts them, and prepares CSV files with headers
48
+ 2. **Source**: `CsvSource` (Kiba source) feeds rows from the prepared CSV into the pipeline
49
+ 3. **Load**: Four Kiba destination table classes write rows into an in-memory SQLite database
50
+
51
+ ### Key Flow
52
+
53
+ `bin/free_zipcode_data` → `Runner#start` → `DataSource#download` → `DataSource#datafile` (extract zip + add CSV headers) → `SqliteRam` (in-memory DB) → `ETL::FreeZipcodeDataJob` (Kiba pipeline) → `SqliteRam#save_to_disk`
54
+
55
+ ### Core Classes
56
+
57
+ - **`FreeZipcodeData::Runner`** - CLI entry point; parses args via Optimist, orchestrates the full pipeline
58
+ - **`FreeZipcodeData::DataSource`** - Downloads and extracts GeoNames zip files, prepares CSV with headers
59
+ - **`SqliteRam`** - Wraps SQLite3; works entirely in-memory then saves to disk via `SQLite3::Backup`
60
+ - **`FreeZipcodeData::DbTable`** - Base class for all table classes; provides progress bar, SQL helpers, and country lookup from `country_lookup_table.yml`
61
+ - **`FreeZipcodeData::CountryTable`/`StateTable`/`CountyTable`/`ZipcodeTable`** - Kiba destinations; each has `build` (creates schema + indexes) and `write` (inserts rows, swallows duplicate constraint violations)
62
+ - **`ETL::FreeZipcodeDataJob`** - Configures the Kiba pipeline with one source and four destinations
63
+ - **`CsvSource`** - Kiba-compatible CSV reader
64
+
65
+ ### Singletons
66
+
67
+ `Options` and `Logger` are singletons (via Ruby's `Singleton` module). `Runner` has an `.instance` convenience class method (returns `new` each time, not cached).
68
+
69
+ ## Configuration
70
+
71
+ - `.ruby-version`: 3.4.8
72
+ - Bundle path: `vendor/bundle` (binstubs in `stubs/`)
73
+ - Environment: `APP_ENV` controls environment (`test`, `development`)
74
+ - Config file: `~/.free_zipcode_data.yml` (overridable via `FZD_CONFIG_FILE` env var; uses `spec/fixtures/` version in test)
75
+
76
+ ## Rubocop
77
+
78
+ Key style settings (`.rubocop.yml`):
79
+ - Target Ruby 3.4
80
+ - Max line length: 110
81
+ - Max method length: 30 lines
82
+ - `Style/ClassVars`, `Style/Documentation`, `Metrics/AbcSize`, `Lint/SuppressedException` disabled
83
+ - `vendor/` and `stubs/` excluded
84
+
85
+ ## Git Workflow
86
+
87
+ - `master` is the release branch
88
+ - `develop` is the development branch
89
+ - Version bumps should happen on `develop`, then merge to `master` before `rake release`
data/Gemfile CHANGED
@@ -4,3 +4,13 @@ source 'https://rubygems.org'
4
4
  git_source(:github) { |repo| "https://github.com/#{repo}.git" }
5
5
 
6
6
  gemspec
7
+
8
+ group :development do
9
+ gem 'bundler'
10
+ gem 'pry-nav', '~> 0.2'
11
+ gem 'rake', '~> 13.0'
12
+ gem 'rspec', '~> 3.7'
13
+ gem 'rubocop'
14
+ gem 'ruby-prof', '~> 0.17'
15
+ gem 'simplecov', '~> 0.16'
16
+ end
data/Gemfile.lock CHANGED
@@ -1,9 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- free_zipcode_data (1.0.6)
4
+ free_zipcode_data (1.1.0)
5
5
  colored (~> 1.2)
6
+ csv
6
7
  kiba (~> 4.0)
8
+ logger
7
9
  optimist (~> 3.0)
8
10
  ruby-progressbar (~> 1.9)
9
11
  rubyzip (>= 1.2.2)
@@ -12,63 +14,75 @@ PATH
12
14
  GEM
13
15
  remote: https://rubygems.org/
14
16
  specs:
15
- ast (2.4.2)
17
+ ast (2.4.3)
16
18
  coderay (1.1.3)
17
19
  colored (1.2)
18
- diff-lcs (1.4.4)
19
- docile (1.4.0)
20
+ csv (3.3.5)
21
+ diff-lcs (1.6.2)
22
+ docile (1.4.1)
23
+ json (2.18.1)
20
24
  kiba (4.0.0)
25
+ language_server-protocol (3.17.0.5)
26
+ lint_roller (1.1.0)
27
+ logger (1.7.0)
21
28
  method_source (0.9.2)
22
29
  mini_portile2 (2.8.9)
23
30
  optimist (3.2.1)
24
- parallel (1.21.0)
25
- parser (3.0.2.0)
31
+ parallel (1.27.0)
32
+ parser (3.3.10.1)
26
33
  ast (~> 2.4.1)
34
+ racc
35
+ prism (1.9.0)
27
36
  pry (0.12.2)
28
37
  coderay (~> 1.1.0)
29
38
  method_source (~> 0.9.0)
30
39
  pry-nav (0.3.0)
31
40
  pry (>= 0.9.10, < 0.13.0)
32
- rainbow (3.0.0)
33
- rake (13.0.6)
34
- regexp_parser (2.1.1)
35
- rexml (3.4.2)
36
- rspec (3.10.0)
37
- rspec-core (~> 3.10.0)
38
- rspec-expectations (~> 3.10.0)
39
- rspec-mocks (~> 3.10.0)
40
- rspec-core (3.10.1)
41
- rspec-support (~> 3.10.0)
42
- rspec-expectations (3.10.1)
41
+ racc (1.8.1)
42
+ rainbow (3.1.1)
43
+ rake (13.3.1)
44
+ regexp_parser (2.11.3)
45
+ rspec (3.13.2)
46
+ rspec-core (~> 3.13.0)
47
+ rspec-expectations (~> 3.13.0)
48
+ rspec-mocks (~> 3.13.0)
49
+ rspec-core (3.13.6)
50
+ rspec-support (~> 3.13.0)
51
+ rspec-expectations (3.13.5)
43
52
  diff-lcs (>= 1.2.0, < 2.0)
44
- rspec-support (~> 3.10.0)
45
- rspec-mocks (3.10.2)
53
+ rspec-support (~> 3.13.0)
54
+ rspec-mocks (3.13.7)
46
55
  diff-lcs (>= 1.2.0, < 2.0)
47
- rspec-support (~> 3.10.0)
48
- rspec-support (3.10.3)
49
- rubocop (1.22.3)
56
+ rspec-support (~> 3.13.0)
57
+ rspec-support (3.13.7)
58
+ rubocop (1.84.2)
59
+ json (~> 2.3)
60
+ language_server-protocol (~> 3.17.0.2)
61
+ lint_roller (~> 1.1.0)
50
62
  parallel (~> 1.10)
51
- parser (>= 3.0.0.0)
63
+ parser (>= 3.3.0.2)
52
64
  rainbow (>= 2.2.2, < 4.0)
53
- regexp_parser (>= 1.8, < 3.0)
54
- rexml
55
- rubocop-ast (>= 1.12.0, < 2.0)
65
+ regexp_parser (>= 2.9.3, < 3.0)
66
+ rubocop-ast (>= 1.49.0, < 2.0)
56
67
  ruby-progressbar (~> 1.7)
57
- unicode-display_width (>= 1.4.0, < 3.0)
58
- rubocop-ast (1.12.0)
59
- parser (>= 3.0.1.1)
68
+ unicode-display_width (>= 2.4.0, < 4.0)
69
+ rubocop-ast (1.49.0)
70
+ parser (>= 3.3.7.2)
71
+ prism (~> 1.7)
60
72
  ruby-prof (0.18.0)
61
- ruby-progressbar (1.11.0)
62
- rubyzip (3.1.1)
63
- simplecov (0.21.2)
73
+ ruby-progressbar (1.13.0)
74
+ rubyzip (3.2.2)
75
+ simplecov (0.22.0)
64
76
  docile (~> 1.1)
65
77
  simplecov-html (~> 0.11)
66
78
  simplecov_json_formatter (~> 0.1)
67
- simplecov-html (0.12.3)
68
- simplecov_json_formatter (0.1.3)
79
+ simplecov-html (0.13.2)
80
+ simplecov_json_formatter (0.1.4)
69
81
  sqlite3 (1.7.3)
70
82
  mini_portile2 (~> 2.8.0)
71
- unicode-display_width (2.1.0)
83
+ unicode-display_width (3.2.0)
84
+ unicode-emoji (~> 4.1)
85
+ unicode-emoji (4.2.0)
72
86
 
73
87
  PLATFORMS
74
88
  ruby
@@ -84,4 +98,4 @@ DEPENDENCIES
84
98
  simplecov (~> 0.16)
85
99
 
86
100
  BUNDLED WITH
87
- 2.2.22
101
+ 2.6.9
data/README.md CHANGED
@@ -6,11 +6,11 @@ This project is an automated solution for retrieving and collating US and worldw
6
6
 
7
7
  ## History
8
8
 
9
- In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files. Later, we wrote 3 rake tasks to automate this process.
9
+ In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
10
10
 
11
11
  In 2017 we began using [GeoNames](http://www.geonames.org) data, which is licensed under Creative Commons. We are grateful to [GeoNames](http://www.geonames.org) for sharing, and urge you to [visit their site](http://www.geonames.org) and support their work.
12
12
 
13
- In 2018 we refactored the project and made it into a Ruby gem with a command-line executable for automating this process.
13
+ In 2018 we refactored the project and made it into a Ruby gem with a unified command-line executable (`free_zipcode_data`) that handles downloading, processing, and database generation in a single step.
14
14
 
15
15
  ## What's Included
16
16
 
@@ -22,7 +22,7 @@ See the GeoNames [readme.txt](http://download.geonames.org/export/zip/readme.txt
22
22
 
23
23
  ## Usage
24
24
 
25
- First, you need to install Ruby and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
25
+ First, you need to install Ruby 3.4+ and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
26
26
 
27
27
  ```bash
28
28
  $ gem install free_zipcode_data
@@ -61,8 +61,6 @@ $ free_zipcode_data --work-dir /tmp/work_dir --country US --generate-files
61
61
  $ free_zipcode_data --work-dir /tmp/work_dir --generate-files
62
62
  ```
63
63
 
64
- The rake tasks cascade, from the bottom up. So if you run `rake data:populate_db`, it will automatically call `rake data:build` if the .csv files are missing, which will call `rake data:download` if the .zip files are missing.
65
-
66
64
  ## SQLite3 Database
67
65
 
68
66
  The executable will generate an SQLite3 database in the specified directory `--work-dir` but it will not generate the `.csv` files by default. Specify `--generate-files` if you want those as well.
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rubygems'
4
4
  require 'bundler/setup'
5
5
 
6
6
  require 'rake'
7
- Dir['lib/tasks/**/*.rake'].sort.each { |ext| load ext }
7
+ Dir['lib/tasks/**/*.rake'].each { |ext| load ext }
8
8
 
9
9
  # Install rubygem tasks
10
10
  Bundler::GemHelper.install_tasks
data/free_zipcode_data.gemspec CHANGED
@@ -23,18 +23,12 @@ Gem::Specification.new do |spec|
23
23
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
24
24
  spec.require_paths = ['lib']
25
25
 
26
- spec.add_development_dependency 'bundler'
27
- spec.add_development_dependency 'pry-nav', '~> 0.2'
28
- spec.add_development_dependency 'rake', '~> 13.0'
29
- spec.add_development_dependency 'rspec', '~> 3.7'
30
- spec.add_development_dependency 'rubocop'
31
- spec.add_development_dependency 'ruby-prof', '~> 0.17'
32
- spec.add_development_dependency 'simplecov', '~> 0.16'
33
-
34
- spec.add_runtime_dependency 'colored', '~> 1.2'
35
- spec.add_runtime_dependency 'kiba', '~> 4.0'
36
- spec.add_runtime_dependency 'optimist', '~> 3.0'
37
- spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9'
38
- spec.add_runtime_dependency 'rubyzip', '>= 1.2.2'
39
- spec.add_runtime_dependency 'sqlite3', '~> 1.3'
26
+ spec.add_dependency 'colored', '~> 1.2'
27
+ spec.add_dependency 'csv'
28
+ spec.add_dependency 'kiba', '~> 4.0'
29
+ spec.add_dependency 'logger'
30
+ spec.add_dependency 'optimist', '~> 3.0'
31
+ spec.add_dependency 'ruby-progressbar', '~> 1.9'
32
+ spec.add_dependency 'rubyzip', '>= 1.2.2'
33
+ spec.add_dependency 'sqlite3', '~> 1.3'
40
34
  end
data/lib/etl/common.rb CHANGED
@@ -16,6 +16,7 @@ end
16
16
  def limit(count)
17
17
  count = Integer(count || -1)
18
18
  return if count == -1
19
+
19
20
  transform do |row|
20
21
  @counter ||= 0
21
22
  @counter += 1
data/lib/etl/csv_source.rb CHANGED
@@ -14,10 +14,10 @@ class CsvSource
14
14
 
15
15
  def each
16
16
  CSV.open(filename,
17
- col_sep: delimeter,
18
- headers: headers,
19
- header_converters: :symbol,
20
- quote_char: quote_char) do |csv|
17
+ col_sep: delimeter,
18
+ headers: headers,
19
+ header_converters: :symbol,
20
+ quote_char: quote_char) do |csv|
21
21
  csv.each do |row|
22
22
  yield(row.to_hash)
23
23
  end
data/lib/free_zipcode_data/country_table.rb CHANGED
@@ -25,6 +25,10 @@ module FreeZipcodeData
25
25
 
26
26
  def write(row)
27
27
  country_hash = country_lookup_table[row[:country]]
28
+ unless country_hash
29
+ warn_once("Skipping unknown country '#{row[:country]}': not in country_lookup_table")
30
+ return update_progress
31
+ end
28
32
 
29
33
  sql = <<-SQL
30
34
  INSERT INTO countries (alpha2, alpha3, iso, name)
@@ -36,8 +40,12 @@ module FreeZipcodeData
36
40
 
37
41
  begin
38
42
  database.execute(sql)
39
- rescue SQLite3::ConstraintException
40
- # Swallow duplicates
43
+ rescue SQLite3::ConstraintException => e
44
+ unless e.message.include?('UNIQUE')
45
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46
+ end
47
+ rescue StandardError => e
48
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
41
49
  end
42
50
 
43
51
  update_progress
data/lib/free_zipcode_data/county_table.rb CHANGED
@@ -26,8 +26,14 @@ module FreeZipcodeData
26
26
  def write(row)
27
27
  return nil unless row[:county]
28
28
 
29
- state_id = get_state_id(row[:short_state], row[:state])
30
- return nil unless state_id
29
+ state_id = get_state_id(row[:country], row[:short_state], row[:state])
30
+ unless state_id
31
+ logger.verbose(
32
+ "Skipping county '#{row[:county]}': no state found for " \
33
+ "abbr='#{row[:short_state]}', country='#{row[:country]}'"
34
+ )
35
+ return nil
36
+ end
31
37
 
32
38
  sql = <<-SQL
33
39
  INSERT INTO counties (state_id, abbr, name)
@@ -39,10 +45,12 @@ module FreeZipcodeData
39
45
 
40
46
  begin
41
47
  database.execute(sql)
42
- rescue SQLite3::ConstraintException
43
- # swallow duplicates
44
- rescue StandardError => err
45
- raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
48
+ rescue SQLite3::ConstraintException => e
49
+ unless e.message.include?('UNIQUE')
50
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
51
+ end
52
+ rescue StandardError => e
53
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46
54
  end
47
55
 
48
56
  update_progress
data/lib/free_zipcode_data/data_source.rb CHANGED
@@ -56,11 +56,11 @@ module FreeZipcodeData
56
56
  if options[:clobber]
57
57
  Zip.on_exists_proc = true
58
58
  Logger.instance.verbose("Extracting: #{zipfile}...")
59
- entry.extract(country_file)
59
+ entry.extract(destination_directory: options.work_dir)
60
60
  end
61
61
  else
62
62
  Logger.instance.verbose("Extracting: #{zipfile}...")
63
- entry.extract(country_file)
63
+ entry.extract(destination_directory: options.work_dir)
64
64
  end
65
65
  break
66
66
  end
data/lib/free_zipcode_data/db_table.rb CHANGED
@@ -8,6 +8,7 @@ module FreeZipcodeData
8
8
  ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
9
9
 
10
10
  attr_reader :database, :tablename
11
+
11
12
  @@progressbar = nil
12
13
 
13
14
  def initialize(database:, tablename:)
@@ -23,6 +24,18 @@ module FreeZipcodeData
23
24
 
24
25
  private
25
26
 
27
+ def logger
28
+ Logger.instance
29
+ end
30
+
31
+ def warn_once(message)
32
+ @warned_messages ||= {}
33
+ return if @warned_messages[message]
34
+
35
+ logger.warn(message)
36
+ @warned_messages[message] = true
37
+ end
38
+
26
39
  def country_lookup_table
27
40
  @country_lookup_table ||=
28
41
  begin
@@ -33,9 +46,9 @@ module FreeZipcodeData
33
46
 
34
47
  def select_first(sql)
35
48
  rows = database.execute(sql)
36
- rows[0].nil? ? nil : rows[0].first
37
- rescue SQLite3::SQLException => err
38
- raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
49
+ rows[0]&.first
50
+ rescue SQLite3::SQLException => e
51
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
39
52
  end
40
53
 
41
54
  def get_country_id(country)
@@ -43,20 +56,54 @@ module FreeZipcodeData
43
56
  select_first(sql)
44
57
  end
45
58
 
46
- def get_state_id(state_abbr, state_name)
47
- sql = "SELECT id FROM states
48
- WHERE abbr = '#{state_abbr}' OR name = '#{escape_single_quotes(state_name)}'"
59
+ # Look up a state ID scoped to a country, trying progressively less specific
60
+ # criteria: (1) abbr + name + country, (2) abbr + country, (3) name + country.
61
+ # Returns nil if no match is found.
62
+ def get_state_id(country, state_abbr, state_name)
63
+ escaped_country = escape_single_quotes(country)
64
+ return nil if escaped_country.empty?
65
+
66
+ escaped_abbr = escape_single_quotes(state_abbr)
67
+ escaped_name = escape_single_quotes(state_name)
68
+ country_cond = "c.alpha2 = '#{escaped_country}'"
69
+ # Most specific lookup: abbr + name + country
70
+ res = find_state_where("s.abbr = '#{escaped_abbr}'", "s.name = '#{escaped_name}'", country_cond)
71
+ return res if res
72
+
73
+ # Fallback: abbr + country only
74
+ res = find_state_where("s.abbr = '#{escaped_abbr}'", country_cond)
75
+ if res
76
+ logger.verbose("State fallback: abbr '#{state_abbr}' + country '#{country}' (name mismatch)")
77
+ return res
78
+ end
79
+ # Fallback: name + country only
80
+ res = find_state_where("s.name = '#{escaped_name}'", country_cond)
81
+ if res
82
+ logger.verbose("State fallback: name '#{state_name}' + country '#{country}' (abbr mismatch)")
83
+ return res
84
+ end
85
+ logger.warn("State lookup failed: abbr='#{state_abbr}', name='#{state_name}', country='#{country}'")
86
+ nil
87
+ end
88
+
89
+ def find_state_where(*conditions)
90
+ sql = <<-SQL
91
+ SELECT s.id FROM states s
92
+ INNER JOIN countries c ON s.country_id = c.id
93
+ WHERE #{conditions.join(' AND ')}
94
+ SQL
49
95
  select_first(sql)
50
96
  end
51
97
 
52
98
  def get_county_id(county)
53
99
  return nil if county.nil?
100
+
54
101
  sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'"
55
102
  select_first(sql)
56
103
  end
57
104
 
58
105
  def escape_single_quotes(string)
59
- string&.gsub(/[']/, '\'\'') || ''
106
+ string&.gsub('\'', '\'\'') || ''
60
107
  end
61
108
  end
62
109
  end
data/lib/free_zipcode_data/logger.rb CHANGED
@@ -13,27 +13,23 @@ module FreeZipcodeData
13
13
  @log_provider = provider
14
14
  end
15
15
 
16
- def log_exception(e, data = {})
17
- msg = "EXCEPTION : #{e.class.name} : #{e.message}"
16
+ def log_exception(error, data = {})
17
+ msg = "EXCEPTION : #{error.class.name} : #{error.message}"
18
18
  msg += "\n data : #{data.inspect}" if data && !data.empty?
19
- msg += "\n #{e.backtrace[0, 6].join("\n ")}"
19
+ msg += "\n #{error.backtrace[0, 6].join("\n ")}"
20
20
  log_provider.error(msg)
21
21
  end
22
22
 
23
- def method_missing(meth, *args, &block)
23
+ def method_missing(meth, *, &)
24
24
  if log_provider.respond_to?(meth)
25
- log_provider.send(meth, *args, &block)
25
+ log_provider.send(meth, *, &)
26
26
  else
27
27
  super
28
28
  end
29
29
  end
30
30
 
31
- def respond_to?(meth, include_private = false)
32
- if log_provider.respond_to?(meth)
33
- true
34
- else
35
- super
36
- end
31
+ def respond_to_missing?(meth, include_private = false)
32
+ log_provider.respond_to?(meth) || super
37
33
  end
38
34
 
39
35
  def verbose(msg)
@@ -43,7 +39,7 @@ module FreeZipcodeData
43
39
  private
44
40
 
45
41
  def default_logger
46
- logger = ::Logger.new(STDOUT)
42
+ logger = ::Logger.new($stdout)
47
43
  logger.formatter = proc do |_, _, _, msg|
48
44
  "#{msg}\n"
49
45
  end