free_zipcode_data 1.0.6 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +25 -16
- data/.ruby-version +1 -1
- data/CHANGELOG +11 -0
- data/CLAUDE.md +89 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +50 -36
- data/README.md +3 -5
- data/Rakefile +1 -1
- data/free_zipcode_data.gemspec +8 -14
- data/lib/etl/common.rb +1 -0
- data/lib/etl/csv_source.rb +4 -4
- data/lib/free_zipcode_data/country_table.rb +10 -2
- data/lib/free_zipcode_data/county_table.rb +14 -6
- data/lib/free_zipcode_data/data_source.rb +2 -2
- data/lib/free_zipcode_data/db_table.rb +54 -7
- data/lib/free_zipcode_data/logger.rb +8 -12
- data/lib/free_zipcode_data/runner.rb +2 -2
- data/lib/free_zipcode_data/state_table.rb +37 -5
- data/lib/free_zipcode_data/version.rb +1 -1
- data/lib/free_zipcode_data/zipcode_table.rb +15 -5
- data/lib/free_zipcode_data.rb +3 -3
- data/lib/tasks/version.rake +27 -24
- data/spec/etl/csv_source_spec.rb +57 -0
- data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
- data/spec/fixtures/.free_zipcode_data.yml +1 -0
- data/spec/fixtures/US.txt +5 -0
- data/spec/fixtures/US.zip +0 -0
- data/spec/fixtures/test_data.csv +7 -0
- data/spec/fixtures/test_data.txt +5 -0
- data/spec/free_zipcode_data/country_table_spec.rb +52 -0
- data/spec/free_zipcode_data/county_table_spec.rb +84 -0
- data/spec/free_zipcode_data/data_source_spec.rb +131 -0
- data/spec/free_zipcode_data/db_table_spec.rb +164 -0
- data/spec/free_zipcode_data/logger_spec.rb +78 -0
- data/spec/free_zipcode_data/options_spec.rb +37 -0
- data/spec/free_zipcode_data/runner_spec.rb +91 -0
- data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
- data/spec/free_zipcode_data/state_table_spec.rb +112 -0
- data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
- data/spec/free_zipcode_data_spec.rb +38 -0
- data/spec/spec_helper.rb +23 -2
- data/spec/support/database_helpers.rb +48 -0
- metadata +38 -91
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7ec6a6653ed5f1da585ef025e5150b5595b6600db3acdb4aa1aea4274213fee4
|
|
4
|
+
data.tar.gz: 72c8b8636e2b7cd3cfe927d6f0cd3a27d22ae33c94ebc063fb6bb1b7acf22dcb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0307eb9d96805a1ff9510f9b8ccc8d64c14c7d1acd9a47199304d65d403f9c39d27656547fe93f5f1be7615fbb31c2ed0f50762d88801e0bbf00239dbdbce22a
|
|
7
|
+
data.tar.gz: 4bfc6e97652ee64d75d2b68b1c1dd84824658bf7abe9bf6611b7b2b2446febd16ea9439d3da682ae762bf5d77659b83ac9349d0e5ae537448a0679829b240147
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -1,23 +1,24 @@
|
|
|
1
1
|
AllCops:
|
|
2
|
-
TargetRubyVersion:
|
|
2
|
+
TargetRubyVersion: 3.4
|
|
3
3
|
|
|
4
4
|
# Include gemspec and Rakefile
|
|
5
5
|
Include:
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
6
|
+
- "**/*.rb"
|
|
7
|
+
- "**/*.gemspec"
|
|
8
|
+
- "**/*.podspec"
|
|
9
|
+
- "**/*.jbuilder"
|
|
10
|
+
- "**/*.rake"
|
|
11
|
+
- "**/Gemfile"
|
|
12
|
+
- "**/Rakefile"
|
|
13
|
+
- "**/Capfile"
|
|
14
|
+
- "**/Guardfile"
|
|
15
|
+
- "**/Podfile"
|
|
16
|
+
- "**/Thorfile"
|
|
17
|
+
- "**/Vagrantfile"
|
|
17
18
|
Exclude:
|
|
18
|
-
-
|
|
19
|
-
-
|
|
20
|
-
-
|
|
19
|
+
- "vendor/**/*"
|
|
20
|
+
- "stubs/**/*"
|
|
21
|
+
- "spec/support/shared_contexts/*"
|
|
21
22
|
|
|
22
23
|
NewCops: enable
|
|
23
24
|
|
|
@@ -51,6 +52,10 @@ Style/DoubleNegation:
|
|
|
51
52
|
Style/PerlBackrefs:
|
|
52
53
|
Enabled: false
|
|
53
54
|
|
|
55
|
+
Style/OpenStructUse:
|
|
56
|
+
Exclude:
|
|
57
|
+
- "spec/**/*"
|
|
58
|
+
|
|
54
59
|
########################################
|
|
55
60
|
# Lint Cops
|
|
56
61
|
|
|
@@ -66,6 +71,10 @@ Security/Eval:
|
|
|
66
71
|
########################################
|
|
67
72
|
# Metrics Cops
|
|
68
73
|
|
|
74
|
+
Metrics/BlockLength:
|
|
75
|
+
Exclude:
|
|
76
|
+
- "spec/**/*"
|
|
77
|
+
|
|
69
78
|
Metrics/MethodLength:
|
|
70
79
|
CountComments: false # count full line comments?
|
|
71
80
|
Max: 30
|
|
@@ -77,7 +86,7 @@ Metrics/AbcSize:
|
|
|
77
86
|
Enabled: false
|
|
78
87
|
|
|
79
88
|
########################################
|
|
80
|
-
#
|
|
89
|
+
# Naming Cops
|
|
81
90
|
|
|
82
91
|
Naming/FileName:
|
|
83
92
|
Enabled: false
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.
|
|
1
|
+
3.4.8
|
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
*1.1.0* (February 14, 2026)
|
|
2
|
+
|
|
3
|
+
* Fix state abbreviation uniqueness: state lookups are now scoped by country, allowing the same abbreviation (e.g., "NY") in different countries
|
|
4
|
+
* Update state and state_name indexes to include country_id for cross-country uniqueness
|
|
5
|
+
* Synthesize state entries from country data for countries without state/province subdivisions
|
|
6
|
+
* Add logging for silent failure cascade: warn when countries, states, counties, or zipcodes are skipped due to missing lookups
|
|
7
|
+
* Narrow rescue SQLite3::ConstraintException to only swallow UNIQUE violations; re-raise NOT NULL, FOREIGN KEY, and CHECK constraint errors
|
|
8
|
+
* Add comprehensive RSpec test suite with cross-country integration tests
|
|
9
|
+
* Upgrade to Ruby 3.4.8 and fix rubyzip 3.x API compatibility
|
|
10
|
+
* Update README to remove stale rake task references
|
|
11
|
+
|
|
1
12
|
*1.0.6* (September 30, 2025)
|
|
2
13
|
|
|
3
14
|
* Bump rexml from 3.3.9 to 3.4.2
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
A Ruby gem that downloads postal/zipcode data from GeoNames.org, processes it via an ETL pipeline, and outputs an SQLite3 database and optional CSV files. Supports single-country or all-countries processing.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install dependencies (vendored to vendor/bundle, binstubs in stubs/)
|
|
13
|
+
bundle install
|
|
14
|
+
|
|
15
|
+
# Run all tests
|
|
16
|
+
bundle exec rspec
|
|
17
|
+
|
|
18
|
+
# Run a single test file
|
|
19
|
+
bundle exec rspec spec/path/to/file_spec.rb
|
|
20
|
+
|
|
21
|
+
# Run a specific test by line number
|
|
22
|
+
bundle exec rspec spec/path/to/file_spec.rb:42
|
|
23
|
+
|
|
24
|
+
# Lint
|
|
25
|
+
bundle exec rubocop
|
|
26
|
+
|
|
27
|
+
# Lint with auto-correct
|
|
28
|
+
bundle exec rubocop -a
|
|
29
|
+
|
|
30
|
+
# Version bumping (do on develop branch, not master)
|
|
31
|
+
bundle exec rake version:bump_patch
|
|
32
|
+
bundle exec rake version:bump_minor
|
|
33
|
+
bundle exec rake version:bump_major
|
|
34
|
+
|
|
35
|
+
# Build and install gem
|
|
36
|
+
bundle exec rake build
|
|
37
|
+
bundle exec rake install
|
|
38
|
+
|
|
39
|
+
# Release gem
|
|
40
|
+
bundle exec rake release
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Architecture
|
|
44
|
+
|
|
45
|
+
The gem follows an ETL (Extract, Transform, Load) pattern using the Kiba gem:
|
|
46
|
+
|
|
47
|
+
1. **Extract**: `DataSource` downloads zip files from GeoNames.org, extracts them, and prepares CSV files with headers
|
|
48
|
+
2. **Source**: `CsvSource` (Kiba source) feeds rows from the prepared CSV into the pipeline
|
|
49
|
+
3. **Load**: Four Kiba destination table classes write rows into an in-memory SQLite database
|
|
50
|
+
|
|
51
|
+
### Key Flow
|
|
52
|
+
|
|
53
|
+
`bin/free_zipcode_data` → `Runner#start` → `DataSource#download` → `DataSource#datafile` (extract zip + add CSV headers) → `SqliteRam` (in-memory DB) → `ETL::FreeZipcodeDataJob` (Kiba pipeline) → `SqliteRam#save_to_disk`
|
|
54
|
+
|
|
55
|
+
### Core Classes
|
|
56
|
+
|
|
57
|
+
- **`FreeZipcodeData::Runner`** - CLI entry point; parses args via Optimist, orchestrates the full pipeline
|
|
58
|
+
- **`FreeZipcodeData::DataSource`** - Downloads and extracts GeoNames zip files, prepares CSV with headers
|
|
59
|
+
- **`SqliteRam`** - Wraps SQLite3; works entirely in-memory then saves to disk via `SQLite3::Backup`
|
|
60
|
+
- **`FreeZipcodeData::DbTable`** - Base class for all table classes; provides progress bar, SQL helpers, and country lookup from `country_lookup_table.yml`
|
|
61
|
+
- **`FreeZipcodeData::CountryTable`/`StateTable`/`CountyTable`/`ZipcodeTable`** - Kiba destinations; each has `build` (creates schema + indexes) and `write` (inserts rows, swallows duplicate constraint violations)
|
|
62
|
+
- **`ETL::FreeZipcodeDataJob`** - Configures the Kiba pipeline with one source and four destinations
|
|
63
|
+
- **`CsvSource`** - Kiba-compatible CSV reader
|
|
64
|
+
|
|
65
|
+
### Singletons
|
|
66
|
+
|
|
67
|
+
`Options` and `Logger` are singletons (via Ruby's `Singleton` module). `Runner` has an `.instance` convenience class method (returns `new` each time, not cached).
|
|
68
|
+
|
|
69
|
+
## Configuration
|
|
70
|
+
|
|
71
|
+
- `.ruby-version`: 3.4.8
|
|
72
|
+
- Bundle path: `vendor/bundle` (binstubs in `stubs/`)
|
|
73
|
+
- Environment: `APP_ENV` controls environment (`test`, `development`)
|
|
74
|
+
- Config file: `~/.free_zipcode_data.yml` (overridable via `FZD_CONFIG_FILE` env var; uses `spec/fixtures/` version in test)
|
|
75
|
+
|
|
76
|
+
## Rubocop
|
|
77
|
+
|
|
78
|
+
Key style settings (`.rubocop.yml`):
|
|
79
|
+
- Target Ruby 3.4
|
|
80
|
+
- Max line length: 110
|
|
81
|
+
- Max method length: 30 lines
|
|
82
|
+
- `Style/ClassVars`, `Style/Documentation`, `Metrics/AbcSize`, `Lint/SuppressedException` disabled
|
|
83
|
+
- `vendor/` and `stubs/` excluded
|
|
84
|
+
|
|
85
|
+
## Git Workflow
|
|
86
|
+
|
|
87
|
+
- `master` is the release branch
|
|
88
|
+
- `develop` is the development branch
|
|
89
|
+
- Version bumps should happen on `develop`, then merge to `master` before `rake release`
|
data/Gemfile
CHANGED
|
@@ -4,3 +4,13 @@ source 'https://rubygems.org'
|
|
|
4
4
|
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
|
5
5
|
|
|
6
6
|
gemspec
|
|
7
|
+
|
|
8
|
+
group :development do
|
|
9
|
+
gem 'bundler'
|
|
10
|
+
gem 'pry-nav', '~> 0.2'
|
|
11
|
+
gem 'rake', '~> 13.0'
|
|
12
|
+
gem 'rspec', '~> 3.7'
|
|
13
|
+
gem 'rubocop'
|
|
14
|
+
gem 'ruby-prof', '~> 0.17'
|
|
15
|
+
gem 'simplecov', '~> 0.16'
|
|
16
|
+
end
|
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
free_zipcode_data (1.0
|
|
4
|
+
free_zipcode_data (1.1.0)
|
|
5
5
|
colored (~> 1.2)
|
|
6
|
+
csv
|
|
6
7
|
kiba (~> 4.0)
|
|
8
|
+
logger
|
|
7
9
|
optimist (~> 3.0)
|
|
8
10
|
ruby-progressbar (~> 1.9)
|
|
9
11
|
rubyzip (>= 1.2.2)
|
|
@@ -12,63 +14,75 @@ PATH
|
|
|
12
14
|
GEM
|
|
13
15
|
remote: https://rubygems.org/
|
|
14
16
|
specs:
|
|
15
|
-
ast (2.4.
|
|
17
|
+
ast (2.4.3)
|
|
16
18
|
coderay (1.1.3)
|
|
17
19
|
colored (1.2)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
+
csv (3.3.5)
|
|
21
|
+
diff-lcs (1.6.2)
|
|
22
|
+
docile (1.4.1)
|
|
23
|
+
json (2.18.1)
|
|
20
24
|
kiba (4.0.0)
|
|
25
|
+
language_server-protocol (3.17.0.5)
|
|
26
|
+
lint_roller (1.1.0)
|
|
27
|
+
logger (1.7.0)
|
|
21
28
|
method_source (0.9.2)
|
|
22
29
|
mini_portile2 (2.8.9)
|
|
23
30
|
optimist (3.2.1)
|
|
24
|
-
parallel (1.
|
|
25
|
-
parser (3.
|
|
31
|
+
parallel (1.27.0)
|
|
32
|
+
parser (3.3.10.1)
|
|
26
33
|
ast (~> 2.4.1)
|
|
34
|
+
racc
|
|
35
|
+
prism (1.9.0)
|
|
27
36
|
pry (0.12.2)
|
|
28
37
|
coderay (~> 1.1.0)
|
|
29
38
|
method_source (~> 0.9.0)
|
|
30
39
|
pry-nav (0.3.0)
|
|
31
40
|
pry (>= 0.9.10, < 0.13.0)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
rspec (3.
|
|
37
|
-
rspec-core (~> 3.
|
|
38
|
-
rspec-expectations (~> 3.
|
|
39
|
-
rspec-mocks (~> 3.
|
|
40
|
-
rspec-core (3.
|
|
41
|
-
rspec-support (~> 3.
|
|
42
|
-
rspec-expectations (3.
|
|
41
|
+
racc (1.8.1)
|
|
42
|
+
rainbow (3.1.1)
|
|
43
|
+
rake (13.3.1)
|
|
44
|
+
regexp_parser (2.11.3)
|
|
45
|
+
rspec (3.13.2)
|
|
46
|
+
rspec-core (~> 3.13.0)
|
|
47
|
+
rspec-expectations (~> 3.13.0)
|
|
48
|
+
rspec-mocks (~> 3.13.0)
|
|
49
|
+
rspec-core (3.13.6)
|
|
50
|
+
rspec-support (~> 3.13.0)
|
|
51
|
+
rspec-expectations (3.13.5)
|
|
43
52
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
44
|
-
rspec-support (~> 3.
|
|
45
|
-
rspec-mocks (3.
|
|
53
|
+
rspec-support (~> 3.13.0)
|
|
54
|
+
rspec-mocks (3.13.7)
|
|
46
55
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
47
|
-
rspec-support (~> 3.
|
|
48
|
-
rspec-support (3.
|
|
49
|
-
rubocop (1.
|
|
56
|
+
rspec-support (~> 3.13.0)
|
|
57
|
+
rspec-support (3.13.7)
|
|
58
|
+
rubocop (1.84.2)
|
|
59
|
+
json (~> 2.3)
|
|
60
|
+
language_server-protocol (~> 3.17.0.2)
|
|
61
|
+
lint_roller (~> 1.1.0)
|
|
50
62
|
parallel (~> 1.10)
|
|
51
|
-
parser (>= 3.
|
|
63
|
+
parser (>= 3.3.0.2)
|
|
52
64
|
rainbow (>= 2.2.2, < 4.0)
|
|
53
|
-
regexp_parser (>=
|
|
54
|
-
|
|
55
|
-
rubocop-ast (>= 1.12.0, < 2.0)
|
|
65
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
66
|
+
rubocop-ast (>= 1.49.0, < 2.0)
|
|
56
67
|
ruby-progressbar (~> 1.7)
|
|
57
|
-
unicode-display_width (>=
|
|
58
|
-
rubocop-ast (1.
|
|
59
|
-
parser (>= 3.
|
|
68
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
69
|
+
rubocop-ast (1.49.0)
|
|
70
|
+
parser (>= 3.3.7.2)
|
|
71
|
+
prism (~> 1.7)
|
|
60
72
|
ruby-prof (0.18.0)
|
|
61
|
-
ruby-progressbar (1.
|
|
62
|
-
rubyzip (3.
|
|
63
|
-
simplecov (0.
|
|
73
|
+
ruby-progressbar (1.13.0)
|
|
74
|
+
rubyzip (3.2.2)
|
|
75
|
+
simplecov (0.22.0)
|
|
64
76
|
docile (~> 1.1)
|
|
65
77
|
simplecov-html (~> 0.11)
|
|
66
78
|
simplecov_json_formatter (~> 0.1)
|
|
67
|
-
simplecov-html (0.
|
|
68
|
-
simplecov_json_formatter (0.1.
|
|
79
|
+
simplecov-html (0.13.2)
|
|
80
|
+
simplecov_json_formatter (0.1.4)
|
|
69
81
|
sqlite3 (1.7.3)
|
|
70
82
|
mini_portile2 (~> 2.8.0)
|
|
71
|
-
unicode-display_width (2.
|
|
83
|
+
unicode-display_width (3.2.0)
|
|
84
|
+
unicode-emoji (~> 4.1)
|
|
85
|
+
unicode-emoji (4.2.0)
|
|
72
86
|
|
|
73
87
|
PLATFORMS
|
|
74
88
|
ruby
|
|
@@ -84,4 +98,4 @@ DEPENDENCIES
|
|
|
84
98
|
simplecov (~> 0.16)
|
|
85
99
|
|
|
86
100
|
BUNDLED WITH
|
|
87
|
-
2.
|
|
101
|
+
2.6.9
|
data/README.md
CHANGED
|
@@ -6,11 +6,11 @@ This project is an automated solution for retrieving and collating US and worldw
|
|
|
6
6
|
|
|
7
7
|
## History
|
|
8
8
|
|
|
9
|
-
In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
|
|
9
|
+
In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
|
|
10
10
|
|
|
11
11
|
In 2017 we began using [GeoNames](http://www.geonames.org) data, which is licensed under Creative Commons. We are grateful to [GeoNames](http://www.geonames.org) for sharing, and urge you to [visit their site](http://www.geonames.org) and support their work.
|
|
12
12
|
|
|
13
|
-
In 2018 we refactored the project and made it into a Ruby gem with a command-line executable
|
|
13
|
+
In 2018 we refactored the project and made it into a Ruby gem with a unified command-line executable (`free_zipcode_data`) that handles downloading, processing, and database generation in a single step.
|
|
14
14
|
|
|
15
15
|
## What's Included
|
|
16
16
|
|
|
@@ -22,7 +22,7 @@ See the GeoNames [readme.txt](http://download.geonames.org/export/zip/readme.txt
|
|
|
22
22
|
|
|
23
23
|
## Usage
|
|
24
24
|
|
|
25
|
-
First, you need to install Ruby and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
|
|
25
|
+
First, you need to install Ruby 3.4+ and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
28
|
$ gem install free_zipcode_data
|
|
@@ -61,8 +61,6 @@ $ free_zipcode_data --work-dir /tmp/work_dir --country US --generate-files
|
|
|
61
61
|
$ free_zipcode_data --work-dir /tmp/work_dir --generate-files
|
|
62
62
|
```
|
|
63
63
|
|
|
64
|
-
The rake tasks cascade, from the bottom up. So if you run `rake data:populate_db`, it will automatically call `rake data:build` if the .csv files are missing, which will call `rake data:download` if the .zip files are missing.
|
|
65
|
-
|
|
66
64
|
## SQLite3 Database
|
|
67
65
|
|
|
68
66
|
The executable will generate an SQLite3 database in the specified directory `--work-dir` but it will not generate the `.csv` files by default. Specify `--generate-files` if you want those as well.
|
data/Rakefile
CHANGED
data/free_zipcode_data.gemspec
CHANGED
|
@@ -23,18 +23,12 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
24
24
|
spec.require_paths = ['lib']
|
|
25
25
|
|
|
26
|
-
spec.
|
|
27
|
-
spec.
|
|
28
|
-
spec.
|
|
29
|
-
spec.
|
|
30
|
-
spec.
|
|
31
|
-
spec.
|
|
32
|
-
spec.
|
|
33
|
-
|
|
34
|
-
spec.add_runtime_dependency 'colored', '~> 1.2'
|
|
35
|
-
spec.add_runtime_dependency 'kiba', '~> 4.0'
|
|
36
|
-
spec.add_runtime_dependency 'optimist', '~> 3.0'
|
|
37
|
-
spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9'
|
|
38
|
-
spec.add_runtime_dependency 'rubyzip', '>= 1.2.2'
|
|
39
|
-
spec.add_runtime_dependency 'sqlite3', '~> 1.3'
|
|
26
|
+
spec.add_dependency 'colored', '~> 1.2'
|
|
27
|
+
spec.add_dependency 'csv'
|
|
28
|
+
spec.add_dependency 'kiba', '~> 4.0'
|
|
29
|
+
spec.add_dependency 'logger'
|
|
30
|
+
spec.add_dependency 'optimist', '~> 3.0'
|
|
31
|
+
spec.add_dependency 'ruby-progressbar', '~> 1.9'
|
|
32
|
+
spec.add_dependency 'rubyzip', '>= 1.2.2'
|
|
33
|
+
spec.add_dependency 'sqlite3', '~> 1.3'
|
|
40
34
|
end
|
data/lib/etl/common.rb
CHANGED
data/lib/etl/csv_source.rb
CHANGED
|
@@ -14,10 +14,10 @@ class CsvSource
|
|
|
14
14
|
|
|
15
15
|
def each
|
|
16
16
|
CSV.open(filename,
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
col_sep: delimeter,
|
|
18
|
+
headers: headers,
|
|
19
|
+
header_converters: :symbol,
|
|
20
|
+
quote_char: quote_char) do |csv|
|
|
21
21
|
csv.each do |row|
|
|
22
22
|
yield(row.to_hash)
|
|
23
23
|
end
|
|
@@ -25,6 +25,10 @@ module FreeZipcodeData
|
|
|
25
25
|
|
|
26
26
|
def write(row)
|
|
27
27
|
country_hash = country_lookup_table[row[:country]]
|
|
28
|
+
unless country_hash
|
|
29
|
+
warn_once("Skipping unknown country '#{row[:country]}': not in country_lookup_table")
|
|
30
|
+
return update_progress
|
|
31
|
+
end
|
|
28
32
|
|
|
29
33
|
sql = <<-SQL
|
|
30
34
|
INSERT INTO countries (alpha2, alpha3, iso, name)
|
|
@@ -36,8 +40,12 @@ module FreeZipcodeData
|
|
|
36
40
|
|
|
37
41
|
begin
|
|
38
42
|
database.execute(sql)
|
|
39
|
-
rescue SQLite3::ConstraintException
|
|
40
|
-
|
|
43
|
+
rescue SQLite3::ConstraintException => e
|
|
44
|
+
unless e.message.include?('UNIQUE')
|
|
45
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
46
|
+
end
|
|
47
|
+
rescue StandardError => e
|
|
48
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
41
49
|
end
|
|
42
50
|
|
|
43
51
|
update_progress
|
|
@@ -26,8 +26,14 @@ module FreeZipcodeData
|
|
|
26
26
|
def write(row)
|
|
27
27
|
return nil unless row[:county]
|
|
28
28
|
|
|
29
|
-
state_id = get_state_id(row[:short_state], row[:state])
|
|
30
|
-
|
|
29
|
+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
|
|
30
|
+
unless state_id
|
|
31
|
+
logger.verbose(
|
|
32
|
+
"Skipping county '#{row[:county]}': no state found for " \
|
|
33
|
+
"abbr='#{row[:short_state]}', country='#{row[:country]}'"
|
|
34
|
+
)
|
|
35
|
+
return nil
|
|
36
|
+
end
|
|
31
37
|
|
|
32
38
|
sql = <<-SQL
|
|
33
39
|
INSERT INTO counties (state_id, abbr, name)
|
|
@@ -39,10 +45,12 @@ module FreeZipcodeData
|
|
|
39
45
|
|
|
40
46
|
begin
|
|
41
47
|
database.execute(sql)
|
|
42
|
-
rescue SQLite3::ConstraintException
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
rescue SQLite3::ConstraintException => e
|
|
49
|
+
unless e.message.include?('UNIQUE')
|
|
50
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
51
|
+
end
|
|
52
|
+
rescue StandardError => e
|
|
53
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
46
54
|
end
|
|
47
55
|
|
|
48
56
|
update_progress
|
|
@@ -56,11 +56,11 @@ module FreeZipcodeData
|
|
|
56
56
|
if options[:clobber]
|
|
57
57
|
Zip.on_exists_proc = true
|
|
58
58
|
Logger.instance.verbose("Extracting: #{zipfile}...")
|
|
59
|
-
entry.extract(
|
|
59
|
+
entry.extract(destination_directory: options.work_dir)
|
|
60
60
|
end
|
|
61
61
|
else
|
|
62
62
|
Logger.instance.verbose("Extracting: #{zipfile}...")
|
|
63
|
-
entry.extract(
|
|
63
|
+
entry.extract(destination_directory: options.work_dir)
|
|
64
64
|
end
|
|
65
65
|
break
|
|
66
66
|
end
|
|
@@ -8,6 +8,7 @@ module FreeZipcodeData
|
|
|
8
8
|
ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
|
|
9
9
|
|
|
10
10
|
attr_reader :database, :tablename
|
|
11
|
+
|
|
11
12
|
@@progressbar = nil
|
|
12
13
|
|
|
13
14
|
def initialize(database:, tablename:)
|
|
@@ -23,6 +24,18 @@ module FreeZipcodeData
|
|
|
23
24
|
|
|
24
25
|
private
|
|
25
26
|
|
|
27
|
+
def logger
|
|
28
|
+
Logger.instance
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def warn_once(message)
|
|
32
|
+
@warned_messages ||= {}
|
|
33
|
+
return if @warned_messages[message]
|
|
34
|
+
|
|
35
|
+
logger.warn(message)
|
|
36
|
+
@warned_messages[message] = true
|
|
37
|
+
end
|
|
38
|
+
|
|
26
39
|
def country_lookup_table
|
|
27
40
|
@country_lookup_table ||=
|
|
28
41
|
begin
|
|
@@ -33,9 +46,9 @@ module FreeZipcodeData
|
|
|
33
46
|
|
|
34
47
|
def select_first(sql)
|
|
35
48
|
rows = database.execute(sql)
|
|
36
|
-
rows[0]
|
|
37
|
-
rescue SQLite3::SQLException =>
|
|
38
|
-
raise "Please file an issue at #{ISSUE_URL}: [#{
|
|
49
|
+
rows[0]&.first
|
|
50
|
+
rescue SQLite3::SQLException => e
|
|
51
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
39
52
|
end
|
|
40
53
|
|
|
41
54
|
def get_country_id(country)
|
|
@@ -43,20 +56,54 @@ module FreeZipcodeData
|
|
|
43
56
|
select_first(sql)
|
|
44
57
|
end
|
|
45
58
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
59
|
+
# Look up a state ID scoped to a country, trying progressively less specific
|
|
60
|
+
# criteria: (1) abbr + name + country, (2) abbr + country, (3) name + country.
|
|
61
|
+
# Returns nil if no match is found.
|
|
62
|
+
def get_state_id(country, state_abbr, state_name)
|
|
63
|
+
escaped_country = escape_single_quotes(country)
|
|
64
|
+
return nil if escaped_country.empty?
|
|
65
|
+
|
|
66
|
+
escaped_abbr = escape_single_quotes(state_abbr)
|
|
67
|
+
escaped_name = escape_single_quotes(state_name)
|
|
68
|
+
country_cond = "c.alpha2 = '#{escaped_country}'"
|
|
69
|
+
# Most specific lookup: abbr + name + country
|
|
70
|
+
res = find_state_where("s.abbr = '#{escaped_abbr}'", "s.name = '#{escaped_name}'", country_cond)
|
|
71
|
+
return res if res
|
|
72
|
+
|
|
73
|
+
# Fallback: abbr + country only
|
|
74
|
+
res = find_state_where("s.abbr = '#{escaped_abbr}'", country_cond)
|
|
75
|
+
if res
|
|
76
|
+
logger.verbose("State fallback: abbr '#{state_abbr}' + country '#{country}' (name mismatch)")
|
|
77
|
+
return res
|
|
78
|
+
end
|
|
79
|
+
# Fallback: name + country only
|
|
80
|
+
res = find_state_where("s.name = '#{escaped_name}'", country_cond)
|
|
81
|
+
if res
|
|
82
|
+
logger.verbose("State fallback: name '#{state_name}' + country '#{country}' (abbr mismatch)")
|
|
83
|
+
return res
|
|
84
|
+
end
|
|
85
|
+
logger.warn("State lookup failed: abbr='#{state_abbr}', name='#{state_name}', country='#{country}'")
|
|
86
|
+
nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def find_state_where(*conditions)
|
|
90
|
+
sql = <<-SQL
|
|
91
|
+
SELECT s.id FROM states s
|
|
92
|
+
INNER JOIN countries c ON s.country_id = c.id
|
|
93
|
+
WHERE #{conditions.join(' AND ')}
|
|
94
|
+
SQL
|
|
49
95
|
select_first(sql)
|
|
50
96
|
end
|
|
51
97
|
|
|
52
98
|
def get_county_id(county)
|
|
53
99
|
return nil if county.nil?
|
|
100
|
+
|
|
54
101
|
sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'"
|
|
55
102
|
select_first(sql)
|
|
56
103
|
end
|
|
57
104
|
|
|
58
105
|
def escape_single_quotes(string)
|
|
59
|
-
string&.gsub(
|
|
106
|
+
string&.gsub('\'', '\'\'') || ''
|
|
60
107
|
end
|
|
61
108
|
end
|
|
62
109
|
end
|
|
@@ -13,27 +13,23 @@ module FreeZipcodeData
|
|
|
13
13
|
@log_provider = provider
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def log_exception(
|
|
17
|
-
msg = "EXCEPTION : #{
|
|
16
|
+
def log_exception(error, data = {})
|
|
17
|
+
msg = "EXCEPTION : #{error.class.name} : #{error.message}"
|
|
18
18
|
msg += "\n data : #{data.inspect}" if data && !data.empty?
|
|
19
|
-
msg += "\n #{
|
|
19
|
+
msg += "\n #{error.backtrace[0, 6].join("\n ")}"
|
|
20
20
|
log_provider.error(msg)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
def method_missing(meth,
|
|
23
|
+
def method_missing(meth, *, &)
|
|
24
24
|
if log_provider.respond_to?(meth)
|
|
25
|
-
log_provider.send(meth,
|
|
25
|
+
log_provider.send(meth, *, &)
|
|
26
26
|
else
|
|
27
27
|
super
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
def
|
|
32
|
-
|
|
33
|
-
true
|
|
34
|
-
else
|
|
35
|
-
super
|
|
36
|
-
end
|
|
31
|
+
def respond_to_missing?(meth, include_private = false)
|
|
32
|
+
log_provider.respond_to?(meth) || super
|
|
37
33
|
end
|
|
38
34
|
|
|
39
35
|
def verbose(msg)
|
|
@@ -43,7 +39,7 @@ module FreeZipcodeData
|
|
|
43
39
|
private
|
|
44
40
|
|
|
45
41
|
def default_logger
|
|
46
|
-
logger = ::Logger.new(
|
|
42
|
+
logger = ::Logger.new($stdout)
|
|
47
43
|
logger.formatter = proc do |_, _, _, msg|
|
|
48
44
|
"#{msg}\n"
|
|
49
45
|
end
|