free_zipcode_data 1.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dockerignore +10 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +25 -16
- data/.ruby-version +1 -1
- data/CHANGELOG +17 -0
- data/CLAUDE.md +89 -0
- data/Dockerfile +21 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +50 -36
- data/README.md +38 -5
- data/Rakefile +1 -1
- data/docker-entrypoint.sh +14 -0
- data/free_zipcode_data.gemspec +8 -14
- data/lib/etl/common.rb +1 -0
- data/lib/etl/csv_source.rb +4 -4
- data/lib/free_zipcode_data/country_table.rb +10 -2
- data/lib/free_zipcode_data/county_table.rb +14 -6
- data/lib/free_zipcode_data/data_source.rb +2 -2
- data/lib/free_zipcode_data/db_table.rb +54 -7
- data/lib/free_zipcode_data/logger.rb +8 -12
- data/lib/free_zipcode_data/runner.rb +2 -2
- data/lib/free_zipcode_data/state_table.rb +37 -5
- data/lib/free_zipcode_data/version.rb +1 -1
- data/lib/free_zipcode_data/zipcode_table.rb +15 -5
- data/lib/free_zipcode_data.rb +3 -3
- data/lib/tasks/version.rake +27 -24
- data/spec/etl/csv_source_spec.rb +57 -0
- data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
- data/spec/fixtures/.free_zipcode_data.yml +1 -0
- data/spec/fixtures/US.txt +5 -0
- data/spec/fixtures/US.zip +0 -0
- data/spec/fixtures/test_data.csv +7 -0
- data/spec/fixtures/test_data.txt +5 -0
- data/spec/free_zipcode_data/country_table_spec.rb +52 -0
- data/spec/free_zipcode_data/county_table_spec.rb +84 -0
- data/spec/free_zipcode_data/data_source_spec.rb +131 -0
- data/spec/free_zipcode_data/db_table_spec.rb +164 -0
- data/spec/free_zipcode_data/logger_spec.rb +78 -0
- data/spec/free_zipcode_data/options_spec.rb +37 -0
- data/spec/free_zipcode_data/runner_spec.rb +91 -0
- data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
- data/spec/free_zipcode_data/state_table_spec.rb +112 -0
- data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
- data/spec/free_zipcode_data_spec.rb +38 -0
- data/spec/spec_helper.rb +23 -2
- data/spec/support/database_helpers.rb +48 -0
- metadata +41 -91
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5a29b38bacdbf91fb1aad6732c0378eac3588a8bc9efd05c09b48470427da87e
|
|
4
|
+
data.tar.gz: 3b309917f7e87235ddc201f0db852d81b6f1310b85a32c18bc027600eceb34a3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7d8e5b4a3b22359d46f3def15e4801d6ba39d70a8ec019bb1e302c6f98510b5fe6d0166bb7f9c2d8e35a3fda66572b4f4dbd6bc78a87874819864ea6225f8727
|
|
7
|
+
data.tar.gz: e26f46c2be8dfbb7f69c85a494de27a9607276e6958c56e58b4e63d20a2c9baca9da9fe95a35d4b3cb083f00f24bf0064e1be8aa815b1c6e417ed3cf32b48edd
|
data/.dockerignore
ADDED
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -1,23 +1,24 @@
|
|
|
1
1
|
AllCops:
|
|
2
|
-
TargetRubyVersion:
|
|
2
|
+
TargetRubyVersion: 3.4
|
|
3
3
|
|
|
4
4
|
# Include gemspec and Rakefile
|
|
5
5
|
Include:
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
6
|
+
- "**/*.rb"
|
|
7
|
+
- "**/*.gemspec"
|
|
8
|
+
- "**/*.podspec"
|
|
9
|
+
- "**/*.jbuilder"
|
|
10
|
+
- "**/*.rake"
|
|
11
|
+
- "**/Gemfile"
|
|
12
|
+
- "**/Rakefile"
|
|
13
|
+
- "**/Capfile"
|
|
14
|
+
- "**/Guardfile"
|
|
15
|
+
- "**/Podfile"
|
|
16
|
+
- "**/Thorfile"
|
|
17
|
+
- "**/Vagrantfile"
|
|
17
18
|
Exclude:
|
|
18
|
-
-
|
|
19
|
-
-
|
|
20
|
-
-
|
|
19
|
+
- "vendor/**/*"
|
|
20
|
+
- "stubs/**/*"
|
|
21
|
+
- "spec/support/shared_contexts/*"
|
|
21
22
|
|
|
22
23
|
NewCops: enable
|
|
23
24
|
|
|
@@ -51,6 +52,10 @@ Style/DoubleNegation:
|
|
|
51
52
|
Style/PerlBackrefs:
|
|
52
53
|
Enabled: false
|
|
53
54
|
|
|
55
|
+
Style/OpenStructUse:
|
|
56
|
+
Exclude:
|
|
57
|
+
- "spec/**/*"
|
|
58
|
+
|
|
54
59
|
########################################
|
|
55
60
|
# Lint Cops
|
|
56
61
|
|
|
@@ -66,6 +71,10 @@ Security/Eval:
|
|
|
66
71
|
########################################
|
|
67
72
|
# Metrics Cops
|
|
68
73
|
|
|
74
|
+
Metrics/BlockLength:
|
|
75
|
+
Exclude:
|
|
76
|
+
- "spec/**/*"
|
|
77
|
+
|
|
69
78
|
Metrics/MethodLength:
|
|
70
79
|
CountComments: false # count full line comments?
|
|
71
80
|
Max: 30
|
|
@@ -77,7 +86,7 @@ Metrics/AbcSize:
|
|
|
77
86
|
Enabled: false
|
|
78
87
|
|
|
79
88
|
########################################
|
|
80
|
-
#
|
|
89
|
+
# Naming Cops
|
|
81
90
|
|
|
82
91
|
Naming/FileName:
|
|
83
92
|
Enabled: false
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.
|
|
1
|
+
3.4.8
|
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
*1.2.0* (February 17, 2026)
|
|
2
|
+
|
|
3
|
+
* Add Dockerfile for containerized data generation without requiring a local Ruby installation
|
|
4
|
+
* Add docker-entrypoint.sh with COUNTRY environment variable support and input validation
|
|
5
|
+
* Add .dockerignore to keep Docker build context lean
|
|
6
|
+
|
|
7
|
+
*1.1.0* (February 14, 2026)
|
|
8
|
+
|
|
9
|
+
* Fix state abbreviation uniqueness: state lookups are now scoped by country, allowing the same abbreviation (e.g., "NY") in different countries
|
|
10
|
+
* Update state and state_name indexes to include country_id for cross-country uniqueness
|
|
11
|
+
* Synthesize state entries from country data for countries without state/province subdivisions
|
|
12
|
+
* Add logging for silent failure cascade: warn when countries, states, counties, or zipcodes are skipped due to missing lookups
|
|
13
|
+
* Narrow rescue SQLite3::ConstraintException to only swallow UNIQUE violations; re-raise NOT NULL, FOREIGN KEY, and CHECK constraint errors
|
|
14
|
+
* Add comprehensive RSpec test suite with cross-country integration tests
|
|
15
|
+
* Upgrade to Ruby 3.4.8 and fix rubyzip 3.x API compatibility
|
|
16
|
+
* Update README to remove stale rake task references
|
|
17
|
+
|
|
1
18
|
*1.0.6* (September 30, 2025)
|
|
2
19
|
|
|
3
20
|
* Bump rexml from 3.3.9 to 3.4.2
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
A Ruby gem that downloads postal/zipcode data from GeoNames.org, processes it via an ETL pipeline, and outputs an SQLite3 database and optional CSV files. Supports single-country or all-countries processing.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install dependencies (vendored to vendor/bundle, binstubs in stubs/)
|
|
13
|
+
bundle install
|
|
14
|
+
|
|
15
|
+
# Run all tests
|
|
16
|
+
bundle exec rspec
|
|
17
|
+
|
|
18
|
+
# Run a single test file
|
|
19
|
+
bundle exec rspec spec/path/to/file_spec.rb
|
|
20
|
+
|
|
21
|
+
# Run a specific test by line number
|
|
22
|
+
bundle exec rspec spec/path/to/file_spec.rb:42
|
|
23
|
+
|
|
24
|
+
# Lint
|
|
25
|
+
bundle exec rubocop
|
|
26
|
+
|
|
27
|
+
# Lint with auto-correct
|
|
28
|
+
bundle exec rubocop -a
|
|
29
|
+
|
|
30
|
+
# Version bumping (do on develop branch, not master)
|
|
31
|
+
bundle exec rake version:bump_patch
|
|
32
|
+
bundle exec rake version:bump_minor
|
|
33
|
+
bundle exec rake version:bump_major
|
|
34
|
+
|
|
35
|
+
# Build and install gem
|
|
36
|
+
bundle exec rake build
|
|
37
|
+
bundle exec rake install
|
|
38
|
+
|
|
39
|
+
# Release gem
|
|
40
|
+
bundle exec rake release
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Architecture
|
|
44
|
+
|
|
45
|
+
The gem follows an ETL (Extract, Transform, Load) pattern using the Kiba gem:
|
|
46
|
+
|
|
47
|
+
1. **Extract**: `DataSource` downloads zip files from GeoNames.org, extracts them, and prepares CSV files with headers
|
|
48
|
+
2. **Source**: `CsvSource` (Kiba source) feeds rows from the prepared CSV into the pipeline
|
|
49
|
+
3. **Load**: Four Kiba destination table classes write rows into an in-memory SQLite database
|
|
50
|
+
|
|
51
|
+
### Key Flow
|
|
52
|
+
|
|
53
|
+
`bin/free_zipcode_data` → `Runner#start` → `DataSource#download` → `DataSource#datafile` (extract zip + add CSV headers) → `SqliteRam` (in-memory DB) → `ETL::FreeZipcodeDataJob` (Kiba pipeline) → `SqliteRam#save_to_disk`
|
|
54
|
+
|
|
55
|
+
### Core Classes
|
|
56
|
+
|
|
57
|
+
- **`FreeZipcodeData::Runner`** - CLI entry point; parses args via Optimist, orchestrates the full pipeline
|
|
58
|
+
- **`FreeZipcodeData::DataSource`** - Downloads and extracts GeoNames zip files, prepares CSV with headers
|
|
59
|
+
- **`SqliteRam`** - Wraps SQLite3; works entirely in-memory then saves to disk via `SQLite3::Backup`
|
|
60
|
+
- **`FreeZipcodeData::DbTable`** - Base class for all table classes; provides progress bar, SQL helpers, and country lookup from `country_lookup_table.yml`
|
|
61
|
+
- **`FreeZipcodeData::CountryTable`/`StateTable`/`CountyTable`/`ZipcodeTable`** - Kiba destinations; each has `build` (creates schema + indexes) and `write` (inserts rows, swallows duplicate constraint violations)
|
|
62
|
+
- **`ETL::FreeZipcodeDataJob`** - Configures the Kiba pipeline with one source and four destinations
|
|
63
|
+
- **`CsvSource`** - Kiba-compatible CSV reader
|
|
64
|
+
|
|
65
|
+
### Singletons
|
|
66
|
+
|
|
67
|
+
`Options` and `Logger` are singletons (via Ruby's `Singleton` module). `Runner` has an `.instance` convenience class method (returns `new` each time, not cached).
|
|
68
|
+
|
|
69
|
+
## Configuration
|
|
70
|
+
|
|
71
|
+
- `.ruby-version`: 3.4.8
|
|
72
|
+
- Bundle path: `vendor/bundle` (binstubs in `stubs/`)
|
|
73
|
+
- Environment: `APP_ENV` controls environment (`test`, `development`)
|
|
74
|
+
- Config file: `~/.free_zipcode_data.yml` (overridable via `FZD_CONFIG_FILE` env var; uses `spec/fixtures/` version in test)
|
|
75
|
+
|
|
76
|
+
## Rubocop
|
|
77
|
+
|
|
78
|
+
Key style settings (`.rubocop.yml`):
|
|
79
|
+
- Target Ruby 3.4
|
|
80
|
+
- Max line length: 110
|
|
81
|
+
- Max method length: 30 lines
|
|
82
|
+
- `Style/ClassVars`, `Style/Documentation`, `Metrics/AbcSize`, `Lint/SuppressedException` disabled
|
|
83
|
+
- `vendor/` and `stubs/` excluded
|
|
84
|
+
|
|
85
|
+
## Git Workflow
|
|
86
|
+
|
|
87
|
+
- `master` is the release branch
|
|
88
|
+
- `develop` is the development branch
|
|
89
|
+
- Version bumps should happen on `develop`, then merge to `master` before `rake release`
|
data/Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
FROM ruby:3.4-slim
|
|
2
|
+
|
|
3
|
+
RUN apt-get update && \
|
|
4
|
+
apt-get install -y --no-install-recommends build-essential git pkg-config && \
|
|
5
|
+
rm -rf /var/lib/apt/lists/*
|
|
6
|
+
|
|
7
|
+
WORKDIR /app
|
|
8
|
+
|
|
9
|
+
COPY Gemfile Gemfile.lock free_zipcode_data.gemspec .ruby-version ./
|
|
10
|
+
COPY lib/free_zipcode_data/version.rb lib/free_zipcode_data/version.rb
|
|
11
|
+
RUN git init && git add . && \
|
|
12
|
+
bundle config set --local without development && \
|
|
13
|
+
bundle install
|
|
14
|
+
|
|
15
|
+
COPY . .
|
|
16
|
+
RUN git add .
|
|
17
|
+
|
|
18
|
+
ENV COUNTRY=""
|
|
19
|
+
VOLUME /output
|
|
20
|
+
|
|
21
|
+
ENTRYPOINT ["./docker-entrypoint.sh"]
|
data/Gemfile
CHANGED
|
@@ -4,3 +4,13 @@ source 'https://rubygems.org'
|
|
|
4
4
|
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
|
5
5
|
|
|
6
6
|
gemspec
|
|
7
|
+
|
|
8
|
+
group :development do
|
|
9
|
+
gem 'bundler'
|
|
10
|
+
gem 'pry-nav', '~> 0.2'
|
|
11
|
+
gem 'rake', '~> 13.0'
|
|
12
|
+
gem 'rspec', '~> 3.7'
|
|
13
|
+
gem 'rubocop'
|
|
14
|
+
gem 'ruby-prof', '~> 0.17'
|
|
15
|
+
gem 'simplecov', '~> 0.16'
|
|
16
|
+
end
|
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
free_zipcode_data (1.0
|
|
4
|
+
free_zipcode_data (1.2.0)
|
|
5
5
|
colored (~> 1.2)
|
|
6
|
+
csv
|
|
6
7
|
kiba (~> 4.0)
|
|
8
|
+
logger
|
|
7
9
|
optimist (~> 3.0)
|
|
8
10
|
ruby-progressbar (~> 1.9)
|
|
9
11
|
rubyzip (>= 1.2.2)
|
|
@@ -12,63 +14,75 @@ PATH
|
|
|
12
14
|
GEM
|
|
13
15
|
remote: https://rubygems.org/
|
|
14
16
|
specs:
|
|
15
|
-
ast (2.4.
|
|
17
|
+
ast (2.4.3)
|
|
16
18
|
coderay (1.1.3)
|
|
17
19
|
colored (1.2)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
+
csv (3.3.5)
|
|
21
|
+
diff-lcs (1.6.2)
|
|
22
|
+
docile (1.4.1)
|
|
23
|
+
json (2.18.1)
|
|
20
24
|
kiba (4.0.0)
|
|
25
|
+
language_server-protocol (3.17.0.5)
|
|
26
|
+
lint_roller (1.1.0)
|
|
27
|
+
logger (1.7.0)
|
|
21
28
|
method_source (0.9.2)
|
|
22
29
|
mini_portile2 (2.8.9)
|
|
23
30
|
optimist (3.2.1)
|
|
24
|
-
parallel (1.
|
|
25
|
-
parser (3.
|
|
31
|
+
parallel (1.27.0)
|
|
32
|
+
parser (3.3.10.1)
|
|
26
33
|
ast (~> 2.4.1)
|
|
34
|
+
racc
|
|
35
|
+
prism (1.9.0)
|
|
27
36
|
pry (0.12.2)
|
|
28
37
|
coderay (~> 1.1.0)
|
|
29
38
|
method_source (~> 0.9.0)
|
|
30
39
|
pry-nav (0.3.0)
|
|
31
40
|
pry (>= 0.9.10, < 0.13.0)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
rspec (3.
|
|
37
|
-
rspec-core (~> 3.
|
|
38
|
-
rspec-expectations (~> 3.
|
|
39
|
-
rspec-mocks (~> 3.
|
|
40
|
-
rspec-core (3.
|
|
41
|
-
rspec-support (~> 3.
|
|
42
|
-
rspec-expectations (3.
|
|
41
|
+
racc (1.8.1)
|
|
42
|
+
rainbow (3.1.1)
|
|
43
|
+
rake (13.3.1)
|
|
44
|
+
regexp_parser (2.11.3)
|
|
45
|
+
rspec (3.13.2)
|
|
46
|
+
rspec-core (~> 3.13.0)
|
|
47
|
+
rspec-expectations (~> 3.13.0)
|
|
48
|
+
rspec-mocks (~> 3.13.0)
|
|
49
|
+
rspec-core (3.13.6)
|
|
50
|
+
rspec-support (~> 3.13.0)
|
|
51
|
+
rspec-expectations (3.13.5)
|
|
43
52
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
44
|
-
rspec-support (~> 3.
|
|
45
|
-
rspec-mocks (3.
|
|
53
|
+
rspec-support (~> 3.13.0)
|
|
54
|
+
rspec-mocks (3.13.7)
|
|
46
55
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
47
|
-
rspec-support (~> 3.
|
|
48
|
-
rspec-support (3.
|
|
49
|
-
rubocop (1.
|
|
56
|
+
rspec-support (~> 3.13.0)
|
|
57
|
+
rspec-support (3.13.7)
|
|
58
|
+
rubocop (1.84.2)
|
|
59
|
+
json (~> 2.3)
|
|
60
|
+
language_server-protocol (~> 3.17.0.2)
|
|
61
|
+
lint_roller (~> 1.1.0)
|
|
50
62
|
parallel (~> 1.10)
|
|
51
|
-
parser (>= 3.
|
|
63
|
+
parser (>= 3.3.0.2)
|
|
52
64
|
rainbow (>= 2.2.2, < 4.0)
|
|
53
|
-
regexp_parser (>=
|
|
54
|
-
|
|
55
|
-
rubocop-ast (>= 1.12.0, < 2.0)
|
|
65
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
66
|
+
rubocop-ast (>= 1.49.0, < 2.0)
|
|
56
67
|
ruby-progressbar (~> 1.7)
|
|
57
|
-
unicode-display_width (>=
|
|
58
|
-
rubocop-ast (1.
|
|
59
|
-
parser (>= 3.
|
|
68
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
69
|
+
rubocop-ast (1.49.0)
|
|
70
|
+
parser (>= 3.3.7.2)
|
|
71
|
+
prism (~> 1.7)
|
|
60
72
|
ruby-prof (0.18.0)
|
|
61
|
-
ruby-progressbar (1.
|
|
62
|
-
rubyzip (3.
|
|
63
|
-
simplecov (0.
|
|
73
|
+
ruby-progressbar (1.13.0)
|
|
74
|
+
rubyzip (3.2.2)
|
|
75
|
+
simplecov (0.22.0)
|
|
64
76
|
docile (~> 1.1)
|
|
65
77
|
simplecov-html (~> 0.11)
|
|
66
78
|
simplecov_json_formatter (~> 0.1)
|
|
67
|
-
simplecov-html (0.
|
|
68
|
-
simplecov_json_formatter (0.1.
|
|
79
|
+
simplecov-html (0.13.2)
|
|
80
|
+
simplecov_json_formatter (0.1.4)
|
|
69
81
|
sqlite3 (1.7.3)
|
|
70
82
|
mini_portile2 (~> 2.8.0)
|
|
71
|
-
unicode-display_width (2.
|
|
83
|
+
unicode-display_width (3.2.0)
|
|
84
|
+
unicode-emoji (~> 4.1)
|
|
85
|
+
unicode-emoji (4.2.0)
|
|
72
86
|
|
|
73
87
|
PLATFORMS
|
|
74
88
|
ruby
|
|
@@ -84,4 +98,4 @@ DEPENDENCIES
|
|
|
84
98
|
simplecov (~> 0.16)
|
|
85
99
|
|
|
86
100
|
BUNDLED WITH
|
|
87
|
-
2.
|
|
101
|
+
2.6.9
|
data/README.md
CHANGED
|
@@ -6,11 +6,11 @@ This project is an automated solution for retrieving and collating US and worldw
|
|
|
6
6
|
|
|
7
7
|
## History
|
|
8
8
|
|
|
9
|
-
In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
|
|
9
|
+
In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
|
|
10
10
|
|
|
11
11
|
In 2017 we began using [GeoNames](http://www.geonames.org) data, which is licensed under Creative Commons. We are grateful to [GeoNames](http://www.geonames.org) for sharing, and urge you to [visit their site](http://www.geonames.org) and support their work.
|
|
12
12
|
|
|
13
|
-
In 2018 we refactored the project and made it into a Ruby gem with a command-line executable
|
|
13
|
+
In 2018 we refactored the project and made it into a Ruby gem with a unified command-line executable (`free_zipcode_data`) that handles downloading, processing, and database generation in a single step.
|
|
14
14
|
|
|
15
15
|
## What's Included
|
|
16
16
|
|
|
@@ -22,7 +22,7 @@ See the GeoNames [readme.txt](http://download.geonames.org/export/zip/readme.txt
|
|
|
22
22
|
|
|
23
23
|
## Usage
|
|
24
24
|
|
|
25
|
-
First, you need to install Ruby and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
|
|
25
|
+
First, you need to install Ruby 3.4+ and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
28
|
$ gem install free_zipcode_data
|
|
@@ -61,8 +61,6 @@ $ free_zipcode_data --work-dir /tmp/work_dir --country US --generate-files
|
|
|
61
61
|
$ free_zipcode_data --work-dir /tmp/work_dir --generate-files
|
|
62
62
|
```
|
|
63
63
|
|
|
64
|
-
The rake tasks cascade, from the bottom up. So if you run `rake data:populate_db`, it will automatically call `rake data:build` if the .csv files are missing, which will call `rake data:download` if the .zip files are missing.
|
|
65
|
-
|
|
66
64
|
## SQLite3 Database
|
|
67
65
|
|
|
68
66
|
The executable will generate an SQLite3 database in the specified directory `--work-dir` but it will not generate the `.csv` files by default. Specify `--generate-files` if you want those as well.
|
|
@@ -107,6 +105,41 @@ create table zipcodes (
|
|
|
107
105
|
|
|
108
106
|
Both `lat` and `lon`, geocodes, are populated for each zipcode record.
|
|
109
107
|
|
|
108
|
+
## Docker
|
|
109
|
+
|
|
110
|
+
If you prefer not to install Ruby locally, you can use Docker to generate the database. You only need [Docker](https://docs.docker.com/get-docker/) installed.
|
|
111
|
+
|
|
112
|
+
### Build the image
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
$ git clone https://github.com/midwire/free_zipcode_data.git
|
|
116
|
+
$ cd free_zipcode_data
|
|
117
|
+
$ docker build -t free_zipcode_data .
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Generate data
|
|
121
|
+
|
|
122
|
+
Use the `COUNTRY` environment variable to specify a 2-letter country code. Omit it to process all available countries.
|
|
123
|
+
|
|
124
|
+
**Single country (e.g., US):**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
$ docker run --rm -v $(pwd)/output:/output -e COUNTRY=US free_zipcode_data
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**All countries:**
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
$ docker run --rm -v $(pwd)/output:/output free_zipcode_data
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
The following files will be written to the `./output/` directory on your host:
|
|
137
|
+
|
|
138
|
+
* `free_zipcode_data.sqlite3` - SQLite database with countries, states, counties, and zipcodes tables
|
|
139
|
+
* `countries.csv`, `states.csv`, `counties.csv`, `zipcodes.csv` - CSV exports of each table
|
|
140
|
+
|
|
141
|
+
Look up supported country codes at [GeoNames](http://download.geonames.org/export/zip/).
|
|
142
|
+
|
|
110
143
|
## Data License
|
|
111
144
|
|
|
112
145
|
The zipcode data is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 Unported License</a>, carried forward from [GeoNames](http://www.geonames.org).<br />
|
data/Rakefile
CHANGED
data/docker-entrypoint.sh
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
args=(--work-dir /output --generate-files --clobber)
|
|
5
|
+
|
|
6
|
+
if [ -n "${COUNTRY:-}" ]; then
|
|
7
|
+
if [[ ! "$COUNTRY" =~ ^[A-Z]{2}$ ]]; then
|
|
8
|
+
echo "Error: COUNTRY must be a 2-letter uppercase code (e.g., US, AD)" >&2
|
|
9
|
+
exit 1
|
|
10
|
+
fi
|
|
11
|
+
args+=(--country "$COUNTRY")
|
|
12
|
+
fi
|
|
13
|
+
|
|
14
|
+
exec bundle exec ruby bin/free_zipcode_data "${args[@]}"
|
data/free_zipcode_data.gemspec
CHANGED
|
@@ -23,18 +23,12 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
24
24
|
spec.require_paths = ['lib']
|
|
25
25
|
|
|
26
|
-
spec.
|
|
27
|
-
spec.
|
|
28
|
-
spec.
|
|
29
|
-
spec.
|
|
30
|
-
spec.
|
|
31
|
-
spec.
|
|
32
|
-
spec.
|
|
33
|
-
|
|
34
|
-
spec.add_runtime_dependency 'colored', '~> 1.2'
|
|
35
|
-
spec.add_runtime_dependency 'kiba', '~> 4.0'
|
|
36
|
-
spec.add_runtime_dependency 'optimist', '~> 3.0'
|
|
37
|
-
spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9'
|
|
38
|
-
spec.add_runtime_dependency 'rubyzip', '>= 1.2.2'
|
|
39
|
-
spec.add_runtime_dependency 'sqlite3', '~> 1.3'
|
|
26
|
+
spec.add_dependency 'colored', '~> 1.2'
|
|
27
|
+
spec.add_dependency 'csv'
|
|
28
|
+
spec.add_dependency 'kiba', '~> 4.0'
|
|
29
|
+
spec.add_dependency 'logger'
|
|
30
|
+
spec.add_dependency 'optimist', '~> 3.0'
|
|
31
|
+
spec.add_dependency 'ruby-progressbar', '~> 1.9'
|
|
32
|
+
spec.add_dependency 'rubyzip', '>= 1.2.2'
|
|
33
|
+
spec.add_dependency 'sqlite3', '~> 1.3'
|
|
40
34
|
end
|
data/lib/etl/common.rb
CHANGED
data/lib/etl/csv_source.rb
CHANGED
|
@@ -14,10 +14,10 @@ class CsvSource
|
|
|
14
14
|
|
|
15
15
|
def each
|
|
16
16
|
CSV.open(filename,
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
col_sep: delimeter,
|
|
18
|
+
headers: headers,
|
|
19
|
+
header_converters: :symbol,
|
|
20
|
+
quote_char: quote_char) do |csv|
|
|
21
21
|
csv.each do |row|
|
|
22
22
|
yield(row.to_hash)
|
|
23
23
|
end
|
data/lib/free_zipcode_data/country_table.rb
CHANGED
|
@@ -25,6 +25,10 @@ module FreeZipcodeData
|
|
|
25
25
|
|
|
26
26
|
def write(row)
|
|
27
27
|
country_hash = country_lookup_table[row[:country]]
|
|
28
|
+
unless country_hash
|
|
29
|
+
warn_once("Skipping unknown country '#{row[:country]}': not in country_lookup_table")
|
|
30
|
+
return update_progress
|
|
31
|
+
end
|
|
28
32
|
|
|
29
33
|
sql = <<-SQL
|
|
30
34
|
INSERT INTO countries (alpha2, alpha3, iso, name)
|
|
@@ -36,8 +40,12 @@ module FreeZipcodeData
|
|
|
36
40
|
|
|
37
41
|
begin
|
|
38
42
|
database.execute(sql)
|
|
39
|
-
rescue SQLite3::ConstraintException
|
|
40
|
-
|
|
43
|
+
rescue SQLite3::ConstraintException => e
|
|
44
|
+
unless e.message.include?('UNIQUE')
|
|
45
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
46
|
+
end
|
|
47
|
+
rescue StandardError => e
|
|
48
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
41
49
|
end
|
|
42
50
|
|
|
43
51
|
update_progress
|
data/lib/free_zipcode_data/county_table.rb
CHANGED
|
@@ -26,8 +26,14 @@ module FreeZipcodeData
|
|
|
26
26
|
def write(row)
|
|
27
27
|
return nil unless row[:county]
|
|
28
28
|
|
|
29
|
-
state_id = get_state_id(row[:short_state], row[:state])
|
|
30
|
-
|
|
29
|
+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
|
|
30
|
+
unless state_id
|
|
31
|
+
logger.verbose(
|
|
32
|
+
"Skipping county '#{row[:county]}': no state found for " \
|
|
33
|
+
"abbr='#{row[:short_state]}', country='#{row[:country]}'"
|
|
34
|
+
)
|
|
35
|
+
return nil
|
|
36
|
+
end
|
|
31
37
|
|
|
32
38
|
sql = <<-SQL
|
|
33
39
|
INSERT INTO counties (state_id, abbr, name)
|
|
@@ -39,10 +45,12 @@ module FreeZipcodeData
|
|
|
39
45
|
|
|
40
46
|
begin
|
|
41
47
|
database.execute(sql)
|
|
42
|
-
rescue SQLite3::ConstraintException
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
rescue SQLite3::ConstraintException => e
|
|
49
|
+
unless e.message.include?('UNIQUE')
|
|
50
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
51
|
+
end
|
|
52
|
+
rescue StandardError => e
|
|
53
|
+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
|
|
46
54
|
end
|
|
47
55
|
|
|
48
56
|
update_progress
|
data/lib/free_zipcode_data/data_source.rb
CHANGED
|
@@ -56,11 +56,11 @@ module FreeZipcodeData
|
|
|
56
56
|
if options[:clobber]
|
|
57
57
|
Zip.on_exists_proc = true
|
|
58
58
|
Logger.instance.verbose("Extracting: #{zipfile}...")
|
|
59
|
-
entry.extract(
|
|
59
|
+
entry.extract(destination_directory: options.work_dir)
|
|
60
60
|
end
|
|
61
61
|
else
|
|
62
62
|
Logger.instance.verbose("Extracting: #{zipfile}...")
|
|
63
|
-
entry.extract(
|
|
63
|
+
entry.extract(destination_directory: options.work_dir)
|
|
64
64
|
end
|
|
65
65
|
break
|
|
66
66
|
end
|