free_zipcode_data 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.dockerignore +10 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +25 -16
  5. data/.ruby-version +1 -1
  6. data/CHANGELOG +17 -0
  7. data/CLAUDE.md +89 -0
  8. data/Dockerfile +21 -0
  9. data/Gemfile +10 -0
  10. data/Gemfile.lock +50 -36
  11. data/README.md +38 -5
  12. data/Rakefile +1 -1
  13. data/docker-entrypoint.sh +14 -0
  14. data/free_zipcode_data.gemspec +8 -14
  15. data/lib/etl/common.rb +1 -0
  16. data/lib/etl/csv_source.rb +4 -4
  17. data/lib/free_zipcode_data/country_table.rb +10 -2
  18. data/lib/free_zipcode_data/county_table.rb +14 -6
  19. data/lib/free_zipcode_data/data_source.rb +2 -2
  20. data/lib/free_zipcode_data/db_table.rb +54 -7
  21. data/lib/free_zipcode_data/logger.rb +8 -12
  22. data/lib/free_zipcode_data/runner.rb +2 -2
  23. data/lib/free_zipcode_data/state_table.rb +37 -5
  24. data/lib/free_zipcode_data/version.rb +1 -1
  25. data/lib/free_zipcode_data/zipcode_table.rb +15 -5
  26. data/lib/free_zipcode_data.rb +3 -3
  27. data/lib/tasks/version.rake +27 -24
  28. data/spec/etl/csv_source_spec.rb +57 -0
  29. data/spec/etl/free_zipcode_data_job_spec.rb +135 -0
  30. data/spec/fixtures/.free_zipcode_data.yml +1 -0
  31. data/spec/fixtures/US.txt +5 -0
  32. data/spec/fixtures/US.zip +0 -0
  33. data/spec/fixtures/test_data.csv +7 -0
  34. data/spec/fixtures/test_data.txt +5 -0
  35. data/spec/free_zipcode_data/country_table_spec.rb +52 -0
  36. data/spec/free_zipcode_data/county_table_spec.rb +84 -0
  37. data/spec/free_zipcode_data/data_source_spec.rb +131 -0
  38. data/spec/free_zipcode_data/db_table_spec.rb +164 -0
  39. data/spec/free_zipcode_data/logger_spec.rb +78 -0
  40. data/spec/free_zipcode_data/options_spec.rb +37 -0
  41. data/spec/free_zipcode_data/runner_spec.rb +91 -0
  42. data/spec/free_zipcode_data/sqlite_ram_spec.rb +64 -0
  43. data/spec/free_zipcode_data/state_table_spec.rb +112 -0
  44. data/spec/free_zipcode_data/zipcode_table_spec.rb +102 -0
  45. data/spec/free_zipcode_data_spec.rb +38 -0
  46. data/spec/spec_helper.rb +23 -2
  47. data/spec/support/database_helpers.rb +48 -0
  48. metadata +41 -91
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c86afee1e7972351d5d9f827348d4a0cd5cd3607e5ee5b2b7b4bc73db723bd2c
4
- data.tar.gz: be4ee76892a7ebd900f9182360fd0b6263e9e523d35c50bafba4cf962ed74e8c
3
+ metadata.gz: 5a29b38bacdbf91fb1aad6732c0378eac3588a8bc9efd05c09b48470427da87e
4
+ data.tar.gz: 3b309917f7e87235ddc201f0db852d81b6f1310b85a32c18bc027600eceb34a3
5
5
  SHA512:
6
- metadata.gz: 067d3c28ae188431f5d6418fa195fe72cbba273b266be6cae5aade8156e52d6c99cbe5767b31c3e6ce4214dd70a5969e93b85ea3f66a5eb3fa1434c1af7a2ffc
7
- data.tar.gz: 2e80bb40c1e6b9863548a2f87630ce9e6ca0201d1520703b1b6f63d1b3020eaef8f6ce570df1d73365be3e4d7b7cd79cd5901a4677bf02956b20bbb2ce56e1c0
6
+ metadata.gz: 7d8e5b4a3b22359d46f3def15e4801d6ba39d70a8ec019bb1e302c6f98510b5fe6d0166bb7f9c2d8e35a3fda66572b4f4dbd6bc78a87874819864ea6225f8727
7
+ data.tar.gz: e26f46c2be8dfbb7f69c85a494de27a9607276e6958c56e58b4e63d20a2c9baca9da9fe95a35d4b3cb083f00f24bf0064e1be8aa815b1c6e417ed3cf32b48edd
data/.dockerignore ADDED
@@ -0,0 +1,10 @@
1
+ .git
2
+ vendor/bundle
3
+ pkg
4
+ stubs
5
+ spec
6
+ data
7
+ build
8
+ .claude
9
+ docs
10
+ *.sqlite3
data/.gitignore CHANGED
@@ -5,3 +5,5 @@
5
5
  /stubs
6
6
  /vendor/bundle/
7
7
  /pkg
8
+ .claude/
9
+ docs/plans
data/.rubocop.yml CHANGED
@@ -1,23 +1,24 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.7
2
+ TargetRubyVersion: 3.4
3
3
 
4
4
  # Include gemspec and Rakefile
5
5
  Include:
6
- - '**/*.gemspec'
7
- - '**/*.podspec'
8
- - '**/*.jbuilder'
9
- - '**/*.rake'
10
- - '**/Gemfile'
11
- - '**/Rakefile'
12
- - '**/Capfile'
13
- - '**/Guardfile'
14
- - '**/Podfile'
15
- - '**/Thorfile'
16
- - '**/Vagrantfile'
6
+ - "**/*.rb"
7
+ - "**/*.gemspec"
8
+ - "**/*.podspec"
9
+ - "**/*.jbuilder"
10
+ - "**/*.rake"
11
+ - "**/Gemfile"
12
+ - "**/Rakefile"
13
+ - "**/Capfile"
14
+ - "**/Guardfile"
15
+ - "**/Podfile"
16
+ - "**/Thorfile"
17
+ - "**/Vagrantfile"
17
18
  Exclude:
18
- - 'vendor/**/*'
19
- - 'stubs/**/*'
20
- - 'spec/support/shared_contexts/*'
19
+ - "vendor/**/*"
20
+ - "stubs/**/*"
21
+ - "spec/support/shared_contexts/*"
21
22
 
22
23
  NewCops: enable
23
24
 
@@ -51,6 +52,10 @@ Style/DoubleNegation:
51
52
  Style/PerlBackrefs:
52
53
  Enabled: false
53
54
 
55
+ Style/OpenStructUse:
56
+ Exclude:
57
+ - "spec/**/*"
58
+
54
59
  ########################################
55
60
  # Lint Cops
56
61
 
@@ -66,6 +71,10 @@ Security/Eval:
66
71
  ########################################
67
72
  # Metrics Cops
68
73
 
74
+ Metrics/BlockLength:
75
+ Exclude:
76
+ - "spec/**/*"
77
+
69
78
  Metrics/MethodLength:
70
79
  CountComments: false # count full line comments?
71
80
  Max: 30
@@ -77,7 +86,7 @@ Metrics/AbcSize:
77
86
  Enabled: false
78
87
 
79
88
  ########################################
80
- # Metrics Cops
89
+ # Naming Cops
81
90
 
82
91
  Naming/FileName:
83
92
  Enabled: false
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.4.8
data/CHANGELOG CHANGED
@@ -1,3 +1,20 @@
1
+ *1.2.0* (February 17, 2026)
2
+
3
+ * Add Dockerfile for containerized data generation without requiring a local Ruby installation
4
+ * Add docker-entrypoint.sh with COUNTRY environment variable support and input validation
5
+ * Add .dockerignore to keep Docker build context lean
6
+
7
+ *1.1.0* (February 14, 2026)
8
+
9
+ * Fix state abbreviation uniqueness: state lookups are now scoped by country, allowing the same abbreviation (e.g., "NY") in different countries
10
+ * Update state and state_name indexes to include country_id for cross-country uniqueness
11
+ * Synthesize state entries from country data for countries without state/province subdivisions
12
+ * Add logging for silent failure cascade: warn when countries, states, counties, or zipcodes are skipped due to missing lookups
13
+ * Narrow rescue SQLite3::ConstraintException to only swallow UNIQUE violations; re-raise NOT NULL, FOREIGN KEY, and CHECK constraint errors
14
+ * Add comprehensive RSpec test suite with cross-country integration tests
15
+ * Upgrade to Ruby 3.4.8 and fix rubyzip 3.x API compatibility
16
+ * Update README to remove stale rake task references
17
+
1
18
  *1.0.6* (September 30, 2025)
2
19
 
3
20
  * Bump rexml from 3.3.9 to 3.4.2
data/CLAUDE.md ADDED
@@ -0,0 +1,89 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ A Ruby gem that downloads postal/zipcode data from GeoNames.org, processes it via an ETL pipeline, and outputs an SQLite3 database and optional CSV files. Supports single-country or all-countries processing.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ # Install dependencies (vendored to vendor/bundle, binstubs in stubs/)
13
+ bundle install
14
+
15
+ # Run all tests
16
+ bundle exec rspec
17
+
18
+ # Run a single test file
19
+ bundle exec rspec spec/path/to/file_spec.rb
20
+
21
+ # Run a specific test by line number
22
+ bundle exec rspec spec/path/to/file_spec.rb:42
23
+
24
+ # Lint
25
+ bundle exec rubocop
26
+
27
+ # Lint with auto-correct
28
+ bundle exec rubocop -a
29
+
30
+ # Version bumping (do on develop branch, not master)
31
+ bundle exec rake version:bump_patch
32
+ bundle exec rake version:bump_minor
33
+ bundle exec rake version:bump_major
34
+
35
+ # Build and install gem
36
+ bundle exec rake build
37
+ bundle exec rake install
38
+
39
+ # Release gem
40
+ bundle exec rake release
41
+ ```
42
+
43
+ ## Architecture
44
+
45
+ The gem follows an ETL (Extract, Transform, Load) pattern using the Kiba gem:
46
+
47
+ 1. **Extract**: `DataSource` downloads zip files from GeoNames.org, extracts them, and prepares CSV files with headers
48
+ 2. **Source**: `CsvSource` (Kiba source) feeds rows from the prepared CSV into the pipeline
49
+ 3. **Load**: Four Kiba destination table classes write rows into an in-memory SQLite database
50
+
51
+ ### Key Flow
52
+
53
+ `bin/free_zipcode_data` → `Runner#start` → `DataSource#download` → `DataSource#datafile` (extract zip + add CSV headers) → `SqliteRam` (in-memory DB) → `ETL::FreeZipcodeDataJob` (Kiba pipeline) → `SqliteRam#save_to_disk`
54
+
55
+ ### Core Classes
56
+
57
+ - **`FreeZipcodeData::Runner`** - CLI entry point; parses args via Optimist, orchestrates the full pipeline
58
+ - **`FreeZipcodeData::DataSource`** - Downloads and extracts GeoNames zip files, prepares CSV with headers
59
+ - **`SqliteRam`** - Wraps SQLite3; works entirely in-memory then saves to disk via `SQLite3::Backup`
60
+ - **`FreeZipcodeData::DbTable`** - Base class for all table classes; provides progress bar, SQL helpers, and country lookup from `country_lookup_table.yml`
61
+ - **`FreeZipcodeData::CountryTable`/`StateTable`/`CountyTable`/`ZipcodeTable`** - Kiba destinations; each has `build` (creates schema + indexes) and `write` (inserts rows, swallows duplicate constraint violations)
62
+ - **`ETL::FreeZipcodeDataJob`** - Configures the Kiba pipeline with one source and four destinations
63
+ - **`CsvSource`** - Kiba-compatible CSV reader
64
+
65
+ ### Singletons
66
+
67
+ `Options` and `Logger` are singletons (via Ruby's `Singleton` module). `Runner` has an `.instance` convenience class method (returns `new` each time, not cached).
68
+
69
+ ## Configuration
70
+
71
+ - `.ruby-version`: 3.4.8
72
+ - Bundle path: `vendor/bundle` (binstubs in `stubs/`)
73
+ - Environment: `APP_ENV` controls environment (`test`, `development`)
74
+ - Config file: `~/.free_zipcode_data.yml` (overridable via `FZD_CONFIG_FILE` env var; uses `spec/fixtures/` version in test)
75
+
76
+ ## Rubocop
77
+
78
+ Key style settings (`.rubocop.yml`):
79
+ - Target Ruby 3.4
80
+ - Max line length: 110
81
+ - Max method length: 30 lines
82
+ - `Style/ClassVars`, `Style/Documentation`, `Metrics/AbcSize`, `Lint/SuppressedException` disabled
83
+ - `vendor/` and `stubs/` excluded
84
+
85
+ ## Git Workflow
86
+
87
+ - `master` is the release branch
88
+ - `develop` is the development branch
89
+ - Version bumps should happen on `develop`, then merge to `master` before `rake release`
data/Dockerfile ADDED
@@ -0,0 +1,21 @@
1
+ FROM ruby:3.4-slim
2
+
3
+ RUN apt-get update && \
4
+ apt-get install -y --no-install-recommends build-essential git pkg-config && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY Gemfile Gemfile.lock free_zipcode_data.gemspec .ruby-version ./
10
+ COPY lib/free_zipcode_data/version.rb lib/free_zipcode_data/version.rb
11
+ RUN git init && git add . && \
12
+ bundle config set --local without development && \
13
+ bundle install
14
+
15
+ COPY . .
16
+ RUN git add .
17
+
18
+ ENV COUNTRY=""
19
+ VOLUME /output
20
+
21
+ ENTRYPOINT ["./docker-entrypoint.sh"]
data/Gemfile CHANGED
@@ -4,3 +4,13 @@ source 'https://rubygems.org'
4
4
  git_source(:github) { |repo| "https://github.com/#{repo}.git" }
5
5
 
6
6
  gemspec
7
+
8
+ group :development do
9
+ gem 'bundler'
10
+ gem 'pry-nav', '~> 0.2'
11
+ gem 'rake', '~> 13.0'
12
+ gem 'rspec', '~> 3.7'
13
+ gem 'rubocop'
14
+ gem 'ruby-prof', '~> 0.17'
15
+ gem 'simplecov', '~> 0.16'
16
+ end
data/Gemfile.lock CHANGED
@@ -1,9 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- free_zipcode_data (1.0.6)
4
+ free_zipcode_data (1.2.0)
5
5
  colored (~> 1.2)
6
+ csv
6
7
  kiba (~> 4.0)
8
+ logger
7
9
  optimist (~> 3.0)
8
10
  ruby-progressbar (~> 1.9)
9
11
  rubyzip (>= 1.2.2)
@@ -12,63 +14,75 @@ PATH
12
14
  GEM
13
15
  remote: https://rubygems.org/
14
16
  specs:
15
- ast (2.4.2)
17
+ ast (2.4.3)
16
18
  coderay (1.1.3)
17
19
  colored (1.2)
18
- diff-lcs (1.4.4)
19
- docile (1.4.0)
20
+ csv (3.3.5)
21
+ diff-lcs (1.6.2)
22
+ docile (1.4.1)
23
+ json (2.18.1)
20
24
  kiba (4.0.0)
25
+ language_server-protocol (3.17.0.5)
26
+ lint_roller (1.1.0)
27
+ logger (1.7.0)
21
28
  method_source (0.9.2)
22
29
  mini_portile2 (2.8.9)
23
30
  optimist (3.2.1)
24
- parallel (1.21.0)
25
- parser (3.0.2.0)
31
+ parallel (1.27.0)
32
+ parser (3.3.10.1)
26
33
  ast (~> 2.4.1)
34
+ racc
35
+ prism (1.9.0)
27
36
  pry (0.12.2)
28
37
  coderay (~> 1.1.0)
29
38
  method_source (~> 0.9.0)
30
39
  pry-nav (0.3.0)
31
40
  pry (>= 0.9.10, < 0.13.0)
32
- rainbow (3.0.0)
33
- rake (13.0.6)
34
- regexp_parser (2.1.1)
35
- rexml (3.4.2)
36
- rspec (3.10.0)
37
- rspec-core (~> 3.10.0)
38
- rspec-expectations (~> 3.10.0)
39
- rspec-mocks (~> 3.10.0)
40
- rspec-core (3.10.1)
41
- rspec-support (~> 3.10.0)
42
- rspec-expectations (3.10.1)
41
+ racc (1.8.1)
42
+ rainbow (3.1.1)
43
+ rake (13.3.1)
44
+ regexp_parser (2.11.3)
45
+ rspec (3.13.2)
46
+ rspec-core (~> 3.13.0)
47
+ rspec-expectations (~> 3.13.0)
48
+ rspec-mocks (~> 3.13.0)
49
+ rspec-core (3.13.6)
50
+ rspec-support (~> 3.13.0)
51
+ rspec-expectations (3.13.5)
43
52
  diff-lcs (>= 1.2.0, < 2.0)
44
- rspec-support (~> 3.10.0)
45
- rspec-mocks (3.10.2)
53
+ rspec-support (~> 3.13.0)
54
+ rspec-mocks (3.13.7)
46
55
  diff-lcs (>= 1.2.0, < 2.0)
47
- rspec-support (~> 3.10.0)
48
- rspec-support (3.10.3)
49
- rubocop (1.22.3)
56
+ rspec-support (~> 3.13.0)
57
+ rspec-support (3.13.7)
58
+ rubocop (1.84.2)
59
+ json (~> 2.3)
60
+ language_server-protocol (~> 3.17.0.2)
61
+ lint_roller (~> 1.1.0)
50
62
  parallel (~> 1.10)
51
- parser (>= 3.0.0.0)
63
+ parser (>= 3.3.0.2)
52
64
  rainbow (>= 2.2.2, < 4.0)
53
- regexp_parser (>= 1.8, < 3.0)
54
- rexml
55
- rubocop-ast (>= 1.12.0, < 2.0)
65
+ regexp_parser (>= 2.9.3, < 3.0)
66
+ rubocop-ast (>= 1.49.0, < 2.0)
56
67
  ruby-progressbar (~> 1.7)
57
- unicode-display_width (>= 1.4.0, < 3.0)
58
- rubocop-ast (1.12.0)
59
- parser (>= 3.0.1.1)
68
+ unicode-display_width (>= 2.4.0, < 4.0)
69
+ rubocop-ast (1.49.0)
70
+ parser (>= 3.3.7.2)
71
+ prism (~> 1.7)
60
72
  ruby-prof (0.18.0)
61
- ruby-progressbar (1.11.0)
62
- rubyzip (3.1.1)
63
- simplecov (0.21.2)
73
+ ruby-progressbar (1.13.0)
74
+ rubyzip (3.2.2)
75
+ simplecov (0.22.0)
64
76
  docile (~> 1.1)
65
77
  simplecov-html (~> 0.11)
66
78
  simplecov_json_formatter (~> 0.1)
67
- simplecov-html (0.12.3)
68
- simplecov_json_formatter (0.1.3)
79
+ simplecov-html (0.13.2)
80
+ simplecov_json_formatter (0.1.4)
69
81
  sqlite3 (1.7.3)
70
82
  mini_portile2 (~> 2.8.0)
71
- unicode-display_width (2.1.0)
83
+ unicode-display_width (3.2.0)
84
+ unicode-emoji (~> 4.1)
85
+ unicode-emoji (4.2.0)
72
86
 
73
87
  PLATFORMS
74
88
  ruby
@@ -84,4 +98,4 @@ DEPENDENCIES
84
98
  simplecov (~> 0.16)
85
99
 
86
100
  BUNDLED WITH
87
- 2.2.22
101
+ 2.6.9
data/README.md CHANGED
@@ -6,11 +6,11 @@ This project is an automated solution for retrieving and collating US and worldw
6
6
 
7
7
  ## History
8
8
 
9
- In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files. Later, we wrote 3 rake tasks to automate this process.
9
+ In 2011, we originally pulled down all the US census data we could find, parsed it and exported it into 3 .csv files.
10
10
 
11
11
  In 2017 we began using [GeoNames](http://www.geonames.org) data, which is licensed under Creative Commons. We are grateful to [GeoNames](http://www.geonames.org) for sharing, and urge you to [visit their site](http://www.geonames.org) and support their work.
12
12
 
13
- In 2018 we refactored the project and made it into a Ruby gem with a command-line executable for automating this process.
13
+ In 2018 we refactored the project and made it into a Ruby gem with a unified command-line executable (`free_zipcode_data`) that handles downloading, processing, and database generation in a single step.
14
14
 
15
15
  ## What's Included
16
16
 
@@ -22,7 +22,7 @@ See the GeoNames [readme.txt](http://download.geonames.org/export/zip/readme.txt
22
22
 
23
23
  ## Usage
24
24
 
25
- First, you need to install Ruby and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
25
+ First, you need to install Ruby 3.4+ and Rubygems. Though that is not a difficult task, it is beyond the scope of this README. A search engine of your choice will help discover how to do this. Once you have done that:
26
26
 
27
27
  ```bash
28
28
  $ gem install free_zipcode_data
@@ -61,8 +61,6 @@ $ free_zipcode_data --work-dir /tmp/work_dir --country US --generate-files
61
61
  $ free_zipcode_data --work-dir /tmp/work_dir --generate-files
62
62
  ```
63
63
 
64
- The rake tasks cascade, from the bottom up. So if you run `rake data:populate_db`, it will automatically call `rake data:build` if the .csv files are missing, which will call `rake data:download` if the .zip files are missing.
65
-
66
64
  ## SQLite3 Database
67
65
 
68
66
  The executable will generate an SQLite3 database in the specified directory `--work-dir` but it will not generate the `.csv` files by default. Specify `--generate-files` if you want those as well.
@@ -107,6 +105,41 @@ create table zipcodes (
107
105
 
108
106
  Both `lat` and `lon`, geocodes, are populated for each zipcode record.
109
107
 
108
+ ## Docker
109
+
110
+ If you prefer not to install Ruby locally, you can use Docker to generate the database. You only need [Docker](https://docs.docker.com/get-docker/) installed.
111
+
112
+ ### Build the image
113
+
114
+ ```bash
115
+ $ git clone https://github.com/midwire/free_zipcode_data.git
116
+ $ cd free_zipcode_data
117
+ $ docker build -t free_zipcode_data .
118
+ ```
119
+
120
+ ### Generate data
121
+
122
+ Use the `COUNTRY` environment variable to specify a 2-letter country code. Omit it to process all available countries.
123
+
124
+ **Single country (e.g., US):**
125
+
126
+ ```bash
127
+ $ docker run --rm -v $(pwd)/output:/output -e COUNTRY=US free_zipcode_data
128
+ ```
129
+
130
+ **All countries:**
131
+
132
+ ```bash
133
+ $ docker run --rm -v $(pwd)/output:/output free_zipcode_data
134
+ ```
135
+
136
+ The following files will be written to the `./output/` directory on your host:
137
+
138
+ * `free_zipcode_data.sqlite3` - SQLite database with countries, states, counties, and zipcodes tables
139
+ * `countries.csv`, `states.csv`, `counties.csv`, `zipcodes.csv` - CSV exports of each table
140
+
141
+ Look up supported country codes at [GeoNames](http://download.geonames.org/export/zip/).
142
+
110
143
  ## Data License
111
144
 
112
145
  The zipcode data is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 Unported License</a>, carried forward from [GeoNames](http://www.geonames.org).<br />
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rubygems'
4
4
  require 'bundler/setup'
5
5
 
6
6
  require 'rake'
7
- Dir['lib/tasks/**/*.rake'].sort.each { |ext| load ext }
7
+ Dir['lib/tasks/**/*.rake'].each { |ext| load ext }
8
8
 
9
9
  # Install rubygem tasks
10
10
  Bundler::GemHelper.install_tasks
data/docker-entrypoint.sh ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ args=(--work-dir /output --generate-files --clobber)
5
+
6
+ if [ -n "${COUNTRY:-}" ]; then
7
+ if [[ ! "$COUNTRY" =~ ^[A-Z]{2}$ ]]; then
8
+ echo "Error: COUNTRY must be a 2-letter uppercase code (e.g., US, AD)" >&2
9
+ exit 1
10
+ fi
11
+ args+=(--country "$COUNTRY")
12
+ fi
13
+
14
+ exec bundle exec ruby bin/free_zipcode_data "${args[@]}"
data/free_zipcode_data.gemspec CHANGED
@@ -23,18 +23,12 @@ Gem::Specification.new do |spec|
23
23
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
24
24
  spec.require_paths = ['lib']
25
25
 
26
- spec.add_development_dependency 'bundler'
27
- spec.add_development_dependency 'pry-nav', '~> 0.2'
28
- spec.add_development_dependency 'rake', '~> 13.0'
29
- spec.add_development_dependency 'rspec', '~> 3.7'
30
- spec.add_development_dependency 'rubocop'
31
- spec.add_development_dependency 'ruby-prof', '~> 0.17'
32
- spec.add_development_dependency 'simplecov', '~> 0.16'
33
-
34
- spec.add_runtime_dependency 'colored', '~> 1.2'
35
- spec.add_runtime_dependency 'kiba', '~> 4.0'
36
- spec.add_runtime_dependency 'optimist', '~> 3.0'
37
- spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9'
38
- spec.add_runtime_dependency 'rubyzip', '>= 1.2.2'
39
- spec.add_runtime_dependency 'sqlite3', '~> 1.3'
26
+ spec.add_dependency 'colored', '~> 1.2'
27
+ spec.add_dependency 'csv'
28
+ spec.add_dependency 'kiba', '~> 4.0'
29
+ spec.add_dependency 'logger'
30
+ spec.add_dependency 'optimist', '~> 3.0'
31
+ spec.add_dependency 'ruby-progressbar', '~> 1.9'
32
+ spec.add_dependency 'rubyzip', '>= 1.2.2'
33
+ spec.add_dependency 'sqlite3', '~> 1.3'
40
34
  end
data/lib/etl/common.rb CHANGED
@@ -16,6 +16,7 @@ end
16
16
  def limit(count)
17
17
  count = Integer(count || -1)
18
18
  return if count == -1
19
+
19
20
  transform do |row|
20
21
  @counter ||= 0
21
22
  @counter += 1
data/lib/etl/csv_source.rb CHANGED
@@ -14,10 +14,10 @@ class CsvSource
14
14
 
15
15
  def each
16
16
  CSV.open(filename,
17
- col_sep: delimeter,
18
- headers: headers,
19
- header_converters: :symbol,
20
- quote_char: quote_char) do |csv|
17
+ col_sep: delimeter,
18
+ headers: headers,
19
+ header_converters: :symbol,
20
+ quote_char: quote_char) do |csv|
21
21
  csv.each do |row|
22
22
  yield(row.to_hash)
23
23
  end
data/lib/free_zipcode_data/country_table.rb CHANGED
@@ -25,6 +25,10 @@ module FreeZipcodeData
25
25
 
26
26
  def write(row)
27
27
  country_hash = country_lookup_table[row[:country]]
28
+ unless country_hash
29
+ warn_once("Skipping unknown country '#{row[:country]}': not in country_lookup_table")
30
+ return update_progress
31
+ end
28
32
 
29
33
  sql = <<-SQL
30
34
  INSERT INTO countries (alpha2, alpha3, iso, name)
@@ -36,8 +40,12 @@ module FreeZipcodeData
36
40
 
37
41
  begin
38
42
  database.execute(sql)
39
- rescue SQLite3::ConstraintException
40
- # Swallow duplicates
43
+ rescue SQLite3::ConstraintException => e
44
+ unless e.message.include?('UNIQUE')
45
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46
+ end
47
+ rescue StandardError => e
48
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
41
49
  end
42
50
 
43
51
  update_progress
data/lib/free_zipcode_data/county_table.rb CHANGED
@@ -26,8 +26,14 @@ module FreeZipcodeData
26
26
  def write(row)
27
27
  return nil unless row[:county]
28
28
 
29
- state_id = get_state_id(row[:short_state], row[:state])
30
- return nil unless state_id
29
+ state_id = get_state_id(row[:country], row[:short_state], row[:state])
30
+ unless state_id
31
+ logger.verbose(
32
+ "Skipping county '#{row[:county]}': no state found for " \
33
+ "abbr='#{row[:short_state]}', country='#{row[:country]}'"
34
+ )
35
+ return nil
36
+ end
31
37
 
32
38
  sql = <<-SQL
33
39
  INSERT INTO counties (state_id, abbr, name)
@@ -39,10 +45,12 @@ module FreeZipcodeData
39
45
 
40
46
  begin
41
47
  database.execute(sql)
42
- rescue SQLite3::ConstraintException
43
- # swallow duplicates
44
- rescue StandardError => err
45
- raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
48
+ rescue SQLite3::ConstraintException => e
49
+ unless e.message.include?('UNIQUE')
50
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
51
+ end
52
+ rescue StandardError => e
53
+ raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46
54
  end
47
55
 
48
56
  update_progress
data/lib/free_zipcode_data/data_source.rb CHANGED
@@ -56,11 +56,11 @@ module FreeZipcodeData
56
56
  if options[:clobber]
57
57
  Zip.on_exists_proc = true
58
58
  Logger.instance.verbose("Extracting: #{zipfile}...")
59
- entry.extract(country_file)
59
+ entry.extract(destination_directory: options.work_dir)
60
60
  end
61
61
  else
62
62
  Logger.instance.verbose("Extracting: #{zipfile}...")
63
- entry.extract(country_file)
63
+ entry.extract(destination_directory: options.work_dir)
64
64
  end
65
65
  break
66
66
  end