adminbounds 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. adminbounds-0.1.0/.env.example +6 -0
  2. adminbounds-0.1.0/.git +1 -0
  3. adminbounds-0.1.0/.gitignore +39 -0
  4. adminbounds-0.1.0/LICENSE +21 -0
  5. adminbounds-0.1.0/PKG-INFO +367 -0
  6. adminbounds-0.1.0/README.md +316 -0
  7. adminbounds-0.1.0/main.py +6 -0
  8. adminbounds-0.1.0/origin-data/download_districts.py +136 -0
  9. adminbounds-0.1.0/origin-data/merge_direct_districts.py +91 -0
  10. adminbounds-0.1.0/pyproject.toml +54 -0
  11. adminbounds-0.1.0/sql/functions/infer_admin_semantic_relation.sql +355 -0
  12. adminbounds-0.1.0/sql/schema/01_admin_units.sql +34 -0
  13. adminbounds-0.1.0/sql/schema/02_thematic_admin_relations.sql +34 -0
  14. adminbounds-0.1.0/src/adminbounds/__init__.py +7 -0
  15. adminbounds-0.1.0/src/adminbounds/_annotate.py +125 -0
  16. adminbounds-0.1.0/src/adminbounds/_diagnose.py +153 -0
  17. adminbounds-0.1.0/src/adminbounds/_gadm.py +243 -0
  18. adminbounds-0.1.0/src/adminbounds/_import.py +198 -0
  19. adminbounds-0.1.0/src/adminbounds/_upload.py +49 -0
  20. adminbounds-0.1.0/src/adminbounds/cli/__init__.py +158 -0
  21. adminbounds-0.1.0/src/adminbounds/client.py +136 -0
  22. adminbounds-0.1.0/src/adminbounds/config.py +31 -0
  23. adminbounds-0.1.0/src/adminbounds/data/china.geojson +1 -0
  24. adminbounds-0.1.0/src/adminbounds/data/china_city.geojson +1 -0
  25. adminbounds-0.1.0/src/adminbounds/data/china_district.geojson +1 -0
  26. adminbounds-0.1.0/src/adminbounds/data/china_state.geojson +1 -0
  27. adminbounds-0.1.0/src/adminbounds/db.py +21 -0
  28. adminbounds-0.1.0/src/adminbounds/sql/functions/infer_admin_semantic_relation.sql +355 -0
  29. adminbounds-0.1.0/src/adminbounds/sql/schema/01_admin_units.sql +34 -0
  30. adminbounds-0.1.0/src/adminbounds/sql/schema/02_thematic_admin_relations.sql +34 -0
  31. adminbounds-0.1.0/uv.lock +680 -0
  32. adminbounds-0.1.0/validation/sample_queries.sql +58 -0
@@ -0,0 +1,6 @@
1
+ ADMINBOUNDS_DB_HOST=localhost
2
+ ADMINBOUNDS_DB_PORT=5432
3
+ ADMINBOUNDS_DB_NAME=DBNAME
4
+ ADMINBOUNDS_DB_USER=USERNAME
5
+ ADMINBOUNDS_DB_PASSWORD=your_password_here
6
+ ADMINBOUNDS_DB_SCHEMA=adminbounds
adminbounds-0.1.0/.git ADDED
@@ -0,0 +1 @@
1
+ gitdir: ../.git/modules/geo-admin-v2
@@ -0,0 +1,39 @@
1
+ # Environment
2
+ .env
3
+ *.env
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.pyo
9
+ .venv/
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+
14
+ # uv
15
+ .python-version
16
+
17
+ # Data files (large geospatial data — use origin-data/ only locally)
18
+ origin-data/china/
19
+ origin-data/cache/
20
+ *.geojson
21
+ # Exception: bundled package data must be tracked
22
+ !src/adminbounds/data/*.geojson
23
+ *.shp
24
+ *.shx
25
+ *.dbf
26
+ *.prj
27
+ *.cpg
28
+ *.gpkg
29
+ *.sqlite
30
+ *.db
31
+
32
+ # IDE / OS
33
+ .vscode/
34
+ .idea/
35
+ .DS_Store
36
+ Thumbs.db
37
+
38
+ # Claude Code local settings
39
+ .claude/settings.local.json
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Zihao Tang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,367 @@
1
+ Metadata-Version: 2.4
2
+ Name: adminbounds
3
+ Version: 0.1.0
4
+ Summary: Administrative boundary semantic relation inference for geospatial datasets
5
+ Project-URL: Homepage, https://github.com/JohnnnyTang/admin-bounds
6
+ Project-URL: Repository, https://github.com/JohnnnyTang/admin-bounds
7
+ Project-URL: Bug Tracker, https://github.com/JohnnnyTang/admin-bounds/issues
8
+ Author: GeoVecPrism Contributors
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Zihao Tang
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: GADM,GIS,PostGIS,administrative boundaries,geospatial,spatial inference
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Topic :: Database
39
+ Classifier: Topic :: Scientific/Engineering :: GIS
40
+ Requires-Python: >=3.12
41
+ Requires-Dist: geoalchemy2>=0.18.4
42
+ Requires-Dist: geopandas>=0.14
43
+ Requires-Dist: pip>=26.0.1
44
+ Requires-Dist: psycopg2-binary>=2.9
45
+ Requires-Dist: pydantic-settings>=2.0
46
+ Requires-Dist: python-dotenv>=1.0
47
+ Requires-Dist: shapely>=2.0
48
+ Requires-Dist: sqlalchemy>=2.0
49
+ Requires-Dist: tqdm>=4.66
50
+ Description-Content-Type: text/markdown
51
+
52
+ # adminbounds
53
+
54
+ Geospatial admin-unit semantic relation inference system for worldwide administrative boundaries.
55
+
56
+ Given any vector geometry, the system infers **how it relates to an administrative hierarchy** — whether it coincides with a known boundary, intersects multiple units, contains child units, or sits inside a parent region. Results are stored as structured JSONB annotations and are queryable at scale via PostGIS.
57
+
58
+ Bundled data covers China's four-level hierarchy. Additional countries can be downloaded on demand via GADM 4.1.
59
+
60
+ ---
61
+
62
+ ## What It Does
63
+
64
+ The core is a PostGIS function `infer_admin_semantic_relation(geom)` that classifies a geometry into four relationship types:
65
+
66
+ | Relationship | Meaning | Example |
67
+ |---|---|---|
68
+ | `coincides_with` | Substantially overlaps a known boundary (IoU ≥ 0.85) | A polygon matching Beijing municipality exactly |
69
+ | `intersects_with` | Partially overlaps units at the dominant level | A corridor crossing Nanjing and Suzhou |
70
+ | `covers_children` | The geometry contains child-level units | A province polygon covering its cities |
71
+ | `contained_by` | The ancestor chain above the matched unit | A city → its province → country |
72
+
73
+ The function returns a single JSONB object containing all four arrays plus two scalar fields: `admin_level_match` and a `confidence` score. A Python batch script stores results in a `thematic_admin_relations` table, linking any source feature table to its administrative context.
74
+
75
+ ---
76
+
77
+ ## Project Structure
78
+
79
+ ```
80
+ adminbounds/
81
+ ├── src/adminbounds/
82
+ │ ├── _import.py # DDL deploy + bundled boundary import pipeline
83
+ │ ├── _gadm.py # GADM 4.1 worldwide download + import
84
+ │ ├── _annotate.py # Batch annotation logic
85
+ │ ├── _upload.py # GeoJSON → PostGIS upload helper
86
+ │ ├── _diagnose.py # Annotation diagnostic checks
87
+ │ ├── client.py # AdminBoundsClient high-level Python API
88
+ │ ├── config.py # Pydantic settings (ADMINBOUNDS_DB_* env vars)
89
+ │ ├── db.py # SQLAlchemy engine + raw psycopg2 connection
90
+ │ ├── cli/__init__.py # CLI entry point (adminbounds command)
91
+ │ └── sql/
92
+ │ ├── schema/
93
+ │ │ ├── 01_admin_units.sql
94
+ │ │ └── 02_thematic_admin_relations.sql
95
+ │ └── functions/
96
+ │ └── infer_admin_semantic_relation.sql
97
+ ├── sql/ # Source copies of the SQL files (mirrors src/adminbounds/sql/)
98
+ ├── validation/
99
+ │ └── sample_queries.sql # Post-import validation and smoke tests
100
+ ├── .env.example
101
+ └── pyproject.toml
102
+ ```
103
+
104
+ ---
105
+
106
+ ## Prerequisites
107
+
108
+ - Python 3.12+
109
+ - [uv](https://github.com/astral-sh/uv) package manager
110
+ - PostgreSQL 14+ with PostGIS 3.x extension enabled on the target database
111
+
112
+ ---
113
+
114
+ ## Setup
115
+
116
+ **1. Install dependencies**
117
+
118
+ ```bash
119
+ uv sync
120
+ ```
121
+
122
+ **2. Configure environment**
123
+
124
+ ```bash
125
+ cp .env.example .env
126
+ ```
127
+
128
+ Edit `.env` with your database credentials:
129
+
130
+ ```dotenv
131
+ ADMINBOUNDS_DB_HOST=localhost
131
+ ADMINBOUNDS_DB_PORT=5432
132
+ ADMINBOUNDS_DB_NAME=geo_prism
133
+ ADMINBOUNDS_DB_USER=postgres
134
+ ADMINBOUNDS_DB_PASSWORD=your_password_here
135
+ ADMINBOUNDS_DB_SCHEMA=public
137
+ ```
138
+
139
+ **3. Ensure PostGIS is enabled**
140
+
141
+ ```sql
142
+ CREATE EXTENSION IF NOT EXISTS postgis;
143
+ ```
144
+
145
+ ---
146
+
147
+ ## Usage
148
+
149
+ ### Initialize the database
150
+
151
+ Creates the `adminbounds` schema and its tables, then deploys the inference function. Safe to re-run — it also applies any pending schema migrations (e.g. widening `adcode` from `VARCHAR(6)` to `TEXT` for GADM compatibility).
152
+
153
+ ```bash
154
+ adminbounds init-db
155
+ ```
156
+
157
+ ### Import bundled Chinese boundaries
158
+
159
+ ```bash
160
+ adminbounds import-boundaries
161
+ ```
162
+
163
+ Loads four GeoJSON files into `admin_units`, computes derived geometry columns (bbox, convex hull, simplified geometry, centroid, area), and deploys the inference function. Idempotent — re-running updates existing rows.
164
+
165
+ ### Download GADM worldwide boundaries
166
+
167
+ ```bash
168
+ adminbounds download-gadm Germany
169
+ adminbounds download-gadm DEU # same — ISO3 code accepted
170
+ adminbounds download-gadm USA --levels 0,1 # country + state only (level 2+ is large)
171
+ adminbounds download-gadm France --force # re-download even if cached
172
+ adminbounds download-gadm Japan --cache-dir /tmp/gadm
173
+ ```
174
+
175
+ Downloads GADM 4.1 GeoJSON zips from the UC Davis CDN, extracts, maps to the `admin_units` schema, and upserts. Files are cached in `~/.adminbounds/gadm_cache/` by default.
176
+
177
+ **GADM level → DB level mapping:**
178
+
179
+ | GADM level | Meaning | DB `level` value |
180
+ |---|---|---|
181
+ | 0 | Country | 1 |
182
+ | 1 | State / Province | 2 |
183
+ | 2 | County / City | 3 |
184
+ | 3 | Municipality / District | 4 |
185
+
186
+ **GADM field → `admin_units` column mapping:**
187
+
188
+ | `admin_units` column | GADM level 0 | GADM level 1 | GADM level 2 | GADM level 3 |
189
+ |---|---|---|---|---|
190
+ | `adcode` | `GID_0` | `GID_1` | `GID_2` | `GID_3` |
191
+ | `name` | `NAME_0` | `NAME_1` | `NAME_2` | `NAME_3` |
192
+ | `level` | `1` | `2` | `3` | `4` |
193
+ | `parent_code` | `NULL` | `GID_0` | `GID_1` | `GID_2` |
194
+ | `geom` | geometry | geometry | geometry | geometry |
195
+
196
+ GADM GIDs look like `DEU`, `DEU.1_1`, `DEU.1.2_1` — the `adcode` column is `TEXT` (not `VARCHAR`) to accommodate these.
197
+
198
+ ### Upload a GeoJSON file
199
+
200
+ ```bash
201
+ adminbounds upload path/to/file.geojson my_table
202
+ adminbounds upload path/to/file.geojson my_table --if-exists append
203
+ ```
204
+
205
+ ### Annotate a thematic table
206
+
207
+ ```bash
208
+ adminbounds annotate --source-table sample_pois --geom-col geom
209
+ adminbounds annotate --source-table sample_pois --geom-col geom --batch-size 200
210
+ ```
211
+
212
+ Resume-safe: only processes rows not yet present in `thematic_admin_relations`.
213
+
214
+ ### Diagnose annotation issues
215
+
216
+ ```bash
217
+ adminbounds diagnose --source-table sample_pois --geom-col geom
218
+ ```
219
+
220
+ ### Python API
221
+
222
+ ```python
223
+ from adminbounds import AdminBoundsClient
224
+
225
+ c = AdminBoundsClient(dbname="geo_prism", user="postgres", password="...")
226
+
227
+ # Setup
228
+ c.init_db()
229
+ c.import_boundaries() # bundled China data
230
+
231
+ # GADM worldwide
232
+ c.download_gadm("Germany") # all 4 levels
233
+ c.download_gadm("DEU") # same via ISO3 code
234
+ c.download_gadm("USA", levels=[0, 1]) # country + state only
235
+
236
+ # Inference
237
+ from shapely.geometry import box
238
+ result = c.infer(box(116.3, 39.8, 116.5, 40.0))
239
+ print(result["coincides_with"])
240
+
241
+ # Batch annotation
242
+ c.annotate("sample_pois", geom_col="geom")
243
+ ```
244
+
245
+ All CLI connection flags (`--host`, `--port`, `--dbname`, `--user`, `--password`) fall back to the `ADMINBOUNDS_DB_*` environment variables.
246
+
247
+ ---
248
+
249
+ ## Database Schema
250
+
251
+ ### `admin_units`
252
+
253
+ Stores administrative boundaries at four levels (1=country, 2=province/state, 3=city/county, 4=district/municipality). Supports both Chinese numeric adcodes (`100000`) and GADM GIDs (`DEU.1_1`).
254
+
255
+ | Column | Type | Description |
256
+ |---|---|---|
257
+ | `adcode` | TEXT | Unique admin code — 6-digit numeric for China, GADM GID for other countries |
258
+ | `name` | TEXT | Place name |
259
+ | `level` | INTEGER | 1=country, 2=province, 3=city, 4=district |
260
+ | `parent_code` | TEXT | Parent `adcode` (NULL for level=1) |
261
+ | `geom` | GEOMETRY | Full boundary polygon |
262
+ | `geom_bbox` | GEOMETRY | Bounding box (fast coarse filter) |
263
+ | `geom_hull` | GEOMETRY | Convex hull (medium filter) |
264
+ | `geom_simple` | GEOMETRY | Simplified geometry for complex polygons |
265
+ | `centroid` | GEOMETRY | Centroid point |
266
+ | `area_m2` | FLOAT8 | Area in square metres |
267
+ | `vertex_count` | INTEGER | Vertex count (drives simplification choice) |
268
+
269
+ ### `thematic_admin_relations`
270
+
271
+ Stores per-feature annotation results linking any source table to its administrative context.
272
+
273
+ | Column | Type | Description |
274
+ |---|---|---|
275
+ | `source_table` | TEXT | Name of the annotated table |
276
+ | `geom_hash` | TEXT | MD5 of `ST_AsEWKB(geom)` — deduplication key |
277
+ | `admin_level_match` | INTEGER | Dominant admin level of the match |
278
+ | `confidence` | FLOAT8 | 0–1 score |
279
+ | `coincides_with` | JSONB | Array of coinciding units |
280
+ | `intersects_with` | JSONB | Array of intersecting units |
281
+ | `covers_children` | JSONB | Array of child units covered |
282
+ | `contained_by` | JSONB | Ancestor chain |
283
+
284
+ ---
285
+
286
+ ## Inference Function
287
+
288
+ ```sql
289
+ SELECT adminbounds.infer_admin_semantic_relation(ST_GeomFromText('POLYGON(...)', 4326));
290
+ ```
291
+
292
+ **Example output (Chinese boundary):**
293
+
294
+ ```json
295
+ {
296
+ "coincides_with": [{"code": "110000", "name": "北京市", "level": 2, "similarity": 0.9731}],
297
+ "intersects_with": [],
298
+ "covers_children": [{"code": "110101", "name": "东城区", "level": 4}],
299
+ "contained_by": [{"code": "100000", "name": "中国", "level": 1}],
300
+ "admin_level_match": 2,
301
+ "confidence": 0.9866
302
+ }
303
+ ```
304
+
305
+ **Example output (German boundary after `download-gadm Germany`):**
306
+
307
+ ```json
308
+ {
309
+ "coincides_with": [{"code": "DEU.1_1", "name": "Baden-Württemberg", "level": 2, "similarity": 0.9812}],
310
+ "intersects_with": [],
311
+ "covers_children": [{"code": "DEU.1.1_1", "name": "Freiburg im Breisgau", "level": 3}],
312
+ "contained_by": [{"code": "DEU", "name": "Germany", "level": 1}],
313
+ "admin_level_match": 2,
314
+ "confidence": 0.9906
315
+ }
316
+ ```
317
+
318
+ **Three-layer spatial filter** (performance):
319
+ 1. Bounding box overlap — GIST index scan
320
+ 2. Convex hull intersection — narrows candidates
321
+ 3. Actual geometry intersection — precise check (uses simplified geometry for polygons with >500 vertices)
322
+
323
+ **Similarity metric** (for `coincides_with`, threshold IoU ≥ 0.85):
324
+
325
+ ```
326
+ similarity = 0.5 × IoU + 0.3 × area_ratio + 0.2 × (1 − normalised_centroid_offset)
327
+ ```
328
+
329
+ > **Note:** The `contained_by` fallback in the PL/pgSQL function uses substring-based ancestor lookup tuned for 6-digit Chinese codes. For GADM GIDs the primary parent-chain walkup (via `parent_code`) is used instead and works correctly. The substring fallback is only triggered when no parent-chain match is found, so GADM data is fully functional.
330
+
331
+ ---
332
+
333
+ ## Querying Results
334
+
335
+ **Verify imported GADM data:**
336
+
337
+ ```sql
338
+ SELECT level, COUNT(*) FROM adminbounds.admin_units GROUP BY level ORDER BY level;
339
+ SELECT adcode, name, level FROM adminbounds.admin_units WHERE adcode LIKE 'DEU%' LIMIT 10;
340
+ ```
341
+
342
+ **Find all features that coincide with a specific province:**
343
+
344
+ ```sql
345
+ SELECT source_table, geom_hash
346
+ FROM thematic_admin_relations
347
+ WHERE coincides_with @> '[{"code": "320000"}]';
348
+ ```
349
+
350
+ **Find features at city level with high confidence:**
351
+
352
+ ```sql
353
+ SELECT *
354
+ FROM thematic_admin_relations
355
+ WHERE admin_level_match = 3
356
+ AND confidence > 0.8;
357
+ ```
358
+
359
+ **Join back to source table:**
360
+
361
+ ```sql
362
+ SELECT src.*, tar.coincides_with, tar.contained_by
363
+ FROM sample_pois_pg_test src
364
+ JOIN thematic_admin_relations tar
365
+ ON tar.source_table = 'sample_pois_pg_test'
366
+ AND tar.geom_hash = md5(ST_AsEWKB(src.geom));
367
+ ```