adminbounds 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
# Public package surface: the client class is exported under both its current
# and its legacy name.
from .client import AdminBoundsClient

# Backwards-compatible alias
GeoAdminClient = AdminBoundsClient

__version__ = "0.1.0"
__all__ = ["AdminBoundsClient", "GeoAdminClient"]
@@ -0,0 +1,125 @@
1
+ """
2
+ Batch-annotation of geometries in a source table with admin-unit semantic relations.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+
8
+ from tqdm import tqdm
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
# SQL templates.  Identifiers ({schema}, {source_table}, {geom_col}) are
# interpolated with str.format because identifiers cannot be passed as bound
# parameters; all values (%(...)s) are bound by the driver.

# Select a batch of source rows that have no annotation yet
# (anti-join: LEFT JOIN ... WHERE tar.id IS NULL).
FETCH_SQL = """
SELECT
    src.uuid AS feature_uuid,
    ST_AsText(src.{geom_col}) AS geom_wkt
FROM {schema}.{source_table} src
LEFT JOIN adminbounds.thematic_admin_relations tar
    ON tar.source_table = %(source_table)s
    AND tar.feature_uuid = src.uuid
WHERE tar.id IS NULL
    AND src.{geom_col} IS NOT NULL
LIMIT %(batch_size)s
"""

# Run the in-database inference function on one WKT geometry (assumed EPSG:4326).
INFER_SQL = """
SELECT adminbounds.infer_admin_semantic_relation(ST_GeomFromText(%(wkt)s, 4326)) AS result
"""

# Store one inference result; the JSON payload is unpacked server-side.
# ON CONFLICT keeps an existing annotation if the row was inserted concurrently.
INSERT_SQL = """
INSERT INTO adminbounds.thematic_admin_relations
    (source_table, feature_uuid, admin_level_match, confidence,
     coincides_with, intersects_with, covers_children, contained_by)
SELECT
    %(source_table)s,
    %(feature_uuid)s::UUID,
    (r->>'admin_level_match')::INTEGER,
    (r->>'confidence')::FLOAT8,
    r->'coincides_with',
    r->'intersects_with',
    r->'covers_children',
    r->'contained_by'
FROM (SELECT %(relations)s::jsonb AS r) sub
ON CONFLICT (source_table, feature_uuid) DO NOTHING
"""
45
+
46
+
47
def annotate_batch(
    conn,
    source_table: str,
    geom_col: str,
    schema: str,
    batch_size: int,
    on_progress=None,
) -> int:
    """Batch-annotate source table. Returns count of newly annotated rows.

    Repeatedly fetches up to *batch_size* not-yet-annotated rows, runs the
    in-database inference function on each geometry, and inserts the result
    into adminbounds.thematic_admin_relations.  Each row is committed
    individually; a failing row is rolled back, logged, and skipped.

    Args:
        conn: Open DB-API (psycopg2-style) connection.  It is CLOSED on exit.
        source_table: Unqualified name of the source table.
        geom_col: Geometry column in the source table.
        schema: Schema containing the source table.
        batch_size: Maximum rows fetched per round trip.
        on_progress: Optional callback ``(processed, remaining_total)``.

    Returns:
        Number of rows newly inserted into thematic_admin_relations.

    Note:
        schema / source_table / geom_col are interpolated into SQL as
        identifiers and must come from trusted configuration, never from
        untrusted input.
    """
    conn.autocommit = False
    total_processed = 0

    try:
        with conn.cursor() as cur:
            cur.execute(
                f"""
                SELECT COUNT(*)
                FROM {schema}.{source_table} src
                LEFT JOIN adminbounds.thematic_admin_relations tar
                    ON tar.source_table = %s
                    AND tar.feature_uuid = src.uuid
                WHERE tar.id IS NULL AND src.{geom_col} IS NOT NULL
                """,
                (source_table,),
            )
            remaining = cur.fetchone()[0]
        log.info("Rows to annotate: %d", remaining)

        pbar = tqdm(total=remaining, unit="row")

        # Loop-invariant: the identifier interpolation never changes per batch.
        fetch_sql = FETCH_SQL.format(
            geom_col=geom_col,
            schema=schema,
            source_table=source_table,
        )

        while True:
            with conn.cursor() as cur:
                cur.execute(fetch_sql, {"source_table": source_table, "batch_size": batch_size})
                rows = cur.fetchall()

            if not rows:
                break

            batch_successes = 0
            for feature_uuid, geom_wkt in rows:
                try:
                    with conn.cursor() as cur:
                        cur.execute(INFER_SQL, {"wkt": geom_wkt})
                        result = cur.fetchone()[0]
                        if result is None:
                            continue

                        relations_str = json.dumps(result) if isinstance(result, dict) else result

                        cur.execute(
                            INSERT_SQL,
                            {
                                "source_table": source_table,
                                "feature_uuid": str(feature_uuid),
                                "relations": relations_str,
                            },
                        )
                    conn.commit()
                    batch_successes += 1
                    total_processed += 1
                    pbar.update(1)
                    if on_progress:
                        on_progress(total_processed, remaining)

                except Exception as exc:
                    conn.rollback()
                    log.warning("Row %s failed: %s", feature_uuid, exc)
                    pbar.update(1)

            # Failed rows (and rows whose inference returned NULL) get no
            # thematic_admin_relations entry, so FETCH_SQL re-fetches them on
            # the next iteration.  Once an entire batch makes no progress the
            # loop would previously spin forever — stop instead.
            if batch_successes == 0:
                log.error(
                    "Batch of %d rows made no progress; stopping to avoid an infinite loop.",
                    len(rows),
                )
                break

        pbar.close()

    finally:
        conn.close()

    log.info("Done. Annotated %d rows.", total_processed)
    return total_processed
@@ -0,0 +1,153 @@
1
+ """
2
+ Diagnostic checks for infer_admin_semantic_relation returning empty results.
3
+ """
4
+
5
+ import json
6
+
7
# Status prefixes used in the console diagnostic report.
PASS = " [OK]"
FAIL = " [FAIL]"
WARN = " [WARN]"
10
+
11
+
12
def diagnose(conn, source_table: str, geom_col: str, schema: str) -> dict:
    """Run diagnostic checks. Returns structured result dict.

    Prints a human-readable report explaining why
    infer_admin_semantic_relation may return empty results for the given
    source table (missing derived fields, SRID mismatch, no spatial
    overlap, filter-layer elimination).

    Args:
        conn: Open DB-API (psycopg2-style) connection; NOT closed here.
        source_table: Unqualified name of the source table.
        geom_col: Geometry column in the source table.
        schema: Schema containing the source table.

    Returns:
        Dict with all collected metrics (counts, SRIDs, bbox, per-layer
        candidate counts, ...).

    Note:
        schema / source_table / geom_col are interpolated as SQL identifiers
        and must come from trusted configuration.
    """
    results = {}
    # Context manager ensures the cursor is closed even when a check raises
    # (the previous explicit cur.close() leaked on any exception).
    with conn.cursor() as cur:
        # 1. admin_units row count
        cur.execute("SELECT COUNT(*) FROM adminbounds.admin_units")
        total = cur.fetchone()[0]
        cur.execute("SELECT COUNT(*) FROM adminbounds.admin_units WHERE geom_bbox IS NULL")
        null_bbox = cur.fetchone()[0]
        results["admin_units_total"] = total
        results["admin_units_null_bbox"] = null_bbox

        print("\n=== 1. admin_units row count ===")
        print(f"{PASS if total > 0 else FAIL} Total rows: {total}")
        print(f"{PASS if null_bbox == 0 else FAIL} Rows with NULL geom_bbox (derived fields missing): {null_bbox}")
        if null_bbox > 0:
            print("    → Run import-boundaries again; compute_derived_fields() did not complete.")

        print("\n=== 2. admin_units level distribution ===")
        cur.execute("SELECT level, COUNT(*) FROM adminbounds.admin_units GROUP BY level ORDER BY level")
        level_dist = {}
        for row in cur.fetchall():
            level_dist[row[0]] = row[1]
            print(f"  Level {row[0]}: {row[1]} rows")
        results["level_distribution"] = level_dist

        print(f"\n=== 3. Source table: {schema}.{source_table} ===")
        cur.execute(f"SELECT COUNT(*) FROM {schema}.{source_table} WHERE {geom_col} IS NOT NULL")
        src_count = cur.fetchone()[0]
        results["source_non_null_geoms"] = src_count
        print(f"{PASS if src_count > 0 else FAIL} Non-null geometries: {src_count}")

        cur.execute(f"SELECT DISTINCT ST_SRID({geom_col}) FROM {schema}.{source_table} WHERE {geom_col} IS NOT NULL LIMIT 5")
        srids = [r[0] for r in cur.fetchall()]
        results["source_srids"] = srids
        print(f"{PASS if srids == [4326] else FAIL} Geometry SRIDs in source table: {srids}")
        if srids and srids != [4326]:
            print("    → Geometries are NOT in EPSG:4326. The function expects 4326.")

        cur.execute(f"""
            SELECT
                ST_XMin(ST_Extent({geom_col})),
                ST_YMin(ST_Extent({geom_col})),
                ST_XMax(ST_Extent({geom_col})),
                ST_YMax(ST_Extent({geom_col}))
            FROM {schema}.{source_table}
            WHERE {geom_col} IS NOT NULL
        """)
        row = cur.fetchone()
        if row and row[0] is not None:
            xmin, ymin, xmax, ymax = row
            results["bbox"] = {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}
            print(f"  Bounding box: ({xmin:.4f}, {ymin:.4f}) → ({xmax:.4f}, {ymax:.4f})")
            in_china = (70 <= xmin <= 140) and (15 <= ymin <= 55)
            results["in_china_range"] = in_china
            print(f"{PASS if in_china else FAIL} Coordinates look like China (lon 70–140, lat 15–55): {in_china}")

        print("\n=== 4. Spatial overlap: source bbox vs admin_units bbox ===")
        cur.execute(f"""
            SELECT COUNT(*)
            FROM adminbounds.admin_units au
            WHERE au.geom_bbox && (
                SELECT ST_Extent({geom_col}) FROM {schema}.{source_table} WHERE {geom_col} IS NOT NULL
            )
        """)
        overlap_count = cur.fetchone()[0]
        results["spatial_overlap_count"] = overlap_count
        print(f"{PASS if overlap_count > 0 else FAIL} admin_units whose bbox overlaps source extent: {overlap_count}")
        if overlap_count == 0:
            print("    → No spatial overlap at all. Likely a CRS or coordinate system mismatch.")

        print("\n=== 5. Manual function call on first source geometry ===")
        cur.execute(f"""
            SELECT
                ST_AsText({geom_col}) AS wkt,
                ST_SRID({geom_col}) AS srid,
                ST_IsValid({geom_col}) AS is_valid
            FROM {schema}.{source_table}
            WHERE {geom_col} IS NOT NULL
            LIMIT 1
        """)
        row = cur.fetchone()
        if row:
            wkt, srid, is_valid = row
            results["sample_srid"] = srid
            results["sample_is_valid"] = is_valid
            print(f"  SRID: {srid}, IsValid: {is_valid}")
            print(f"  WKT (first 120 chars): {wkt[:120]}...")

            cur.execute(
                "SELECT adminbounds.infer_admin_semantic_relation(ST_GeomFromText(%s, 4326))",
                (wkt,),
            )
            func_result = cur.fetchone()[0]
            results["function_result"] = func_result
            print(f"\n  Function result:\n  {json.dumps(func_result, ensure_ascii=False, indent=2)}")

            # Re-run the function's candidate pipeline layer by layer to see
            # where the first geometry gets eliminated.
            cur.execute(f"""
                WITH input AS (
                    SELECT ST_GeomFromText(%s, 4326) AS g
                ),
                layer1 AS (
                    SELECT adcode FROM adminbounds.admin_units, input
                    WHERE geom_bbox && ST_Envelope(input.g)
                ),
                layer2 AS (
                    SELECT au.adcode FROM adminbounds.admin_units au, input
                    WHERE au.geom_bbox && ST_Envelope(input.g)
                    AND ST_Intersects(au.geom_hull, input.g)
                ),
                layer3 AS (
                    SELECT au.adcode FROM adminbounds.admin_units au, input
                    WHERE au.geom_bbox && ST_Envelope(input.g)
                    AND ST_Intersects(au.geom_hull, input.g)
                    AND ST_Intersects(
                        CASE WHEN au.vertex_count > 500 THEN au.geom_simple ELSE au.geom END,
                        input.g
                    )
                )
                SELECT
                    (SELECT COUNT(*) FROM layer1) AS after_layer1_bbox,
                    (SELECT COUNT(*) FROM layer2) AS after_layer2_hull,
                    (SELECT COUNT(*) FROM layer3) AS after_layer3_geom
            """, (wkt,))
            row = cur.fetchone()
            results["filter_layers"] = {
                "after_bbox": row[0],
                "after_hull": row[1],
                "after_geom": row[2],
            }
            print(f"\n  Three-layer filter candidates (first geometry):")
            print(f"    After layer 1 (bbox): {row[0]}")
            print(f"    After layer 2 (hull): {row[1]}")
            print(f"    After layer 3 (geom): {row[2]}")
            if row[0] == 0:
                print(f"    {FAIL} Nothing passes bbox filter → geom_bbox NULL or CRS mismatch")
            elif row[2] == 0:
                print(f"    {WARN} Passes bbox/hull but not fine geometry → simplification or topology issue")

    return results
adminbounds/_gadm.py ADDED
@@ -0,0 +1,243 @@
1
+ """
2
+ GADM 4.1 worldwide admin boundary downloader and importer.
3
+
4
+ Downloads GeoJSON zips from the GADM CDN, maps fields to the admin_units schema,
5
+ and upserts via the existing staging pipeline.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import io
11
+ import zipfile
12
+ from pathlib import Path
13
+ from urllib.request import urlopen, Request
14
+ from urllib.error import HTTPError
15
+
16
+ import geopandas as gpd
17
+ from shapely.geometry import shape
18
+ from tqdm import tqdm
19
+
20
+ from ._import import _upsert_staging, _compute_derived_fields
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Country name → ISO3 lookup (lowercase keys)
24
+ # ---------------------------------------------------------------------------
25
+ _COUNTRY_ISO3: dict[str, str] = {
26
+ "afghanistan": "AFG", "albania": "ALB", "algeria": "DZA", "andorra": "AND",
27
+ "angola": "AGO", "argentina": "ARG", "armenia": "ARM", "australia": "AUS",
28
+ "austria": "AUT", "azerbaijan": "AZE", "bahrain": "BHR", "bangladesh": "BGD",
29
+ "belarus": "BLR", "belgium": "BEL", "belize": "BLZ", "benin": "BEN",
30
+ "bhutan": "BTN", "bolivia": "BOL", "bosnia and herzegovina": "BIH",
31
+ "botswana": "BWA", "brazil": "BRA", "brunei": "BRN", "bulgaria": "BGR",
32
+ "burkina faso": "BFA", "burundi": "BDI", "cambodia": "KHM", "cameroon": "CMR",
33
+ "canada": "CAN", "central african republic": "CAF", "chad": "TCD",
34
+ "chile": "CHL", "china": "CHN", "colombia": "COL", "congo": "COG",
35
+ "democratic republic of the congo": "COD", "dr congo": "COD",
36
+ "costa rica": "CRI", "croatia": "HRV", "cuba": "CUB", "cyprus": "CYP",
37
+ "czech republic": "CZE", "czechia": "CZE", "denmark": "DNK", "djibouti": "DJI",
38
+ "dominican republic": "DOM", "ecuador": "ECU", "egypt": "EGY",
39
+ "el salvador": "SLV", "eritrea": "ERI", "estonia": "EST", "eswatini": "SWZ",
40
+ "ethiopia": "ETH", "finland": "FIN", "france": "FRA", "gabon": "GAB",
41
+ "gambia": "GMB", "georgia": "GEO", "germany": "DEU", "ghana": "GHA",
42
+ "greece": "GRC", "guatemala": "GTM", "guinea": "GIN",
43
+ "guinea-bissau": "GNB", "guyana": "GUY", "haiti": "HTI", "honduras": "HND",
44
+ "hungary": "HUN", "iceland": "ISL", "india": "IND", "indonesia": "IDN",
45
+ "iran": "IRN", "iraq": "IRQ", "ireland": "IRL", "israel": "ISR",
46
+ "italy": "ITA", "jamaica": "JAM", "japan": "JPN", "jordan": "JOR",
47
+ "kazakhstan": "KAZ", "kenya": "KEN", "kuwait": "KWT", "kyrgyzstan": "KGZ",
48
+ "laos": "LAO", "latvia": "LVA", "lebanon": "LBN", "lesotho": "LSO",
49
+ "liberia": "LBR", "libya": "LBY", "liechtenstein": "LIE", "lithuania": "LTU",
50
+ "luxembourg": "LUX", "madagascar": "MDG", "malawi": "MWI", "malaysia": "MYS",
51
+ "mali": "MLI", "malta": "MLT", "mauritania": "MRT", "mexico": "MEX",
52
+ "moldova": "MDA", "mongolia": "MNG", "montenegro": "MNE", "morocco": "MAR",
53
+ "mozambique": "MOZ", "myanmar": "MMR", "namibia": "NAM", "nepal": "NPL",
54
+ "netherlands": "NLD", "new zealand": "NZL", "nicaragua": "NIC",
55
+ "niger": "NER", "nigeria": "NGA", "north korea": "PRK",
56
+ "north macedonia": "MKD", "norway": "NOR", "oman": "OMN", "pakistan": "PAK",
57
+ "panama": "PAN", "papua new guinea": "PNG", "paraguay": "PRY", "peru": "PER",
58
+ "philippines": "PHL", "poland": "POL", "portugal": "PRT", "qatar": "QAT",
59
+ "romania": "ROU", "russia": "RUS", "rwanda": "RWA", "saudi arabia": "SAU",
60
+ "senegal": "SEN", "serbia": "SRB", "sierra leone": "SLE", "singapore": "SGP",
61
+ "slovakia": "SVK", "slovenia": "SVN", "somalia": "SOM", "south africa": "ZAF",
62
+ "south korea": "KOR", "south sudan": "SSD", "spain": "ESP", "sri lanka": "LKA",
63
+ "sudan": "SDN", "sweden": "SWE", "switzerland": "CHE", "syria": "SYR",
64
+ "taiwan": "TWN", "tajikistan": "TJK", "tanzania": "TZA", "thailand": "THA",
65
+ "timor-leste": "TLS", "east timor": "TLS", "togo": "TGO", "tunisia": "TUN",
66
+ "turkey": "TUR", "turkiye": "TUR", "turkmenistan": "TKM", "uganda": "UGA",
67
+ "ukraine": "UKR", "united arab emirates": "ARE", "uae": "ARE",
68
+ "united kingdom": "GBR", "uk": "GBR", "great britain": "GBR",
69
+ "united states": "USA", "united states of america": "USA", "usa": "USA",
70
+ "us": "USA", "uruguay": "URY", "uzbekistan": "UZB", "venezuela": "VEN",
71
+ "vietnam": "VNM", "viet nam": "VNM", "yemen": "YEM", "zambia": "ZMB",
72
+ "zimbabwe": "ZWE",
73
+ }
74
+
75
+
76
+ def _resolve_iso3(country: str) -> str:
77
+ """Resolve a country name or ISO3 code to uppercase ISO3."""
78
+ upper = country.strip().upper()
79
+ # Direct ISO3 match (3-letter alpha)
80
+ if len(upper) == 3 and upper.isalpha():
81
+ return upper
82
+ # Name lookup
83
+ lower = country.strip().lower()
84
+ if lower in _COUNTRY_ISO3:
85
+ return _COUNTRY_ISO3[lower]
86
+ # Partial suggestions
87
+ suggestions = [k for k in _COUNTRY_ISO3 if lower in k or k in lower]
88
+ msg = f"Country not recognised: {country!r}."
89
+ if suggestions:
90
+ msg += f" Did you mean: {', '.join(suggestions[:5])}?"
91
+ raise ValueError(msg)
92
+
93
+
94
+ def _gadm_url(iso3: str, level: int) -> str:
95
+ return f"https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_{iso3}_{level}.json.zip"
96
+
97
+
98
def _download_file(url: str, dest: Path, force: bool) -> Path | None:
    """Download *url* to *dest*; return None on HTTP 404.

    A previously cached *dest* is reused unless *force* is set.  The payload
    is accumulated in memory and written to disk in a single shot.
    """
    if dest.exists() and not force:
        return dest

    print(f"  Downloading {url} ...")
    request = Request(url, headers={"User-Agent": "adminbounds/1.0"})
    try:
        with urlopen(request) as response:
            size = int(response.headers.get("Content-Length", 0))
            dest.parent.mkdir(parents=True, exist_ok=True)
            payload = io.BytesIO()
            progress = tqdm(
                total=size or None,
                unit="B",
                unit_scale=True,
                desc=dest.name,
                leave=False,
            )
            with progress:
                # Stream in 64 KiB chunks until EOF.
                for chunk in iter(lambda: response.read(65536), b""):
                    payload.write(chunk)
                    progress.update(len(chunk))
            dest.write_bytes(payload.getvalue())
    except HTTPError as err:
        if err.code != 404:
            raise
        print(f"  Level not available (HTTP 404): {url}")
        return None
    return dest
130
+
131
+
132
+ def _extract_json(zip_path: Path) -> dict:
133
+ """Extract and parse the first .json file from a zip archive."""
134
+ import json
135
+ with zipfile.ZipFile(zip_path) as zf:
136
+ json_names = [n for n in zf.namelist() if n.endswith(".json")]
137
+ if not json_names:
138
+ raise ValueError(f"No .json file found in {zip_path}")
139
+ with zf.open(json_names[0]) as f:
140
+ return json.load(f)
141
+
142
+
143
+ def _parse_gadm_features(data: dict, gadm_level: int) -> list[dict]:
144
+ """Map GeoJSON features from a GADM file to admin_units row dicts."""
145
+ gid_key = f"GID_{gadm_level}"
146
+ name_key = f"NAME_{gadm_level}"
147
+ parent_key = f"GID_{gadm_level - 1}" if gadm_level > 0 else None
148
+ db_level = gadm_level + 1 # GADM 0→level 1, GADM 1→level 2, …
149
+
150
+ rows = []
151
+ for feature in data.get("features", []):
152
+ props = feature.get("properties", {}) or {}
153
+ geom_data = feature.get("geometry")
154
+
155
+ adcode = props.get(gid_key)
156
+ if not adcode or not geom_data:
157
+ continue
158
+
159
+ name = props.get(name_key) or adcode
160
+ parent_code = props.get(parent_key) if parent_key else None
161
+ geometry = shape(geom_data)
162
+
163
+ rows.append({
164
+ "adcode": adcode,
165
+ "name": name,
166
+ "level": db_level,
167
+ "parent_code": parent_code,
168
+ "geometry": geometry,
169
+ })
170
+ return rows
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # Public API
175
+ # ---------------------------------------------------------------------------
176
+
177
def download_gadm(
    country: str,
    engine,
    levels: list[int] | None = None,
    cache_dir: Path | None = None,
    force: bool = False,
) -> int:
    """Download and import GADM 4.1 boundaries for a country.

    Args:
        country: ISO3 code (e.g. "DEU") or English name (e.g. "Germany").
        engine: SQLAlchemy engine connected to the adminbounds DB.
        levels: GADM levels to import (0=country … 3=district).
            Default: all available [0, 1, 2, 3].
        cache_dir: Directory for cached zip files.
            Default: ~/.adminbounds/gadm_cache/
        force: Re-download even if already cached.

    Returns:
        Total rows upserted into adminbounds.admin_units.
    """
    wanted_levels = [0, 1, 2, 3] if levels is None else levels

    iso3 = _resolve_iso3(country)
    print(f"Resolved '{country}' → ISO3={iso3}")

    if cache_dir is None:
        cache_root = Path.home() / ".adminbounds" / "gadm_cache"
    else:
        cache_root = Path(cache_dir)

    collected: list[dict] = []
    for lvl in wanted_levels:
        archive = _download_file(
            _gadm_url(iso3, lvl),
            cache_root / f"gadm41_{iso3}_{lvl}.json.zip",
            force,
        )
        if archive is None:
            continue  # level not available

        print(f"  Parsing level {lvl} ...")
        level_rows = _parse_gadm_features(_extract_json(archive), lvl)
        print(f"  → {len(level_rows)} features")
        collected.extend(level_rows)

    if not collected:
        print("No data downloaded.")
        return 0

    # Deduplicate by adcode (later occurrences win, as before).
    unique_by_code: dict[str, dict] = {row["adcode"]: row for row in collected}
    deduped = list(unique_by_code.values())
    print(f"Total unique units: {len(deduped)}")

    print("Upserting into adminbounds.admin_units ...")
    frame = gpd.GeoDataFrame(deduped, crs="EPSG:4326").rename_geometry("geom")
    _upsert_staging(engine, frame)
    print("  Upsert complete.")

    _compute_derived_fields(engine)

    return len(deduped)