sdgis-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdgis.py ADDED
@@ -0,0 +1,1379 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ sandag - CLI for the San Diego Regional Data Warehouse (SANDAG/SanGIS)
4
+
5
+ Access 358+ GIS datasets covering San Diego County including parcels, bikeways,
6
+ zoning, census data, transportation, ecology, and much more.
7
+
8
+ Data source: https://geo.sandag.org
9
+ """
10
+
11
+ import json
12
+ import sys
13
+ import os
14
+ import csv
15
+ import io
16
+ import re
17
+ import time
18
+ import sqlite3
19
+ from pathlib import Path
20
+ from urllib.parse import urlencode
21
+
22
+ import click
23
+ import requests
24
+ from rich.console import Console
25
+ from rich.table import Table
26
+ from rich.panel import Panel
27
+ from rich.text import Text
28
+ from rich.progress import Progress, SpinnerColumn, TextColumn
29
+ from rich import box
30
+
31
+ console = Console()
32
+ err_console = Console(stderr=True)
33
+
34
+ # ── Constants ──────────────────────────────────────────────────────────────────
35
+
36
+ BASE_URL = "https://geo.sandag.org/server/rest/services/Hosted"
37
+ PORTAL_URL = "https://geo.sandag.org/portal"
38
+ DOWNLOAD_BASE = "https://geo.sandag.org/server/rest/directories/downloads"
39
+
40
+ CATEGORIES = [
41
+ "Agriculture", "Business", "Census", "Community", "District",
42
+ "Ecology & Parks", "Elevation", "Fire", "Health & Public Safety",
43
+ "Hydrology & Geology", "Jurisdiction", "Landbase", "Land Use",
44
+ "Miscellaneous", "Place", "Transportation", "Utilities", "Zoning",
45
+ ]
46
+
47
+ EXPORT_FORMATS = {
48
+ "geojson": ".geojson",
49
+ "csv": ".csv",
50
+ "shapefile": "_shapefile.zip",
51
+ "filegdb": "_filegdb.zip",
52
+ "json": ".html", # their JSON viewer is .html
53
+ "metadata": ".pdf",
54
+ }
55
+
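# A minimal sketch of how these suffixes are used: the download command below
# simply concatenates DOWNLOAD_BASE, the dataset name, and the format suffix.
#
#   f"{DOWNLOAD_BASE}/Bikeways{EXPORT_FORMATS['shapefile']}"
#   # -> https://geo.sandag.org/server/rest/directories/downloads/Bikeways_shapefile.zip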
56
+ CACHE_DIR = Path.home() / ".cache" / "sandag-cli"
57
+ CACHE_FILE = CACHE_DIR / "datasets.json"
58
+ INDEX_FILE = CACHE_DIR / "index.db"
59
+ CACHE_TTL = 86400 # 24 hours
60
+ EMBED_MODEL = "all-MiniLM-L6-v2"
61
+
62
+
63
+ # ── Helpers ────────────────────────────────────────────────────────────────────
64
+
65
+ def get_session():
66
+ s = requests.Session()
67
+ s.headers.update({"User-Agent": "sandag-cli/1.0"})
68
+ # requests Sessions have no session-wide timeout attribute; callers pass timeout= per request.
69
+ return s
70
+
71
+
72
+ def handle_request_error(e, dataset=None):
73
+ """Provide helpful error messages for common failures."""
74
+ msg = str(e)
75
+ if "ProxyError" in msg or "ConnectionError" in msg:
76
+ raise click.ClickException(
77
+ f"Cannot connect to geo.sandag.org. Check your internet connection."
78
+ )
79
+ elif "Timeout" in msg:
80
+ raise click.ClickException(
81
+ f"Request timed out. The server may be slow — try again."
82
+ )
83
+ elif "404" in msg:
84
+ hint = f" Check the dataset name: '{dataset}'" if dataset else ""
85
+ raise click.ClickException(f"Dataset not found.{hint}")
86
+ else:
87
+ raise click.ClickException(f"Request failed: {e}")
88
+
89
+
90
+ def feature_server_url(dataset_name, layer=0):
91
+ return f"{BASE_URL}/{dataset_name}/FeatureServer/{layer}"
92
+
93
+
94
+ def service_url(dataset_name):
95
+ return f"{BASE_URL}/{dataset_name}/FeatureServer"
96
+
97
+
98
+ def discover_datasets(session, force=False):
99
+ """Discover all hosted feature services via ArcGIS Portal REST API."""
100
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
101
+
102
+ if not force and CACHE_FILE.exists():
103
+ age = time.time() - CACHE_FILE.stat().st_mtime
104
+ if age < CACHE_TTL:
105
+ with open(CACHE_FILE) as f:
106
+ return json.load(f)
107
+
108
+ datasets = []
109
+ start = 1
110
+ num = 100
111
+
112
+ with Progress(
113
+ SpinnerColumn(),
114
+ TextColumn("[bold blue]Discovering datasets..."),
115
+ console=err_console,
116
+ ) as progress:
117
+ task = progress.add_task("Fetching", total=None)
118
+ while True:
119
+ params = {
120
+ "q": 'type:"Feature Service" AND owner:SanGIS',
121
+ "start": start,
122
+ "num": num,
123
+ "f": "json",
124
+ }
125
+ try:
126
+ r = session.get(
127
+ f"{PORTAL_URL}/sharing/rest/search",
128
+ params=params,
129
+ timeout=30,
130
+ )
131
+ r.raise_for_status()
132
+ data = r.json()
133
+ except Exception:
134
+ # Fallback: try without owner filter
135
+ params["q"] = 'type:"Feature Service"'
136
+ try:
137
+ r = session.get(
138
+ f"{PORTAL_URL}/sharing/rest/search",
139
+ params=params,
140
+ timeout=30,
141
+ )
142
+ r.raise_for_status()
143
+ data = r.json()
144
+ except Exception as e:
145
+ err_console.print(f"[red]Could not discover datasets: {e}")
146
+ err_console.print("[yellow]Using built-in dataset list instead.")
147
+ return get_builtin_datasets()
148
+
149
+ results = data.get("results", [])
150
+ if not results:
151
+ break
152
+
153
+ for item in results:
154
+ url = item.get("url", "")
155
+ name_match = re.search(r"/Hosted/([^/]+)/", url)
156
+ if name_match:
157
+ ds_name = name_match.group(1)
158
+ else:
159
+ ds_name = item.get("name", item.get("title", "")).replace(" ", "_")
160
+
161
+ datasets.append({
162
+ "name": ds_name,
163
+ "title": item.get("title", ds_name),
164
+ "description": (item.get("snippet") or item.get("description") or "")[:200],
165
+ "tags": item.get("tags", []),
166
+ "id": item.get("id", ""),
167
+ "url": url,
168
+ "modified": item.get("modified", 0),
169
+ })
170
+
171
+ next_start = data.get("nextStart", -1)
172
+ if next_start == -1 or next_start <= start:
173
+ break
174
+ start = next_start
175
+ progress.update(task, description=f"[bold blue]Found {len(datasets)} datasets...")
176
+
177
+ # Deduplicate by name
178
+ seen = set()
179
+ unique = []
180
+ for ds in datasets:
181
+ if ds["name"] not in seen:
182
+ seen.add(ds["name"])
183
+ unique.append(ds)
184
+
185
+ unique.sort(key=lambda x: x["name"].lower())
186
+
187
+ if not unique:
188
+ err_console.print("[yellow]Portal returned no datasets. Using built-in dataset list.")
189
+ return get_builtin_datasets()
190
+
191
+ with open(CACHE_FILE, "w") as f:
192
+ json.dump(unique, f, indent=2)
193
+
194
+ return unique
195
+
196
+
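# Shape of one cached catalog entry as written to datasets.json above
# (field values here are illustrative, not real catalog data):
#
#   {
#     "name": "Bikeways",
#     "title": "Bikeways",
#     "description": "Regional bike network...",
#     "tags": ["transportation", "bicycle"],
#     "id": "<portal item id>",
#     "url": "https://geo.sandag.org/server/rest/services/Hosted/Bikeways/FeatureServer",
#     "modified": 0
#   }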
197
+ def get_builtin_datasets():
198
+ """Fallback list extracted from the Data Warehouse HTML."""
199
+ names = [
200
+ "ABC_Licenses", "Active_Faults_CN", "Address_Points", "Address_Points_NG911",
201
+ "Adult_Residential_Facilities", "Affordable_Housing_Inventory",
202
+ "Agricultural_Commodity_2020", "Agricultural_Preserve",
203
+ "Agricultural_Preserve_Contracts", "Airport_Influence_Area",
204
+ "Airport_Noise_Contours", "Airport_Overflight_Extents", "Airport_Runways",
205
+ "Airport_Safety_Zones", "Airspace", "Ambulance_Operating_Areas",
206
+ "Assembly_Bill_130", "Assembly_Bill_2011", "Assembly_Bill_803",
207
+ "Assessor_Book", "AWM_Certified_Producers", "AWM_Commodity",
208
+ "AWM_Organic_Producers", "Ballot_Drop_Boxes_2025_11_04", "Bays",
209
+ "Bike_Master_Plan_SD", "Bike_Plan_CN", "Bikeways", "BMP_CN",
210
+ "Broadband_Business_CPUC", "Broadband_Consumer_CPUC", "Broadband_Mobile_CPUC",
211
+ "Building_Outlines", "Business_Improvement_Districts_SD", "Business_Sites",
212
+ "CA_Bridge_Hospitals", "Call_Box", "CALTRANS_Urban_Area", "Casinos",
213
+ ]
214
+ return [{"name": n, "title": n.replace("_", " "), "description": "", "tags": [], "id": "", "url": service_url(n)} for n in names]
215
+
216
+
217
+ def fuzzy_match(query, datasets):
218
+ """Simple fuzzy search across name, title, tags, and description."""
219
+ q = query.lower()
220
+ scored = []
221
+ for ds in datasets:
222
+ score = 0
223
+ name_lower = ds["name"].lower()
224
+ title_lower = ds.get("title", "").lower()
225
+ tags_lower = " ".join(ds.get("tags", [])).lower()
226
+ desc_lower = ds.get("description", "").lower()
227
+
228
+ if q == name_lower:
229
+ score = 100
230
+ elif q in name_lower:
231
+ score = 80
232
+ elif q in title_lower:
233
+ score = 70
234
+ elif q in tags_lower:
235
+ score = 50
236
+ elif q in desc_lower:
237
+ score = 30
238
+
239
+ if score > 0:
240
+ scored.append((score, ds))
241
+
242
+ scored.sort(key=lambda x: (-x[0], x[1]["name"].lower()))
243
+ return [ds for _, ds in scored]
244
+
245
+
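# Worked example of the tiered scoring above: with query "bike", both
# "Bikeways" and "Bike_Plan_CN" score 80 (substring of the name), so the
# tie is broken alphabetically and Bike_Plan_CN sorts before Bikeways.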
246
+ def query_features(session, dataset, where="1=1", out_fields="*", geometry=None,
247
+ geometry_type=None, spatial_rel=None, return_geometry=True,
248
+ result_offset=0, result_record_count=2000, out_sr=4326,
249
+ order_by=None, return_count_only=False, return_ids_only=False,
250
+ layer=0):
251
+ """Query an ArcGIS Feature Service layer."""
252
+ url = f"{feature_server_url(dataset, layer)}/query"
253
+ params = {
254
+ "where": where,
255
+ "outFields": out_fields,
256
+ "returnGeometry": str(return_geometry).lower(),
257
+ "outSR": out_sr,
258
+ "f": "json",
259
+ "resultOffset": result_offset,
260
+ "resultRecordCount": result_record_count,
261
+ }
262
+
263
+ if geometry:
264
+ params["geometry"] = geometry
265
+ if geometry_type:
266
+ params["geometryType"] = geometry_type
267
+ if spatial_rel:
268
+ params["spatialRel"] = spatial_rel
269
+ if order_by:
270
+ params["orderByFields"] = order_by
271
+ if return_count_only:
272
+ params["returnCountOnly"] = "true"
273
+ if return_ids_only:
274
+ params["returnIdsOnly"] = "true"
275
+
276
+ try:
277
+ r = session.get(url, params=params, timeout=60)
278
+ r.raise_for_status()
279
+ except requests.exceptions.RequestException as e:
280
+ handle_request_error(e, dataset)
281
+ data = r.json()
282
+
283
+ if "error" in data:
284
+ raise click.ClickException(
285
+ f"API Error {data['error'].get('code', '?')}: {data['error'].get('message', 'Unknown error')}"
286
+ )
287
+ return data
288
+
289
+
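# Usage sketch for the helper above (an attribute-only query; parameter
# values are illustrative):
#
#   session = get_session()
#   data = query_features(session, "Bikeways", where="1=1",
#                         return_geometry=False, result_record_count=5)
#   for feat in data.get("features", []):
#       print(feat["attributes"])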
290
+ def get_layer_info(session, dataset, layer=0):
291
+ """Get field definitions and metadata for a layer."""
292
+ url = feature_server_url(dataset, layer)
293
+ r = session.get(url, params={"f": "json"}, timeout=30)
294
+ r.raise_for_status()
295
+ data = r.json()
296
+ if "error" in data:
297
+ raise click.ClickException(
298
+ f"API Error: {data['error'].get('message', 'Unknown')}"
299
+ )
300
+ return data
301
+
302
+
303
+ def get_service_info(session, dataset):
304
+ """Get service-level metadata."""
305
+ url = service_url(dataset)
306
+ r = session.get(url, params={"f": "json"}, timeout=30)
307
+ r.raise_for_status()
308
+ return r.json()
309
+
310
+
311
+ def features_to_geojson(features, geometry_type=None):
312
+ """Convert ArcGIS JSON features to GeoJSON FeatureCollection."""
313
+ geo_features = []
314
+ for feat in features:
315
+ geom = feat.get("geometry")
316
+ props = feat.get("attributes", {})
317
+ geo_geom = None
318
+
319
+ if geom:
320
+ if "x" in geom and "y" in geom:
321
+ geo_geom = {"type": "Point", "coordinates": [geom["x"], geom["y"]]}
322
+ elif "rings" in geom:
323
+ if len(geom["rings"]) == 1:
324
+ geo_geom = {"type": "Polygon", "coordinates": geom["rings"]}
325
+ else:
326
+ geo_geom = {"type": "MultiPolygon", "coordinates": [[r] for r in geom["rings"]]}
327
+ elif "paths" in geom:
328
+ if len(geom["paths"]) == 1:
329
+ geo_geom = {"type": "LineString", "coordinates": geom["paths"][0]}
330
+ else:
331
+ geo_geom = {"type": "MultiLineString", "coordinates": geom["paths"]}
332
+ elif "points" in geom:
333
+ geo_geom = {"type": "MultiPoint", "coordinates": geom["points"]}
334
+
335
+ geo_features.append({
336
+ "type": "Feature",
337
+ "geometry": geo_geom,
338
+ "properties": props,
339
+ })
340
+
341
+ return {
342
+ "type": "FeatureCollection",
343
+ "features": geo_features,
344
+ }
345
+
346
+
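# Example of the conversion above for a point feature (coordinates are
# illustrative):
#
#   features_to_geojson([{"attributes": {"name": "stop"},
#                         "geometry": {"x": -117.16, "y": 32.72}}])
#   # -> {"type": "FeatureCollection",
#   #     "features": [{"type": "Feature",
#   #                   "geometry": {"type": "Point",
#   #                                "coordinates": [-117.16, 32.72]},
#   #                   "properties": {"name": "stop"}}]}
#
# Caveat: multi-ring ArcGIS polygons are emitted as one MultiPolygon part per
# ring, so interior rings (holes) are treated as separate polygons here.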
347
+ def features_to_csv_str(features):
348
+ """Convert features to CSV string."""
349
+ if not features:
350
+ return ""
351
+ rows = [f.get("attributes", {}) for f in features]
352
+ if not rows:
353
+ return ""
354
+ output = io.StringIO()
355
+ writer = csv.DictWriter(output, fieldnames=rows[0].keys())
356
+ writer.writeheader()
357
+ writer.writerows(rows)
358
+ return output.getvalue()
359
+
360
+
361
+ # ── SQLite index ───────────────────────────────────────────────────────────────
362
+
363
+ def _open_index():
364
+ """Open (and initialize schema if needed) the SQLite index."""
365
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
366
+ conn = sqlite3.connect(INDEX_FILE)
367
+ conn.row_factory = sqlite3.Row
368
+ conn.executescript("""
369
+ CREATE TABLE IF NOT EXISTS datasets (
370
+ name TEXT PRIMARY KEY,
371
+ title TEXT,
372
+ description TEXT,
373
+ tags TEXT,
374
+ url TEXT,
375
+ id TEXT,
376
+ modified INTEGER
377
+ );
378
+ CREATE TABLE IF NOT EXISTS embeddings (
379
+ name TEXT PRIMARY KEY,
380
+ vector BLOB
381
+ );
382
+ CREATE VIRTUAL TABLE IF NOT EXISTS datasets_fts USING fts5(
383
+ name, title, description, tags,
384
+ content='datasets', content_rowid='rowid'
385
+ );
386
+ """)
387
+ conn.commit()
388
+ return conn
389
+
390
+
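# Note on the schema above: datasets_fts is an external-content FTS5 table
# (content='datasets'), so it stores no text of its own and must be kept in
# sync with the datasets table. build_index() below does that with FTS5's
# special 'rebuild' command, which drops and re-derives the whole full-text
# index from the content table in one statement:
#
#   INSERT INTO datasets_fts(datasets_fts) VALUES ('rebuild');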
391
+ def build_index(session, force=False):
392
+ """Populate index.db with dataset metadata and embeddings."""
393
+ try:
394
+ import numpy as np
395
+ from sentence_transformers import SentenceTransformer
396
+ has_embeddings = True
397
+ except ImportError:
398
+ has_embeddings = False
399
+ err_console.print("[yellow]sentence-transformers not installed — building FTS index only.")
400
+ err_console.print("[dim] Run: pip install sentence-transformers numpy")
401
+
402
+ datasets = discover_datasets(session, force=force)
403
+ conn = _open_index()
404
+
405
+ # Upsert metadata + rebuild FTS
406
+ conn.execute("DELETE FROM datasets_fts")
407
+ for ds in datasets:
408
+ conn.execute(
409
+ "INSERT OR REPLACE INTO datasets (name, title, description, tags, url, id, modified) "
410
+ "VALUES (?, ?, ?, ?, ?, ?, ?)",
411
+ (ds["name"], ds.get("title", ""), ds.get("description", ""),
412
+ json.dumps(ds.get("tags", [])), ds.get("url", ""),
413
+ ds.get("id", ""), ds.get("modified", 0))
414
+ )
415
+ conn.execute(
416
+ "INSERT INTO datasets_fts(datasets_fts) VALUES ('rebuild')"
417
+ )
418
+ conn.commit()
419
+
420
+ if not has_embeddings:
421
+ conn.close()
422
+ return len(datasets)
423
+
424
+ # Determine which datasets still need embeddings
425
+ if force:
426
+ conn.execute("DELETE FROM embeddings")
427
+ conn.commit()
428
+ to_embed = datasets
429
+ else:
430
+ existing = {row[0] for row in conn.execute("SELECT name FROM embeddings")}
431
+ to_embed = [ds for ds in datasets if ds["name"] not in existing]
432
+
433
+ if not to_embed:
434
+ conn.close()
435
+ return len(datasets)
436
+
437
+ err_console.print(f"[dim]Loading embedding model '{EMBED_MODEL}'...")
438
+ model = SentenceTransformer(EMBED_MODEL)
439
+
440
+ texts = [
441
+ f"{ds['name']} {ds.get('title', '')} {ds.get('description', '')} "
442
+ f"{' '.join(ds.get('tags', []))}"
443
+ for ds in to_embed
444
+ ]
445
+
446
+ with Progress(SpinnerColumn(), TextColumn("[bold blue]{task.description}"),
447
+ console=err_console) as progress:
448
+ task = progress.add_task(f"Embedding {len(to_embed)} datasets...", total=None)
449
+ vecs = model.encode(texts, batch_size=64, show_progress_bar=False)
450
+ progress.update(task, description=f"[bold blue]Embedded {len(to_embed)} datasets")
451
+
452
+ for ds, vec in zip(to_embed, vecs):
453
+ blob = vec.astype("float32").tobytes()
454
+ conn.execute("INSERT OR REPLACE INTO embeddings (name, vector) VALUES (?, ?)",
455
+ (ds["name"], blob))
456
+ conn.commit()
457
+ conn.close()
458
+ return len(datasets)
459
+
460
+
461
+ def _load_dataset_row(row):
462
+ return {
463
+ "name": row["name"],
464
+ "title": row["title"] or "",
465
+ "description": row["description"] or "",
466
+ "tags": json.loads(row["tags"] or "[]"),
467
+ "url": row["url"] or "",
468
+ "id": row["id"] or "",
469
+ }
470
+
471
+
472
+ def semantic_search(query, top_k=25):
473
+ """Vector cosine-similarity search against the embedding index."""
474
+ import numpy as np
475
+ from sentence_transformers import SentenceTransformer
476
+
477
+ conn = _open_index()
478
+ rows = conn.execute("SELECT e.name, e.vector, d.title, d.description, d.tags, d.url, d.id "
479
+ "FROM embeddings e JOIN datasets d ON e.name = d.name").fetchall()
480
+ conn.close()
481
+ if not rows:
482
+ return None # index empty
483
+
484
+ model = SentenceTransformer(EMBED_MODEL)
485
+ q_vec = model.encode([query])[0].astype("float32")
486
+ q_norm = q_vec / (np.linalg.norm(q_vec) + 1e-9)
487
+
488
+ scored = []
489
+ for row in rows:
490
+ vec = np.frombuffer(row["vector"], dtype="float32")
491
+ norm = vec / (np.linalg.norm(vec) + 1e-9)
492
+ score = float(q_norm @ norm)
493
+ scored.append((score, row))
494
+
495
+ scored.sort(key=lambda x: -x[0])
496
+ return [
497
+ {
498
+ "name": row["name"], "title": row["title"] or "",
499
+ "description": row["description"] or "",
500
+ "tags": json.loads(row["tags"] or "[]"),
501
+ "url": row["url"] or "", "id": row["id"] or "",
502
+ "_score": round(score, 4),
503
+ }
504
+ for score, row in scored[:top_k]
505
+ ]
506
+
507
+
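# The per-row loop above is fine for a few hundred datasets; a vectorized
# equivalent (a sketch, assuming numpy and a consistent vector dimension) is:
#
#   mat = np.stack([np.frombuffer(r["vector"], dtype="float32") for r in rows])
#   mat = mat / (np.linalg.norm(mat, axis=1, keepdims=True) + 1e-9)
#   scores = mat @ q_norm          # cosine similarities against the query
#   order = np.argsort(-scores)    # indices of best matches first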
508
+ def fts_search(query, top_k=25):
509
+ """FTS5 full-text search against the SQLite index."""
510
+ conn = _open_index()
511
+ # Strip FTS5 special characters (quotes, wildcards, parentheses) rather than escaping them
512
+ safe_q = re.sub(r'["\*\(\)]', ' ', query).strip()
513
+ try:
514
+ rows = conn.execute(
515
+ "SELECT d.name, d.title, d.description, d.tags, d.url, d.id "
516
+ "FROM datasets_fts f JOIN datasets d ON d.name = f.name "
517
+ "WHERE datasets_fts MATCH ? ORDER BY rank LIMIT ?",
518
+ (safe_q, top_k)
519
+ ).fetchall()
520
+ except sqlite3.OperationalError:
521
+ rows = []
522
+ conn.close()
523
+ return [_load_dataset_row(r) for r in rows]
524
+
525
+
526
+ # ── CLI ────────────────────────────────────────────────────────────────────────
527
+
528
+ @click.group()
529
+ @click.version_option("1.0.0", prog_name="sandag")
530
+ @click.pass_context
531
+ def cli(ctx):
532
+ """
533
+ 🌊 sandag - San Diego Regional Data Warehouse CLI
534
+
535
+ Access 358+ GIS datasets from SANDAG/SanGIS covering San Diego County.
536
+
537
+ \b
538
+ Datasets include: parcels, bikeways, zoning, census tracts, fire stations,
539
+ transit routes, hydrology, affordable housing, business sites, and more.
540
+
541
+ \b
542
+ Quick start:
543
+ sandag search bikeways # Find datasets
544
+ sandag info Bikeways # Get dataset details & fields
545
+ sandag query Bikeways --limit 5 # Fetch features
546
+ sandag count Bikeways # Count total features
547
+ sandag download Bikeways # Download pre-built exports
548
+ """
549
+ ctx.ensure_object(dict)
550
+ ctx.obj["session"] = get_session()
551
+
552
+
553
+ # ── list ───────────────────────────────────────────────────────────────────────
554
+
555
+ @cli.command("list")
556
+ @click.option("--refresh", is_flag=True, help="Force refresh the dataset cache")
557
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
558
+ @click.pass_context
559
+ def list_datasets(ctx, refresh, as_json):
560
+ """List all available datasets."""
561
+ session = ctx.obj["session"]
562
+ datasets = discover_datasets(session, force=refresh)
563
+
564
+ if as_json:
565
+ click.echo(json.dumps(datasets, indent=2))
566
+ return
567
+
568
+ table = Table(
569
+ title=f"SANDAG Data Warehouse — {len(datasets)} Datasets",
570
+ box=box.ROUNDED,
571
+ show_lines=False,
572
+ )
573
+ table.add_column("#", style="dim", width=4)
574
+ table.add_column("Dataset Name", style="cyan bold", max_width=40)
575
+ table.add_column("Title", max_width=45)
576
+ table.add_column("Tags", style="dim", max_width=35)
577
+
578
+ for i, ds in enumerate(datasets, 1):
579
+ tags = ", ".join(ds.get("tags", [])[:3])
580
+ table.add_row(str(i), ds["name"], ds.get("title", ""), tags)
581
+
582
+ console.print(table)
583
+
584
+
585
+ # ── index ──────────────────────────────────────────────────────────────────────
586
+
587
+ @cli.command("index")
588
+ @click.option("--force", is_flag=True, help="Rebuild index and re-embed all datasets")
589
+ @click.pass_context
590
+ def build_index_cmd(ctx, force):
591
+ """Build local SQLite search index with embeddings.
592
+
593
+ \b
594
+ Downloads dataset catalog and computes semantic embeddings using
595
+ a local sentence-transformers model (all-MiniLM-L6-v2, ~22MB).
596
+ Run once before using 'search' for semantic results.
597
+
598
+ \b
599
+ Examples:
600
+ sandag index # build or update index
601
+ sandag index --force # force full rebuild
602
+ """
603
+ session = ctx.obj["session"]
604
+ n = build_index(session, force=force)
605
+ console.print(f"[green]✓[/] Index built — [bold]{n}[/] datasets indexed at [dim]{INDEX_FILE}[/]")
606
+
607
+
608
+ # ── search ─────────────────────────────────────────────────────────────────────
609
+
610
+ @cli.command()
611
+ @click.argument("query")
612
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
613
+ @click.option("--fts", "force_fts", is_flag=True, help="Force FTS keyword search (skip semantic)")
614
+ @click.option("--fuzzy", "force_fuzzy", is_flag=True, help="Force fuzzy string match (skip index)")
615
+ @click.pass_context
616
+ def search(ctx, query, as_json, force_fts, force_fuzzy):
617
+ """Search datasets by name, tags, or description.
618
+
619
+ \b
620
+ Search priority (auto-detected):
621
+ 1. Semantic vector search (if 'sandag index' has been run)
622
+ 2. FTS5 keyword search (if index exists without embeddings)
623
+ 3. Fuzzy string match (fallback, no index required)
624
+ """
625
+ session = ctx.obj["session"]
626
+ matches = None
627
+ mode = "fuzzy"
628
+
629
+ if not force_fuzzy and INDEX_FILE.exists():
630
+ if not force_fts:
631
+ # Try semantic search
632
+ try:
633
+ import numpy # noqa: F401
634
+ from sentence_transformers import SentenceTransformer # noqa: F401
635
+ with err_console.status("Searching (semantic)..."):
636
+ matches = semantic_search(query)
637
+ if matches is not None:
638
+ mode = "semantic"
639
+ except ImportError:
640
+ pass
641
+
642
+ if matches is None:
643
+ # Fall back to FTS
644
+ with err_console.status("Searching (FTS)..."):
645
+ matches = fts_search(query) or None
646
+ if matches:
647
+ mode = "fts"
648
+
649
+ if matches is None:
650
+ # Final fallback: fuzzy match against live/cached catalog
651
+ datasets = discover_datasets(session)
652
+ matches = fuzzy_match(query, datasets)
653
+ mode = "fuzzy"
654
+
655
+ if not matches:
656
+ err_console.print(f"[yellow]No datasets matching '{query}'")
657
+ return
658
+
659
+ if as_json:
660
+ click.echo(json.dumps(matches, indent=2))
661
+ return
662
+
663
+ mode_label = {"semantic": "semantic", "fts": "keyword", "fuzzy": "fuzzy"}.get(mode, mode)
664
+ table = Table(
665
+ title=f"Search: '{query}' — {len(matches)} results [{mode_label}]",
666
+ box=box.ROUNDED,
667
+ )
668
+ table.add_column("Dataset", style="cyan bold", max_width=40)
669
+ table.add_column("Title", max_width=45)
670
+ table.add_column("Description", max_width=50, style="dim")
671
+
672
+ for ds in matches[:25]:
673
+ desc = ds.get("description", "")[:80]
674
+ table.add_row(ds["name"], ds.get("title", ""), desc)
675
+
676
+ console.print(table)
677
+ if len(matches) > 25:
678
+ err_console.print(f"[dim] ...and {len(matches) - 25} more results")
679
+ if mode == "fuzzy" and not force_fuzzy:
680
+ err_console.print("[dim] Tip: run [bold]sandag index[/] for semantic search")
681
+
682
+
683
+ # ── info ───────────────────────────────────────────────────────────────────────
684
+
685
+ @cli.command()
686
+ @click.argument("dataset")
687
+ @click.option("--layer", default=0, help="Layer index (default: 0)")
688
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
689
+ @click.pass_context
690
+ def info(ctx, dataset, layer, as_json):
691
+ """Show detailed info and fields for a dataset."""
692
+ session = ctx.obj["session"]
693
+
694
+ with err_console.status(f"Fetching info for {dataset}..."):
695
+ try:
696
+ svc = get_service_info(session, dataset)
697
+ lyr = get_layer_info(session, dataset, layer)
698
+ except Exception as e:
699
+ raise click.ClickException(f"Failed to fetch info: {e}")
700
+
701
+ if as_json:
702
+ click.echo(json.dumps({"service": svc, "layer": lyr}, indent=2, default=str))
703
+ return
704
+
705
+ desc = svc.get("serviceDescription") or lyr.get("description") or "No description"
706
+ name = lyr.get("name", dataset)
707
+ geom_type = lyr.get("geometryType", "Unknown")
708
+ max_records = lyr.get("maxRecordCount", "?")
709
+
710
+ panel_text = Text()
711
+ panel_text.append(f"Name: ", style="bold")
712
+ panel_text.append(f"{name}\n")
713
+ panel_text.append(f"Geometry: ", style="bold")
714
+ panel_text.append(f"{geom_type}\n")
715
+ panel_text.append(f"Max Records/Query: ", style="bold")
716
+ panel_text.append(f"{svc.get('maxRecordCount', '?')}\n")
717
+ panel_text.append(f"Spatial Ref: ", style="bold")
718
+ sr = lyr.get("extent", {}).get("spatialReference", {})
719
+ panel_text.append(f"WKID {sr.get('latestWkid', sr.get('wkid', '?'))}\n\n")
720
+ panel_text.append(f"{desc[:500]}", style="dim")
721
+
722
+ console.print(Panel(panel_text, title=f"[bold cyan]{dataset}", border_style="blue"))
723
+
724
+ # Fields table
725
+ fields = lyr.get("fields", [])
726
+ if fields:
727
+ ftable = Table(title="Fields", box=box.SIMPLE, show_lines=False)
728
+ ftable.add_column("Name", style="green")
729
+ ftable.add_column("Alias", style="dim")
730
+ ftable.add_column("Type", style="yellow")
731
+ ftable.add_column("Length", style="dim", justify="right")
732
+
733
+ for f in fields:
734
+ ftype = f.get("type", "").replace("esriFieldType", "")
735
+ length = str(f.get("length", "")) if f.get("length") else ""
736
+ ftable.add_row(f["name"], f.get("alias", ""), ftype, length)
737
+
738
+ console.print(ftable)
739
+
740
+ # Links
741
+ console.print()
742
+ console.print(f"[dim]REST URL:[/] {service_url(dataset)}")
743
+ console.print(f"[dim]Portal:[/] {PORTAL_URL}/home/item.html?id={svc.get('serviceItemId', '')}")
744
+ console.print(f"[dim]Map:[/] {PORTAL_URL}/apps/mapviewer/index.html?layers={svc.get('serviceItemId', '')}")
745
+
746
+
747
+ # ── count ──────────────────────────────────────────────────────────────────────
748
+
749
+ @cli.command()
750
+ @click.argument("dataset")
751
+ @click.option("--where", default="1=1", help="SQL WHERE clause")
752
+ @click.option("--layer", default=0, help="Layer index")
753
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
754
+ @click.pass_context
755
+ def count(ctx, dataset, where, layer, as_json):
756
+ """Count features in a dataset."""
757
+ session = ctx.obj["session"]
758
+
759
+ with err_console.status(f"Counting features in {dataset}..."):
760
+ data = query_features(session, dataset, where=where, return_count_only=True, layer=layer)
761
+
762
+ n = data.get("count", "?")
763
+
764
+ if as_json:
765
+ click.echo(json.dumps({"dataset": dataset, "where": where, "count": n}))
766
+ return
767
+
768
+ console.print(f"[bold cyan]{dataset}[/]: [bold green]{n:,}[/] features" if isinstance(n, int) else f"{dataset}: {n} features")
769
+ if where != "1=1":
770
+ console.print(f"[dim] WHERE: {where}")
771
+
772
+
773
+ # ── query ──────────────────────────────────────────────────────────────────────
774
+
775
+ @cli.command()
776
+ @click.argument("dataset")
777
+ @click.option("--where", default="1=1", help="SQL WHERE clause (e.g. \"NAME='Oceanside'\")")
778
+ @click.option("--fields", default="*", help="Comma-separated field names (default: all)")
779
+ @click.option("--limit", default=10, type=int, help="Max features to return (default: 10)")
780
+ @click.option("--offset", default=0, type=int, help="Result offset for pagination")
781
+ @click.option("--order-by", default=None, help="ORDER BY clause (e.g. 'NAME ASC')")
782
+ @click.option("--geometry/--no-geometry", default=False, help="Include geometry in output")
783
+ @click.option("--srid", default=4326, type=int, help="Output spatial reference (default: 4326/WGS84)")
784
+ @click.option("--bbox", default=None, help="Bounding box filter: xmin,ymin,xmax,ymax (WGS84)")
785
+ @click.option("--layer", default=0, type=int, help="Layer index")
786
+ @click.option("-f", "--format", "fmt", default="table",
787
+ type=click.Choice(["table", "json", "geojson", "csv"]),
788
+ help="Output format")
789
+ @click.pass_context
790
+ def query(ctx, dataset, where, fields, limit, offset, order_by, geometry,
791
+ srid, bbox, layer, fmt):
792
+ """Query features from a dataset.
793
+
794
+ \b
795
+ Examples:
796
+ sandag query Bikeways --limit 5
797
+ sandag query Bikeways --where "RD_NAME='Coast Blvd'" --fields "RD_NAME,CLASS"
798
+ sandag query Affordable_Housing_Inventory -f geojson --geometry > housing.geojson
799
+ sandag query ABC_Licenses --bbox "-117.2,32.7,-117.1,32.8" --limit 50
800
+ """
801
+ session = ctx.obj["session"]
802
+
803
+ geom_param = None
804
+ geom_type = None
805
+ spatial_rel = None
806
+ if bbox:
807
+ try:
808
+ parts = [float(x.strip()) for x in bbox.split(",")]
809
+ geom_param = json.dumps({
810
+ "xmin": parts[0], "ymin": parts[1],
811
+ "xmax": parts[2], "ymax": parts[3],
812
+ "spatialReference": {"wkid": 4326}
813
+ })
814
+ geom_type = "esriGeometryEnvelope"
815
+ spatial_rel = "esriSpatialRelIntersects"
816
+ except (ValueError, IndexError):
817
+ raise click.ClickException("Invalid bbox format. Use: xmin,ymin,xmax,ymax")
818
+
819
+ with err_console.status(f"Querying {dataset}..."):
820
+ data = query_features(
821
+ session, dataset, where=where, out_fields=fields,
822
+ return_geometry=(geometry or fmt == "geojson"),
823
+ result_record_count=limit, result_offset=offset,
824
+ out_sr=srid, order_by=order_by, layer=layer,
825
+ geometry=geom_param, geometry_type=geom_type, spatial_rel=spatial_rel,
826
+ )
827
+
828
+ features = data.get("features", [])
829
+ if not features:
830
+ err_console.print("[yellow]No features returned.")
831
+ return
832
+
833
+ if fmt == "json":
834
+ click.echo(json.dumps(data, indent=2))
835
+ elif fmt == "geojson":
836
+ click.echo(json.dumps(features_to_geojson(features), indent=2))
837
+ elif fmt == "csv":
838
+ click.echo(features_to_csv_str(features))
839
+ else:
840
+ # Table output
841
+ all_attrs = features[0].get("attributes", {})
842
+ field_names = list(all_attrs.keys())
843
+
844
+ table = Table(
845
+ title=f"{dataset} — {len(features)} features",
846
+ box=box.ROUNDED,
847
+ show_lines=False,
848
+ )
849
+ for fname in field_names[:15]: # cap columns for readability
850
+ table.add_column(fname, max_width=30, overflow="ellipsis")
851
+
852
+ for feat in features:
853
+ attrs = feat.get("attributes", {})
854
+ row = [str(attrs.get(fn, "")) if attrs.get(fn) is not None else "" for fn in field_names[:15]]
855
+ table.add_row(*row)
856
+
857
+ console.print(table)
858
+ if len(field_names) > 15:
859
+ err_console.print(f"[dim] ({len(field_names) - 15} additional fields hidden — use -f json to see all)")
860
+ if data.get("exceededTransferLimit"):
861
+ err_console.print(f"[yellow] ⚠ More features available. Use --offset {offset + limit} to paginate.")
862
+
863
+
864
+ # ── bbox ────────────────────────────────────────────────────────────────────────
865
+
866
+ @cli.command()
867
+ @click.argument("dataset")
868
+ @click.option("--where", default="1=1", help="SQL WHERE clause to filter features")
869
+ @click.option("--layer", default=0, type=int, help="Layer index")
870
+ @click.pass_context
871
+ def bbox(ctx, dataset, where, layer):
872
+ """Output the bounding box of a dataset (or filtered subset) as xmin,ymin,xmax,ymax.
873
+
874
+ \b
875
+ Useful for piping into --bbox of another query:
876
+ BBOX=$(sandag bbox Dam_Inundation_DSOD --where "downstreamhazard='Extremely High'")
877
+ sandag query Community_Points --bbox "$BBOX" -f json
878
+
879
+ Output is plain text: xmin,ymin,xmax,ymax (WGS84)
880
+ """
881
+ session = ctx.obj["session"]
882
+ with err_console.status(f"Fetching geometry for {dataset}..."):
883
+ data = query_features(
884
+ session, dataset, where=where, out_fields="objectid",
885
+ return_geometry=True, out_sr=4326, result_record_count=2000, layer=layer,
886
+ )
887
+
888
+ features = data.get("features", [])
889
+ if not features:
890
+ raise click.ClickException("No features returned — cannot compute bbox.")
891
+
892
+ all_x, all_y = [], []
893
+ for feat in features:
894
+ geom = feat.get("geometry") or {}
895
+ if "x" in geom and "y" in geom:
896
+ all_x.append(geom["x"])
897
+ all_y.append(geom["y"])
898
+ elif "rings" in geom:
899
+ for ring in geom["rings"]:
900
+ for pt in ring:
901
+ all_x.append(pt[0])
902
+ all_y.append(pt[1])
903
+ elif "paths" in geom:
904
+ for path in geom["paths"]:
905
+ for pt in path:
906
+ all_x.append(pt[0])
907
+ all_y.append(pt[1])
908
+ elif "points" in geom:
909
+ for pt in geom["points"]:
910
+ all_x.append(pt[0])
911
+ all_y.append(pt[1])
912
+
913
+ if not all_x:
914
+ raise click.ClickException("No coordinates found in features.")
915
+
916
+ result = f"{min(all_x)},{min(all_y)},{max(all_x)},{max(all_y)}"
917
+ err_console.print(f"[dim]bbox from {len(features)} features[/]")
918
+ click.echo(result)
919
+
920
+
921
+ # ── query-all ──────────────────────────────────────────────────────────────────
922
+
923
+ @cli.command("query-all")
924
+ @click.argument("dataset")
925
+ @click.option("--where", default="1=1", help="SQL WHERE clause")
926
+ @click.option("--fields", default="*", help="Comma-separated field names")
927
+ @click.option("--geometry/--no-geometry", default=False, help="Include geometry")
928
+ @click.option("--srid", default=4326, type=int, help="Output SRID")
929
+ @click.option("--layer", default=0, type=int, help="Layer index")
930
+ @click.option("-f", "--format", "fmt", default="geojson",
931
+ type=click.Choice(["json", "geojson", "csv"]),
932
+ help="Output format (default: geojson)")
933
+ @click.option("--max-features", default=None, type=int,
934
+ help="Stop after N features (default: all)")
935
+ @click.pass_context
936
+ def query_all(ctx, dataset, where, fields, geometry, srid, layer, fmt, max_features):
937
+ """Fetch ALL features with automatic pagination.
938
+
939
+ \b
940
+ Automatically pages through the entire dataset:
941
+ sandag query-all Bikeways -f geojson > bikeways.geojson
942
+ sandag query-all ABC_Licenses -f csv > licenses.csv
943
+ """
944
+ session = ctx.obj["session"]
945
+ page_size = 2000
946
+ offset = 0
947
+ all_features = []
948
+
949
+ with Progress(
950
+ SpinnerColumn(),
951
+ TextColumn("[bold blue]{task.description}"),
952
+ console=err_console,
953
+ ) as progress:
954
+ task = progress.add_task(f"Fetching {dataset}...", total=None)
955
+
956
+ while True:
957
+ remaining = None
958
+ if max_features:
959
+ remaining = max_features - len(all_features)
960
+ if remaining <= 0:
961
+ break
962
+ page_size = min(2000, remaining)
963
+
964
+ data = query_features(
965
+ session, dataset, where=where, out_fields=fields,
966
+ return_geometry=(geometry or fmt == "geojson"),
967
+ result_record_count=page_size, result_offset=offset,
968
+ out_sr=srid, layer=layer,
969
+ )
970
+
971
+ features = data.get("features", [])
972
+ if not features:
973
+ break
974
+
975
+ all_features.extend(features)
976
+ offset += len(features)
977
+ progress.update(task, description=f"[bold blue]Fetched {len(all_features)} features...")
978
+
979
+ if not data.get("exceededTransferLimit", False):
980
+ break
981
+
982
+ err_console.print(f"[green]Total: {len(all_features)} features")
983
+
984
+ if fmt == "geojson":
985
+ click.echo(json.dumps(features_to_geojson(all_features), indent=2))
986
+ elif fmt == "csv":
987
+ click.echo(features_to_csv_str(all_features))
988
+ else:
989
+ click.echo(json.dumps({"features": all_features}, indent=2))
990
+
991
+
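# The pagination loop above relies on the ArcGIS REST contract: when a query
# is truncated at the layer's maxRecordCount, the response sets
# "exceededTransferLimit": true, and advancing resultOffset by the number of
# features received fetches the next page until the flag is absent.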
992
+ # ── fields ─────────────────────────────────────────────────────────────────────
993
+
994
+ @cli.command()
995
+ @click.argument("dataset")
996
+ @click.option("--layer", default=0, help="Layer index")
997
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
998
+ @click.pass_context
999
+ def fields(ctx, dataset, layer, as_json):
1000
+ """List fields/columns for a dataset."""
1001
+ session = ctx.obj["session"]
1002
+
1003
+ with err_console.status(f"Fetching fields for {dataset}..."):
1004
+ lyr = get_layer_info(session, dataset, layer)
1005
+
1006
+ field_list = lyr.get("fields", [])
1007
+
1008
+ if as_json:
1009
+ click.echo(json.dumps(field_list, indent=2))
1010
+ return
1011
+
1012
+ table = Table(title=f"{dataset} — Fields", box=box.ROUNDED)
1013
+ table.add_column("Name", style="green bold")
1014
+ table.add_column("Alias")
1015
+ table.add_column("Type", style="yellow")
1016
+ table.add_column("Nullable", style="dim")
1017
+ table.add_column("Domain", style="dim")
1018
+
1019
+ for f in field_list:
1020
+ ftype = f.get("type", "").replace("esriFieldType", "")
1021
+ nullable = "✓" if f.get("nullable", True) else "✗"
1022
+ domain = f.get("domain", {})
1023
+ domain_str = domain.get("name", "") if domain else ""
1024
+ table.add_row(f["name"], f.get("alias", ""), ftype, nullable, domain_str)
1025
+
1026
+ console.print(table)
1027
+
1028
+
1029
+ # ── describe ───────────────────────────────────────────────────────────────────
1030
+
1031
+ @cli.command()
1032
+ @click.argument("dataset")
1033
+ @click.option("--layer", default=0, help="Layer index")
1034
+ @click.option("-n", "--sample-count", default=3, type=int, help="Number of sample records")
1035
+ @click.pass_context
1036
+ def describe(ctx, dataset, layer, sample_count):
1037
+ """Full schema + count + sample records in one JSON call.
1038
+
1039
+ \b
1040
+ Designed for AI agents that need to understand a dataset before querying.
1041
+ Returns fields, geometry type, feature count, and sample rows as JSON.
1042
+
1043
+ \b
1044
+ Example:
1045
+ sandag describe Bikeways
1046
+ sandag describe ABC_Licenses -n 5
1047
+ """
1048
+ session = ctx.obj["session"]
1049
+
1050
+ with err_console.status(f"Describing {dataset}..."):
1051
+ try:
1052
+ lyr = get_layer_info(session, dataset, layer)
1053
+ count_data = query_features(session, dataset, where="1=1", return_count_only=True, layer=layer)
1054
+ sample_data = query_features(session, dataset, result_record_count=sample_count, layer=layer)
1055
+ except Exception as e:
1056
+ raise click.ClickException(f"Failed to describe {dataset}: {e}")
1057
+
1058
+ fields_info = [
1059
+ {
1060
+ "name": f["name"],
1061
+ "alias": f.get("alias", ""),
1062
+ "type": f.get("type", "").replace("esriFieldType", ""),
1063
+ "nullable": f.get("nullable", True),
1064
+ "domain": f.get("domain", {}).get("name") if f.get("domain") else None,
1065
+ }
1066
+ for f in lyr.get("fields", [])
1067
+ ]
1068
+
1069
+ click.echo(json.dumps({
1070
+ "dataset": dataset,
1071
+ "name": lyr.get("name", dataset),
1072
+ "geometry_type": lyr.get("geometryType", ""),
1073
+ "feature_count": count_data.get("count"),
1074
+ "max_record_count": lyr.get("maxRecordCount"),
1075
+ "description": lyr.get("description", ""),
1076
+ "fields": fields_info,
1077
+ "sample": [f.get("attributes", {}) for f in sample_data.get("features", [])],
1078
+ }, indent=2))
1079
+
1080
+
1081
+ # ── sample ─────────────────────────────────────────────────────────────────────
1082
+
1083
+ @cli.command()
1084
+ @click.argument("dataset")
1085
+ @click.option("-n", "--count", "n", default=5, help="Number of sample records")
1086
+ @click.option("--layer", default=0, help="Layer index")
1087
+ @click.option("-f", "--format", "fmt", default="table",
1088
+ type=click.Choice(["table", "json", "geojson", "csv"]),
1089
+ help="Output format")
1090
+ @click.pass_context
1091
+ def sample(ctx, dataset, n, layer, fmt):
1092
+ """Show a quick sample of data from a dataset."""
1093
+ ctx.invoke(query, dataset=dataset, limit=n, layer=layer, fmt=fmt)
1094
+
1095
+
1096
+ # ── download ───────────────────────────────────────────────────────────────────
1097
+
1098
+ @cli.command()
1099
+ @click.argument("dataset")
1100
+ @click.option("-f", "--format", "fmt", default="geojson",
1101
+ type=click.Choice(["geojson", "csv", "shapefile", "filegdb", "metadata"]),
1102
+ help="Download format")
1103
+ @click.option("-o", "--output", default=None, help="Output file path")
1104
+ @click.pass_context
1105
+ def download(ctx, dataset, fmt, output):
1106
+ """Download pre-built dataset exports from SANDAG.
1107
+
1108
+ \b
1109
+ These are static pre-generated files hosted by SANDAG:
1110
+ sandag download Bikeways -f geojson
1111
+ sandag download Bikeways -f shapefile -o bikeways.zip
1112
+ sandag download Bikeways -f csv
1113
+ """
1114
+ session = ctx.obj["session"]
1115
+ suffix = EXPORT_FORMATS[fmt]
1116
+ url = f"{DOWNLOAD_BASE}/{dataset}{suffix}"
1117
+
1118
+ if output is None:
1119
+ output = f"{dataset}{suffix}"
1120
+
1121
+ err_console.print(f"[dim]Downloading {url}")
1122
+
1123
+ with err_console.status(f"Downloading {dataset} ({fmt})..."):
1124
+ r = session.get(url, timeout=120, stream=True)
1125
+ if r.status_code == 404 or "Download_not_available" in r.url:
1126
+ raise click.ClickException(
1127
+ f"Format '{fmt}' is not available for {dataset}. Try a different format."
1128
+ )
1129
+ r.raise_for_status()
1130
+
1131
+ with open(output, "wb") as f:
1132
+ for chunk in r.iter_content(chunk_size=8192):
1133
+ f.write(chunk)
1134
+
1135
+ size = os.path.getsize(output)
1136
+ size_str = f"{size / 1024:.1f} KB" if size < 1048576 else f"{size / 1048576:.1f} MB"
1137
+ console.print(f"[green]✓[/] Saved to [bold]{output}[/] ({size_str})")
1138
+
1139
+
1140
+ # ── categories ─────────────────────────────────────────────────────────────────
1141
+
1142
+ @cli.command()
1143
+ @click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
1144
+ def categories(as_json):
1145
+ """List dataset categories used in the Data Warehouse."""
1146
+ if as_json:
1147
+ click.echo(json.dumps(CATEGORIES))
1148
+ return
1149
+
1150
+ table = Table(title="Dataset Categories", box=box.ROUNDED)
1151
+ table.add_column("#", style="dim", width=4)
1152
+ table.add_column("Category", style="cyan bold")
1153
+
1154
+ for i, cat in enumerate(CATEGORIES, 1):
1155
+ table.add_row(str(i), cat)
1156
+
1157
+ console.print(table)
1158
+ console.print(f"\n[dim]Use [bold]sandag search <category>[/bold] to find datasets in a category.")
1159
+
1160
+
1161
+ # ── url ────────────────────────────────────────────────────────────────────────
1162
+
1163
+ @cli.command()
1164
+ @click.argument("dataset")
1165
+ @click.option("--type", "url_type", default="rest",
1166
+ type=click.Choice(["rest", "portal", "map", "geojson", "csv", "shapefile", "metadata"]),
1167
+ help="URL type to generate")
1168
+ @click.pass_context
1169
+ def url(ctx, dataset, url_type):
1170
+ """Generate URLs for a dataset.
1171
+
1172
+ \b
1173
+ Examples:
1174
+ sandag url Bikeways --type rest
1175
+ sandag url Bikeways --type map
1176
+ sandag url Bikeways --type geojson
1177
+ """
1178
+ urls = {
1179
+ "rest": service_url(dataset),
1180
+ "portal": f"{PORTAL_URL}/home/item.html?id=",
1181
+ "map": f"{PORTAL_URL}/apps/mapviewer/index.html?layers=",
1182
+ "geojson": f"{DOWNLOAD_BASE}/{dataset}.geojson",
1183
+ "csv": f"{DOWNLOAD_BASE}/{dataset}.csv",
1184
+ "shapefile": f"{DOWNLOAD_BASE}/{dataset}_shapefile.zip",
1185
+ "metadata": f"{DOWNLOAD_BASE}/{dataset}.pdf",
1186
+ }
1187
+
1188
+ click.echo(urls[url_type])
1189
+
1190
+
1191
+ # ── head ───────────────────────────────────────────────────────────────────────
1192
+
1193
+ @cli.command()
1194
+ @click.argument("dataset")
1195
+ @click.option("--layer", default=0, help="Layer index")
1196
+ @click.option("-f", "--format", "fmt", default="table",
1197
+ type=click.Choice(["table", "json", "csv"]),
1198
+ help="Output format")
1199
+ @click.pass_context
1200
+ def head(ctx, dataset, layer, fmt):
1201
+ """Show first 3 records plus field info (quick preview)."""
1202
+ session = ctx.obj["session"]
1203
+
1204
+ with err_console.status(f"Previewing {dataset}..."):
1205
+ lyr = get_layer_info(session, dataset, layer)
1206
+ data = query_features(session, dataset, result_record_count=3, layer=layer)
1207
+
1208
+ features = data.get("features", [])
1209
+
1210
+ if fmt == "json":
1211
+ fields_info = [
1212
+ {"name": f["name"], "alias": f.get("alias", ""), "type": f.get("type", "").replace("esriFieldType", "")}
1213
+ for f in lyr.get("fields", [])
1214
+ ]
1215
+ click.echo(json.dumps({
1216
+ "name": lyr.get("name", dataset),
1217
+ "geometry_type": lyr.get("geometryType", ""),
1218
+ "max_record_count": lyr.get("maxRecordCount"),
1219
+ "fields": fields_info,
1220
+ "sample": [f.get("attributes", {}) for f in features],
1221
+ }, indent=2))
1222
+ return
1223
+
1224
+ if fmt == "csv":
1225
+ click.echo(features_to_csv_str(features))
1226
+ return
1227
+
1228
+ # Table output
1229
+ geom_type = lyr.get("geometryType", "?").replace("esriGeometry", "")
1230
+ num_fields = len(lyr.get("fields", []))
1231
+
1232
+ console.print(Panel(
1233
+ f"[bold]{lyr.get('name', dataset)}[/]\n"
1234
+ f"Geometry: {geom_type} | Fields: {num_fields} | "
1235
+ f"Max/query: {lyr.get('maxRecordCount', '?')}",
1236
+ border_style="blue",
1237
+ ))
1238
+
1239
+ if features:
1240
+ attrs = features[0].get("attributes", {})
1241
+ field_names = list(attrs.keys())
1242
+
1243
+ table = Table(box=box.SIMPLE, show_lines=False)
1244
+ for fn in field_names[:12]:
1245
+ table.add_column(fn, max_width=25, overflow="ellipsis")
1246
+
1247
+ for feat in features:
1248
+ a = feat.get("attributes", {})
1249
+ table.add_row(*[str(a.get(fn, ""))[:25] if a.get(fn) is not None else "" for fn in field_names[:12]])
1250
+
1251
+ console.print(table)
1252
+
1253
+
1254
+ # ── sql (convenience) ──────────────────────────────────────────────────────────
1255
+
1256
+ @cli.command()
1257
+ @click.argument("dataset")
1258
+ @click.argument("where_clause")
1259
+ @click.option("--fields", default="*", help="Fields to return")
1260
+ @click.option("--limit", default=50, type=int, help="Max results")
1261
+ @click.option("-f", "--format", "fmt", default="table",
1262
+ type=click.Choice(["table", "json", "geojson", "csv"]))
1263
+ @click.pass_context
1264
+ def sql(ctx, dataset, where_clause, fields, limit, fmt):
1265
+ """Run a WHERE clause query (shorthand).
1266
+
1267
+ \b
1268
+ Examples:
1269
+ sandag sql Bikeways "CLASS=1" --fields "RD_NAME,CLASS" --limit 20
1270
+ sandag sql ABC_Licenses "LICENSE_TYPE='21'" -f csv
1271
+ """
1272
+ ctx.invoke(query, dataset=dataset, where=where_clause, fields=fields,
1273
+ limit=limit, fmt=fmt)
1274
+
1275
+
1276
+ # ── map ────────────────────────────────────────────────────────────────────────
1277
+
1278
+ @cli.command("map")
1279
+ @click.argument("dataset")
1280
+ @click.option("--where", default="1=1", help="SQL WHERE clause")
1281
+ @click.option("--limit", default=500, type=int, help="Max features to render (default: 500)")
1282
+ @click.option("--layer", default=0, type=int, help="Layer index")
1283
+ @click.option("--width", default=1200, type=int, help="Image width in pixels")
1284
+ @click.option("--height", default=800, type=int, help="Image height in pixels")
1285
+ @click.option("--color", default=None, help="Feature color (default: red for points, blue for lines, orange for polygons)")
1286
+ @click.option("-o", "--output", default=None, help="Output PNG path (default: <dataset>.png)")
1287
+ @click.option("--open", "open_after", is_flag=True, help="Open image after saving")
1288
+ @click.pass_context
1289
+ def map_cmd(ctx, dataset, where, limit, layer, width, height, color, output, open_after):
1290
+ """Render dataset features as a PNG map image.
1291
+
1292
+ \b
1293
+ Uses OpenStreetMap tiles as basemap. Requires: pip install staticmap
1294
+ Supports points, lines, and polygons.
1295
+
1296
+ \b
1297
+ Examples:
1298
+ sandag map Bikeways
1299
+ sandag map Bikeways --where "bike_class=1" --color "#e63946"
1300
+ sandag map Affordable_Housing_Inventory -o housing.png --open
1301
+ sandag map Hydrological_Basins --limit 200 --width 1600 --height 1000
1302
+ """
1303
+ try:
1304
+ from staticmap import StaticMap, CircleMarker, Line, Polygon
1305
+ except ImportError:
1306
+ raise click.ClickException(
1307
+ "staticmap is required: pip install staticmap"
1308
+ )
1309
+
1310
+ session = ctx.obj["session"]
1311
+
1312
+ if output is None:
1313
+ output = f"{dataset}.png"
1314
+
1315
+ with err_console.status(f"Fetching {dataset} features..."):
1316
+ data = query_features(
1317
+ session, dataset, where=where,
1318
+ return_geometry=True, result_record_count=limit,
1319
+ out_sr=4326, layer=layer,
1320
+ )
1321
+
1322
+ features = data.get("features", [])
1323
+ if not features:
1324
+ raise click.ClickException("No features returned — nothing to map.")
1325
+
1326
+ m = StaticMap(width, height, url_template="https://tile.openstreetmap.org/{z}/{x}/{y}.png")
1327
+
1328
+ point_color = color or "red"
1329
+ line_color = color or "#1a6fad"
1330
+ poly_fill = color or "#f4a22680"
1331
+ poly_outline = color or "#e07b00"
1332
+
1333
+ rendered = 0
1334
+ for feat in features:
1335
+ geom = feat.get("geometry")
1336
+ if not geom:
1337
+ continue
1338
+
1339
+ if "x" in geom and "y" in geom:
1340
+ m.add_marker(CircleMarker((geom["x"], geom["y"]), point_color, 6))
1341
+ rendered += 1
1342
+ elif "paths" in geom:
1343
+ for path in geom["paths"]:
1344
+ coords = [tuple(c[:2]) for c in path]
1345
+ if len(coords) >= 2:
1346
+ m.add_line(Line(coords, line_color, 2))
1347
+ rendered += 1
1348
+ elif "rings" in geom:
1349
+ for ring in geom["rings"]:
1350
+ coords = [tuple(c[:2]) for c in ring]
1351
+ if len(coords) >= 3:
1352
+ m.add_polygon(Polygon(coords, poly_fill, poly_outline, True))  # staticmap's Polygon takes fill_color before outline_color
1353
+ rendered += 1
1354
+ elif "points" in geom:
1355
+ for pt in geom["points"]:
1356
+ m.add_marker(CircleMarker(tuple(pt[:2]), point_color, 6))
1357
+ rendered += 1
1358
+
1359
+ if rendered == 0:
1360
+ raise click.ClickException("No renderable geometries found in features.")
1361
+
1362
+ with err_console.status("Rendering map..."):
1363
+ image = m.render()
1364
+ image.save(output)
1365
+
1366
+ size = os.path.getsize(output)
1367
+ size_str = f"{size / 1024:.0f} KB"
1368
+ console.print(f"[green]✓[/] Saved [bold]{output}[/] ({width}×{height}px, {rendered} features, {size_str})")
1369
+
1370
+ if data.get("exceededTransferLimit"):
1371
+ err_console.print(f"[yellow] ⚠ Only first {limit} features shown. Use --limit to increase.")
1372
+
1373
+ if open_after:
1374
+ # click.launch is cross-platform (macOS `open`, Linux `xdg-open`, Windows `start`)
1375
+ click.launch(output)
1376
+
1377
+
1378
+ if __name__ == "__main__":
1379
+ cli()