pulse-code 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. pulse/__init__.py +1 -0
  2. pulse/__main__.py +4 -0
  3. pulse/catalog.py +102 -0
  4. pulse/cli.py +984 -0
  5. pulse/data/catalog.json +1599 -0
  6. pulse/data/queries_index.json +328 -0
  7. pulse/data/variable_labels.json +1338 -0
  8. pulse/llm_builder.py +732 -0
  9. pulse/matcher.py +180 -0
  10. pulse/queries/aids-cases-by-year-1981-1999-req.xml +178 -0
  11. pulse/queries/births-by-year-1995-2002-req.xml +226 -0
  12. pulse/queries/births-by-year-2003-2006-req.xml +306 -0
  13. pulse/queries/births-by-year-2007-2024-req.xml +334 -0
  14. pulse/queries/cancer-incidence-by-site-by-year-1999-2022-req.xml +174 -0
  15. pulse/queries/cancer-mortality-by-site-by-year-2018-2023-req.xml +166 -0
  16. pulse/queries/covid-deaths-by-race-2020-2023-req.xml +529 -0
  17. pulse/queries/drug-deaths-by-month-1999-2020-req.xml +436 -0
  18. pulse/queries/drug-deaths-by-month-2018-2024-req.xml +544 -0
  19. pulse/queries/drug-deaths-by-year-1999-2020-req.xml +436 -0
  20. pulse/queries/drug-deaths-by-year-2018-2024-req.xml +536 -0
  21. pulse/queries/fentanyl-deaths-by-month-1999-2020-req.xml +430 -0
  22. pulse/queries/fentanyl-deaths-by-month-2018-2024-req.xml +530 -0
  23. pulse/queries/fetal-deaths-by-cause-by-year-2014-2024-req.xml +530 -0
  24. pulse/queries/fetal-deaths-by-year-2005-2024-req.xml +322 -0
  25. pulse/queries/heart-vs-cancer-by-sex-2018-2023-req.xml +532 -0
  26. pulse/queries/heat-wave-days-by-county-req.xml +154 -0
  27. pulse/queries/infant-mortality-2018-2023-req.xml +531 -0
  28. pulse/queries/infant-mortality-by-cause-by-year-2007-2023-req.xml +290 -0
  29. pulse/queries/maternal-mortality-by-year-1999-2020-req.xml +351 -0
  30. pulse/queries/maternal-mortality-by-year-2018-2024-req.xml +413 -0
  31. pulse/queries/mortality-by-race-sex-2018-2023-req.xml +490 -0
  32. pulse/queries/mortality-by-year-cause-1979-1998-req.xml +222 -0
  33. pulse/queries/mortality-by-year-cause-1999-2020-req.xml +434 -0
  34. pulse/queries/mortality-by-year-cause-2021-2024-req.xml +529 -0
  35. pulse/queries/opioid-overdose-deaths-2018-2024-req.xml +544 -0
  36. pulse/queries/pm25-by-year-2003-2011-req.xml +194 -0
  37. pulse/queries/provisional-births-by-month-2023-req.xml +854 -0
  38. pulse/queries/racial-mortality-gap-2018-2023-req.xml +531 -0
  39. pulse/queries/std-cases-by-disease-by-year-1984-2014-req.xml +178 -0
  40. pulse/queries/suicide-by-sex-1999-2020-req.xml +411 -0
  41. pulse/queries/suicide-by-sex-2021-2024-req.xml +551 -0
  42. pulse/queries/tb-cases-by-year-1993-2023-req.xml +206 -0
  43. pulse/queries/tick-borne-diseases-by-year-2016-2023-req.xml +125 -0
  44. pulse/queries/underlying-cause-mortality-by-year-1999-2020-req.xml +350 -0
  45. pulse/queries/unintentional-injuries-by-age-2018-2023-req.xml +531 -0
  46. pulse/templates/D10-base.xml +226 -0
  47. pulse/templates/D104-base.xml +142 -0
  48. pulse/templates/D117-base.xml +110 -0
  49. pulse/templates/D128-base.xml +182 -0
  50. pulse/templates/D140-base.xml +318 -0
  51. pulse/templates/D141-base.xml +454 -0
  52. pulse/templates/D149-base.xml +878 -0
  53. pulse/templates/D157-base.xml +490 -0
  54. pulse/templates/D158-base.xml +406 -0
  55. pulse/templates/D159-base.xml +774 -0
  56. pulse/templates/D16-base.xml +266 -0
  57. pulse/templates/D176-base.xml +526 -0
  58. pulse/templates/D178-base.xml +158 -0
  59. pulse/templates/D18-base.xml +262 -0
  60. pulse/templates/D192-base.xml +854 -0
  61. pulse/templates/D204-base.xml +142 -0
  62. pulse/templates/D23-base.xml +258 -0
  63. pulse/templates/D27-base.xml +342 -0
  64. pulse/templates/D31-base.xml +262 -0
  65. pulse/templates/D60-base.xml +274 -0
  66. pulse/templates/D61-base.xml +250 -0
  67. pulse/templates/D66-base.xml +378 -0
  68. pulse/templates/D69-base.xml +278 -0
  69. pulse/templates/D73-base.xml +182 -0
  70. pulse/templates/D74-base.xml +254 -0
  71. pulse/templates/D76-base.xml +350 -0
  72. pulse/templates/D77-base.xml +434 -0
  73. pulse/templates/D8-base.xml +314 -0
  74. pulse/templates/D80-base.xml +174 -0
  75. pulse/templates/D81-base.xml +178 -0
  76. pulse/wonder_client.py +161 -0
  77. pulse_code-1.0.1.dist-info/METADATA +249 -0
  78. pulse_code-1.0.1.dist-info/RECORD +82 -0
  79. pulse_code-1.0.1.dist-info/WHEEL +5 -0
  80. pulse_code-1.0.1.dist-info/entry_points.txt +2 -0
  81. pulse_code-1.0.1.dist-info/licenses/LICENSE +121 -0
  82. pulse_code-1.0.1.dist-info/top_level.txt +1 -0
pulse/cli.py ADDED
@@ -0,0 +1,984 @@
1
+ """pulse CLI — CDC WONDER query explorer, builder, and refiner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import io
7
+ import json
8
+ import time
9
+ from pathlib import Path
10
+ from typing import Annotated, Optional
11
+
12
+ import typer
13
+ from rich import box
14
+ from rich.console import Console
15
+ from rich.panel import Panel
16
+ from rich.prompt import Prompt
17
+ from rich.table import Table
18
+ from rich.text import Text
19
+
20
+ from pulse.catalog import Catalog
21
+ from pulse.matcher import match_datasets, match_queries
22
+ from pulse.wonder_client import WonderClient
23
+
24
# Typer application object; each @app.command(...) below registers a sub-command on it.
app = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    help="CDC WONDER public health data query CLI — explore, build, and refine.",
    name="pulse",
)

# Rich consoles: `err` writes to stderr, `console` to the default stream.
err = Console(stderr=True)
console = Console()

# Directory holding the bundled example query XML files, next to this module.
_QUERIES_DIR = Path(__file__).parent / "queries"

# Lazily created Catalog instance; filled in by _get_catalog() on first use.
_catalog = None
35
+
36
+
37
def _get_catalog() -> Catalog:
    """Return the shared Catalog, constructing it only on the first call."""
    global _catalog
    if _catalog is not None:
        return _catalog
    _catalog = Catalog()
    return _catalog
42
+
43
+
44
def _print_missing_provider_package(error: ImportError) -> None:
    """Report on stderr that the LLM provider's package could not be imported.

    Args:
        error: The ImportError that was raised; its text is echoed to the user.
    """
    headline = f"[red]Missing package for the configured LLM provider: {error}[/red]"
    hint = (
        "[dim]Anthropic needs `anthropic`; Azure OpenAI needs `openai` "
        "(both are pulse dependencies — try `uv sync`).[/dim]"
    )
    for line in (headline, hint):
        err.print(line)
50
+
51
+
52
def _print_missing_api_key() -> None:
    """Tell the user on stderr which environment variables supply LLM credentials."""
    headline = "[red]No credentials found for the configured LLM provider.[/red]"
    hint = (
        "[dim]Set [bold]ANTHROPIC_API_KEY[/bold] (default provider), or "
        "[bold]LLM_PROVIDER=azure_openai[/bold] plus [bold]AZURE_OPENAI_API_KEY[/bold], "
        "[bold]AZURE_OPENAI_ENDPOINT[/bold], [bold]AZURE_OPENAI_DEPLOYMENT[/bold], "
        "[bold]AZURE_OPENAI_API_VERSION[/bold].[/dim]"
    )
    for line in (headline, hint):
        err.print(line)
60
+
61
+
62
def _reference_queries(
    prompt: str, catalog: Catalog, top_n: int = 2, min_score: float = 0.10
) -> list[tuple[str, str]]:
    """Collect few-shot reference XML from the bundled queries closest to a prompt.

    Args:
        prompt: Free-text request to match against the bundled queries.
        catalog: Catalog passed through to ``match_queries``.
        top_n: Maximum number of matches requested from ``match_queries``.
        min_score: Matches scoring below this value are ignored.

    Returns:
        ``(description, xml_text)`` pairs, in match order, for every qualifying
        match whose file exists under the bundled queries directory.
    """
    references: list[tuple[str, str]] = []
    for hit in match_queries(prompt, catalog, top_n=top_n):
        if not hit.score < min_score:
            xml_path = _QUERIES_DIR / hit.query.filename
            # A match whose file is not on disk is skipped silently.
            if xml_path.exists():
                references.append((hit.query.description, xml_path.read_text()))
    return references
75
+
76
+
77
+ # ── datasets ──────────────────────────────────────────────────────────────────
78
+
79
+
80
@app.command("datasets")
def cmd_datasets(
    topic: Annotated[
        Optional[str], typer.Option("--topic", "-t", help="Filter by topic")
    ] = None,
    json_out: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
):
    """List all CDC WONDER datasets — what they cover and when."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    catalog = _get_catalog()
    datasets = catalog.datasets()

    # Optional case-insensitive substring filter on each dataset's topic.
    if topic:
        needle = topic.lower()
        datasets = [ds for ds in datasets if needle in ds.topic.lower()]

    # Machine-readable output: one JSON object per dataset, then stop.
    if json_out:
        payload = [
            {
                "id": ds.id,
                "title": ds.title,
                "topic": ds.topic,
                "year_range": ds.year_range_label,
                "subject": ds.subject,
                "has_aar": ds.has_aar,
                "bundled_queries": len(catalog.queries_for_dataset(ds.id)),
            }
            for ds in datasets
        ]
        print(json.dumps(payload, indent=2))
        return

    table = Table(
        box=box.ROUNDED,
        show_header=True,
        header_style="bold cyan",
        border_style="dim",
        expand=True,
    )
    column_specs = (
        ("Dataset", {"style": "bold yellow", "width": 9, "no_wrap": True}),
        ("Topic", {"width": 18}),
        ("Years", {"width": 13, "no_wrap": True}),
        ("Subject", {"ratio": 1}),
        ("Queries", {"justify": "right", "width": 7}),
        ("AAR", {"justify": "center", "width": 5}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # Display colour per topic; topics not listed here fall back to white.
    topic_colors = {
        "Mortality": "red",
        "Infant Mortality": "orange3",
        "Fetal Deaths": "dark_orange",
        "Natality": "green",
        "Cancer": "bright_magenta",
        "Infectious Disease": "cyan",
        "STI / Sexual Health": "bright_cyan",
        "Tuberculosis": "yellow",
        "HIV/AIDS": "bright_red",
        "Vaccine Safety": "magenta",
        "Environment": "blue",
        "Population": "dim",
    }

    for ds in datasets:
        query_count = len(catalog.queries_for_dataset(ds.id))
        topic_style = topic_colors.get(ds.topic, "white")

        # Subjects longer than 120 characters are cut and marked with an ellipsis.
        subject = ds.subject
        if len(subject) > 120:
            subject = subject[:120] + "…"

        if query_count:
            queries_cell = str(query_count)
        else:
            queries_cell = "—"

        if ds.has_aar:
            aar_cell = "✓"
        else:
            aar_cell = ""

        table.add_row(
            ds.id,
            Text(ds.topic, style=topic_style),
            ds.year_range_label,
            subject,
            queries_cell,
            aar_cell,
        )

    console.print()
    console.print(table)
    all_topics = catalog.topics()
    footer = (
        f"\n[dim]{len(datasets)} datasets across {len(all_topics)} topics | "
        f"[bold]pulse topics[/bold] to list topics | "
        f"[bold]pulse datasets --topic Cancer[/bold] | "
        f"[bold]pulse info <ID>[/bold] | "
        f'[bold]pulse search "<topic>"[/bold][/dim]'
    )
    console.print(footer)

    # The immunization note is only shown for the unfiltered listing.
    if topic:
        return
    console.print(
        "[dim]Note: Immunization coverage data (NIS, VaxView, school vaccination) "
        "is not in WONDER — it is available through CDC Open Data. "
        "WONDER does include VAERS vaccine adverse events (D8).[/dim]"
    )
169
+
170
+
171
+ # ── info ──────────────────────────────────────────────────────────────────────
172
+
173
+
174
@app.command("info")
def cmd_info(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID (e.g. D176)")],
    json_out: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
):
    """Show detailed information about a dataset — measures, groupings, bundled queries."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    catalog = _get_catalog()
    ds = catalog.dataset(dataset_id)
    if not ds:
        # Unknown dataset: explain on stderr and exit with status 1.
        err.print(f"[red]Dataset {dataset_id!r} not found.[/red]")
        err.print("Run [bold]pulse datasets[/bold] to see all available datasets.")
        raise typer.Exit(1)

    bundled = catalog.queries_for_dataset(ds.id)

    if json_out:
        measures = [{"code": m.code, "label": m.label} for m in ds.measures]
        bundled_queries = [
            {
                "filename": q.filename,
                "description": q.description,
                "groupings": q.groupings,
                "year_range": q.year_range,
            }
            for q in bundled
        ]
        payload = {
            "id": ds.id,
            "title": ds.title,
            "topic": ds.topic,
            "subject": ds.subject,
            "year_range": ds.year_range_label,
            "has_aar": ds.has_aar,
            "has_template": ds.has_template,
            "notes": ds.notes,
            "tags": ds.tags,
            "measures": measures,
            "key_groupings": ds.key_groupings,
            "bundled_queries": bundled_queries,
        }
        print(json.dumps(payload, indent=2))
        return

    # Header panel: id, title, topic and year range.
    header = Panel(
        f"[bold cyan]{ds.id}[/bold cyan] [bold]{ds.title}[/bold]\n"
        f"[dim]{ds.topic} · {ds.year_range_label}[/dim]",
        border_style="cyan",
        expand=False,
    )
    console.print()
    console.print(header)

    console.print("\n[bold]Subject[/bold]")
    console.print(f" {ds.subject}\n")

    if ds.notes:
        console.print(f"[dim italic]Note: {ds.notes}[/dim italic]\n")

    console.print("[bold]Measures[/bold]")
    for measure in ds.measures:
        console.print(f" [cyan]{measure.code}[/cyan] {measure.label}")

    console.print(f"\n[bold]Key Grouping Dimensions ({len(ds.key_groupings)})[/bold]")
    for grouping in ds.key_groupings:
        console.print(f" · {grouping}")

    if not ds.has_aar:
        console.print("\n [dim]✗ No age-adjusted rates[/dim]")
    else:
        console.print("\n [green]✓ Age-adjusted rates (AAR) available[/green]")

    if not bundled:
        console.print("\n[dim]No bundled example queries for this dataset.[/dim]")
        # NOTE(review): nesting of this check under the "no bundled queries" branch was
        # reconstructed from a listing without indentation — confirm against the original.
        if ds.has_template:
            console.print(
                '[dim]Template available — use [bold]pulse build "<prompt>"[/bold] to generate a query.[/dim]'
            )
    else:
        console.print(f"\n[bold]Bundled Example Queries ({len(bundled)})[/bold]")
        queries_table = Table(box=box.SIMPLE, show_header=True, header_style="bold")
        queries_table.add_column("File", style="dim")
        for plain_header in ("Description", "Groups By", "Years"):
            queries_table.add_column(plain_header)
        for q in bundled:
            groups = ", ".join(q.groupings)
            queries_table.add_row(q.filename, q.description, groups, q.year_range)
        console.print(queries_table)
        console.print(
            f"[dim]Run a bundled query: [bold]pulse run {bundled[0].filename}[/bold][/dim]"
        )

    console.print()
276
+
277
+
278
+ # ── search ────────────────────────────────────────────────────────────────────
279
+
280
+
281
@app.command("search")
def cmd_search(
    prompt: Annotated[str, typer.Argument(help="Natural language query topic")],
    top: Annotated[int, typer.Option("--top", "-n", help="Number of results")] = 5,
    queries_only: Annotated[
        bool, typer.Option("--queries", "-q", help="Show only bundled queries")
    ] = False,
    datasets_only: Annotated[
        bool, typer.Option("--datasets", "-d", help="Show only datasets")
    ] = False,
    json_out: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
):
    """Find the best matching datasets and bundled queries for a topic."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    catalog = _get_catalog()

    # Each flag suppresses the *other* kind of match; a suppressed kind is an empty list.
    ds_matches = [] if queries_only else match_datasets(prompt, catalog, top_n=top)
    q_matches = [] if datasets_only else match_queries(prompt, catalog, top_n=top)

    if json_out:
        dataset_rows = [
            {
                "id": m.dataset.id,
                "title": m.dataset.title,
                "score": round(m.score, 3),
                "reason": m.reason,
            }
            for m in ds_matches
        ]
        query_rows = [
            {
                "filename": m.query.filename,
                "dataset_id": m.query.dataset_id,
                "description": m.query.description,
                "score": round(m.score, 3),
            }
            for m in q_matches
        ]
        payload = {
            "prompt": prompt,
            "dataset_matches": dataset_rows,
            "query_matches": query_rows,
        }
        print(json.dumps(payload, indent=2))
        return

    def _score_cell(score):
        # Score as a whole percentage: green from 30%, yellow from 15%, dim below.
        pct = int(score * 100)
        if pct >= 30:
            style = "green"
        elif pct >= 15:
            style = "yellow"
        else:
            style = "dim"
        return Text(f"{pct}%", style=style)

    console.print()
    console.print(f"[bold]Search:[/bold] {prompt!r}\n")

    # ds_matches is already empty when --queries was given, so truthiness is enough.
    if ds_matches:
        console.print("[bold cyan]Best Matching Datasets[/bold cyan]")
        ds_table = Table(box=box.SIMPLE, show_header=True, header_style="bold")
        ds_table.add_column("Score", justify="right", width=7)
        ds_table.add_column("Dataset", width=9)
        ds_table.add_column("Topic", width=16)
        ds_table.add_column("Years", width=13)
        ds_table.add_column("Title / Reason")
        for match in ds_matches:
            dataset = match.dataset
            ds_table.add_row(
                _score_cell(match.score),
                dataset.id,
                dataset.topic,
                dataset.year_range_label,
                f"{dataset.title}\n[dim]{match.reason}[/dim]",
            )
        console.print(ds_table)

    # Likewise q_matches is already empty when --datasets was given.
    if q_matches:
        console.print("[bold cyan]Best Matching Bundled Queries[/bold cyan]")
        q_table = Table(box=box.SIMPLE, show_header=True, header_style="bold")
        q_table.add_column("Score", justify="right", width=7)
        q_table.add_column("Dataset", width=8)
        q_table.add_column("File", width=42)
        q_table.add_column("Description")
        for match in q_matches:
            query = match.query
            q_table.add_row(
                _score_cell(match.score),
                query.dataset_id,
                query.filename,
                query.description,
            )
        console.print(q_table)

    console.print(
        f"\n[dim]Run a query: [bold]pulse run <filename>[/bold] · "
        f'Build new: [bold]pulse build "{prompt}"[/bold][/dim]\n'
    )
373
+
374
+
375
+ # ── build ─────────────────────────────────────────────────────────────────────
376
+
377
+
378
@app.command("build")
def cmd_build(
    prompt: Annotated[str, typer.Argument(help="Natural language query description")],
    output: Annotated[
        Optional[Path], typer.Option("-o", "--output", help="Save XML to file")
    ] = None,
    suggest: Annotated[
        bool,
        typer.Option(
            "--suggest/--no-suggest", help="Show closest existing queries first"
        ),
    ] = True,
    verbose: Annotated[bool, typer.Option("-v", "--verbose")] = False,
):
    """Build a CDC WONDER XML query from natural language using Claude."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    catalog = _get_catalog()

    # Optionally point the user at bundled queries that already resemble the prompt.
    if suggest:
        nearest = match_queries(prompt, catalog, top_n=3)
        if nearest and nearest[0].score > 0.10:
            console.print(
                "\n[dim]Closest existing queries — run these directly with [bold]pulse run <file>[/bold]:[/dim]"
            )
            for hit in nearest[:3]:
                percent = int(hit.score * 100)
                console.print(
                    f" [yellow]{percent}%[/yellow] {hit.query.filename} [dim]{hit.query.description}[/dim]"
                )
            console.print()

    console.print(f"[bold]Building query:[/bold] {prompt!r}")
    console.print("[dim]Calling the LLM…[/dim]\n")

    def _on_thinking(text: str) -> None:
        # Only in verbose mode: echo the first 200 characters of non-blank text.
        if not verbose or not text.strip():
            return
        console.print(f"[dim italic]{text[:200]}…[/dim italic]")

    refs = _reference_queries(prompt, catalog)
    try:
        # Imported here so a missing provider package surfaces as the ImportError below.
        from pulse.llm_builder import get_query_builder

        builder = get_query_builder()
        request = builder.build(
            prompt, reference_queries=refs, on_thinking=_on_thinking
        )
    except ImportError as exc:
        _print_missing_provider_package(exc)
        raise typer.Exit(1)
    except (RuntimeError, ValueError) as exc:
        err.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    except TypeError as exc:
        # Only a TypeError that mentions credentials is treated as "no API key".
        if not ("api_key" in str(exc) or "authentication" in str(exc).lower()):
            raise
        _print_missing_api_key()
        raise typer.Exit(1)
    xml = request.to_xml()

    # Without --output the XML goes to stdout.
    if not output:
        print(xml)
    else:
        output.write_text(xml)
        console.print(f"[green]✓[/green] Saved to [bold]{output}[/bold]")
        console.print(f"[dim]Run it: [bold]pulse run {output}[/bold][/dim]\n")
442
+
443
+
444
+ # ── run ───────────────────────────────────────────────────────────────────────
445
+
446
+
447
@app.command("run")
def cmd_run(
    query_file: Annotated[
        str, typer.Argument(help="Path to XML query file, or bundled query filename")
    ],
    format: Annotated[
        str, typer.Option("-f", "--format", help="Output format: table|csv|json|xml")
    ] = "table",
    timeout: Annotated[
        int, typer.Option("-t", "--timeout", help="Request timeout in seconds")
    ] = 120,
    no_totals: Annotated[
        bool, typer.Option("--no-totals", help="Exclude total rows")
    ] = False,
    output: Annotated[
        Optional[Path], typer.Option("-o", "--output", help="Save output to file")
    ] = None,
):
    """Execute a CDC WONDER XML query and display results."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.

    # Resolve the argument as a filesystem path first, then as a bundled query name.
    path = Path(query_file)
    if not path.exists():
        bundled = _QUERIES_DIR / query_file
        if not bundled.exists():
            err.print(f"[red]File not found: {query_file}[/red]")
            err.print(f"[dim]Bundled queries are in {_QUERIES_DIR}[/dim]")
            raise typer.Exit(1)
        path = bundled

    # Progress messages go to stderr.
    err.print(f"[bold]Executing:[/bold] {path.name}")
    err.print("[dim]Querying CDC WONDER API…[/dim]\n")

    client = WonderClient(timeout=timeout)
    try:
        response_xml = client.execute_file(path)
    except RuntimeError as exc:
        err.print(f"[red]Error from CDC WONDER:[/red] {exc}")
        raise typer.Exit(1)

    _output_response(client, response_xml, format, output, no_totals)
487
+
488
+
489
+ # ── query ─────────────────────────────────────────────────────────────────────
490
+
491
+
492
@app.command("query")
def cmd_query(
    prompt: Annotated[str, typer.Argument(help="Natural language query")],
    format: Annotated[
        str, typer.Option("-f", "--format", help="Output: table|csv|json|xml")
    ] = "table",
    save_xml: Annotated[
        Optional[Path], typer.Option("--save-xml", help="Save generated XML")
    ] = None,
    timeout: Annotated[int, typer.Option("-t", "--timeout")] = 120,
    no_totals: Annotated[bool, typer.Option("--no-totals")] = False,
):
    """Build a query from natural language and execute it immediately."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    err.print(f"[bold]Building query:[/bold] {prompt!r}")

    catalog = _get_catalog()
    refs = _reference_queries(prompt, catalog)
    try:
        # Imported here so a missing provider package surfaces as the ImportError below.
        from pulse.llm_builder import get_query_builder

        builder = get_query_builder()
        request = builder.build(prompt, reference_queries=refs)
    except ImportError as exc:
        _print_missing_provider_package(exc)
        raise typer.Exit(1)
    except (RuntimeError, ValueError) as exc:
        err.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    except TypeError as exc:
        # Only a TypeError that mentions credentials is treated as "no API key".
        if not ("api_key" in str(exc) or "authentication" in str(exc).lower()):
            raise
        _print_missing_api_key()
        raise typer.Exit(1)
    xml = request.to_xml()

    # Optionally keep a copy of the generated request before running it.
    if save_xml:
        save_xml.write_text(xml)
        err.print(f"[green]✓[/green] Saved XML to {save_xml}")

    dataset_id = request.dataset_id
    err.print(f"[dim]Executing against {dataset_id}…[/dim]\n")

    client = WonderClient(timeout=timeout)
    try:
        response_xml = client.query_from_xml(request.dataset_id, xml)
    except RuntimeError as exc:
        err.print(f"[red]Error from CDC WONDER:[/red] {exc}")
        raise typer.Exit(1)

    _output_response(client, response_xml, format, None, no_totals)
541
+
542
+
543
+ # ── compare ───────────────────────────────────────────────────────────────────
544
+
545
# Seconds cmd_compare sleeps between consecutive sub-queries; the wait is
# reported to the user as the CDC WONDER rate limit.
_WONDER_RATE_LIMIT_SECONDS = 15
546
+
547
+
548
@app.command("compare")
def cmd_compare(
    prompt: Annotated[
        str,
        typer.Argument(
            help="Natural language comparison, e.g. 'opioid deaths vs suicide deaths by state'"
        ),
    ],
    format: Annotated[
        str, typer.Option("-f", "--format", help="Output: table|csv|json|xml")
    ] = "table",
    save_xml_dir: Annotated[
        Optional[Path],
        typer.Option("--save-xml-dir", help="Directory to save each sub-query's XML"),
    ] = None,
    timeout: Annotated[int, typer.Option("-t", "--timeout")] = 120,
    no_totals: Annotated[bool, typer.Option("--no-totals")] = False,
):
    """Build and run a comparison across two or more causes/datasets from natural language."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    catalog = _get_catalog()

    console.print(f"[bold]Building comparison:[/bold] {prompt!r}\n")

    refs = _reference_queries(prompt, catalog)
    try:
        # Imported here so a missing provider package surfaces as the ImportError below.
        from pulse.llm_builder import get_query_builder, WonderRequestSet

        builder = get_query_builder()
        result = builder.build_any(prompt, reference_queries=refs)
    except ImportError as exc:
        _print_missing_provider_package(exc)
        raise typer.Exit(1)
    except (RuntimeError, ValueError) as exc:
        err.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    except TypeError as exc:
        # Only a TypeError that mentions credentials is treated as "no API key".
        if not ("api_key" in str(exc) or "authentication" in str(exc).lower()):
            raise
        _print_missing_api_key()
        raise typer.Exit(1)

    # A request set carries several labelled sub-queries; anything else is run
    # as a single query labelled with its dataset id.
    if isinstance(result, WonderRequestSet):
        requests = result.requests
        labels = result.labels
    else:
        console.print(
            "[yellow]This didn't look like a comparison — running it as a single query.[/yellow]\n"
        )
        requests = [result]
        labels = [result.dataset_id]

    client = WonderClient(timeout=timeout)
    if save_xml_dir:
        save_xml_dir.mkdir(parents=True, exist_ok=True)

    final_index = len(requests) - 1
    for index, (request, label) in enumerate(zip(requests, labels)):
        xml = request.to_xml()

        if save_xml_dir:
            # File name: the label with every character other than
            # alphanumerics, "-" and "_" replaced by "_".
            safe_name = "".join(
                ch if (ch.isalnum() or ch in "-_") else "_" for ch in label
            )
            xml_path = save_xml_dir / f"{safe_name}.xml"
            xml_path.write_text(xml)
            console.print(f"[green]✓[/green] Saved {xml_path}")

        console.print(f"\n[bold cyan]── {label} ──[/bold cyan]")
        console.print(f"[dim]Executing against {request.dataset_id}…[/dim]\n")

        try:
            response_xml = client.query_from_xml(request.dataset_id, xml)
        except RuntimeError as exc:
            # The first failing sub-query aborts the whole comparison.
            err.print(f"[red]Error from CDC WONDER:[/red] {exc}")
            raise typer.Exit(1)

        _output_response(client, response_xml, format, None, no_totals)

        # Pause between sub-queries, but not after the last request.
        if index < final_index:
            console.print(
                f"\n[dim]Waiting {_WONDER_RATE_LIMIT_SECONDS}s (CDC WONDER rate limit)…[/dim]"
            )
            time.sleep(_WONDER_RATE_LIMIT_SECONDS)
626
+
627
+
628
+ # ── refine ────────────────────────────────────────────────────────────────────
629
+
630
+
631
@app.command("refine")
def cmd_refine(
    query_file: Annotated[str, typer.Argument(help="Existing XML query to refine")],
    feedback: Annotated[
        str, typer.Argument(help="What to change (e.g. 'break down by state')")
    ],
    output: Annotated[
        Optional[Path], typer.Option("-o", "--output", help="Save refined XML")
    ] = None,
    execute: Annotated[
        bool, typer.Option("--run", help="Also execute the refined query")
    ] = False,
    format: Annotated[str, typer.Option("-f", "--format")] = "table",
):
    """Refine an existing query using natural language feedback."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.

    # Resolve the argument as a filesystem path first, then as a bundled query name.
    path = Path(query_file)
    if not path.exists():
        bundled = _QUERIES_DIR / query_file
        if not bundled.exists():
            err.print(f"[red]File not found: {query_file}[/red]")
            raise typer.Exit(1)
        path = bundled

    base_xml = path.read_text()

    console.print(f"[bold]Refining:[/bold] {path.name}")
    console.print(f"[bold]Feedback:[/bold] {feedback!r}\n")

    try:
        # Imported here so a missing provider package surfaces as the ImportError below.
        from pulse.llm_builder import get_query_builder

        builder = get_query_builder()
        request = builder.build(feedback, base_xml=base_xml)
    except ImportError as exc:
        _print_missing_provider_package(exc)
        raise typer.Exit(1)
    except (RuntimeError, ValueError) as exc:
        err.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    except TypeError as exc:
        # Only a TypeError that mentions credentials is treated as "no API key".
        if not ("api_key" in str(exc) or "authentication" in str(exc).lower()):
            raise
        _print_missing_api_key()
        raise typer.Exit(1)
    xml = request.to_xml()

    # Without --output the refined XML goes to stdout.
    if not output:
        print(xml)
    else:
        output.write_text(xml)
        console.print(f"[green]✓[/green] Saved refined query to [bold]{output}[/bold]")

    if not execute:
        return

    console.print(
        f"\n[dim]Executing refined query against {request.dataset_id}…[/dim]\n"
    )
    client = WonderClient()
    try:
        response_xml = client.query_from_xml(request.dataset_id, xml)
    except RuntimeError as exc:
        err.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(1)
    _output_response(client, response_xml, format, None, False)
695
+
696
+
697
+ # ── chat ──────────────────────────────────────────────────────────────────────
698
+
699
+
700
@app.command("chat")
def cmd_chat(
    initial_prompt: Annotated[
        Optional[str], typer.Argument(help="Optional first request to start with")
    ] = None,
):
    """Interactively build and refine a CDC WONDER query over multiple turns."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    #
    # Session state is the XML of the current query and its dataset id. The first
    # free-text turn builds a query from scratch; later turns refine the current
    # XML. Lines starting with ":" are session commands (see the banner below).
    catalog = _get_catalog()

    try:
        # Imported here so a missing provider package surfaces as the ImportError below.
        from pulse.llm_builder import get_query_builder

        builder = get_query_builder()
    except ImportError as e:
        _print_missing_provider_package(e)
        raise typer.Exit(1)
    except (RuntimeError, ValueError) as e:
        err.print(f"[red]{e}[/red]")
        raise typer.Exit(1)

    current_xml: Optional[str] = None
    current_dataset_id: Optional[str] = None

    console.print(
        "\n[bold]pulse chat[/bold] — describe a query, then refine it turn by turn."
    )
    console.print("[dim]Commands: :xml :run :save <path> :reset :exit[/dim]\n")

    def _build_turn(text: str) -> None:
        """Build (first turn) or refine (later turns) the session query from *text*.

        Builder failures are reported and leave the current query untouched, so a
        single bad turn does not end the interactive session.
        """
        nonlocal current_xml, current_dataset_id
        try:
            if current_xml is None:
                refs = _reference_queries(text, catalog)
                request = builder.build(text, reference_queries=refs)
            else:
                request = builder.build(text, base_xml=current_xml)
        except ImportError as e:
            # Same handling as the one-shot commands, minus the exit.
            _print_missing_provider_package(e)
            return
        except (RuntimeError, ValueError) as e:
            # Same error types the one-shot commands report; here the session
            # stays alive so the user can rephrase.
            err.print(f"[red]{e}[/red]")
            return
        except TypeError as e:
            if "api_key" in str(e) or "authentication" in str(e).lower():
                _print_missing_api_key()
                return
            raise
        current_xml = request.to_xml()
        current_dataset_id = request.dataset_id
        console.print(f"\n[dim]Dataset:[/dim] {current_dataset_id}")
        console.print(current_xml)
        console.print()

    if initial_prompt:
        console.print(f"[bold]>[/bold] {initial_prompt}")
        _build_turn(initial_prompt)

    while True:
        try:
            text = Prompt.ask("[bold cyan]pulse>[/bold cyan]").strip()
        # Parenthesized tuple: the bare `except A, B:` form only parses on
        # Python 3.14+, which would make this module unimportable on older 3.x.
        except (EOFError, KeyboardInterrupt):
            console.print()
            break

        if not text:
            continue

        if text in (":exit", ":quit"):
            break

        if text == ":xml":
            if current_xml:
                console.print(current_xml)
            else:
                console.print("[yellow]No query built yet.[/yellow]")
            continue

        if text == ":reset":
            current_xml = None
            current_dataset_id = None
            console.print("[dim]Reset.[/dim]")
            continue

        if text.startswith(":save"):
            parts = text.split(maxsplit=1)
            if not current_xml:
                console.print("[yellow]No query built yet.[/yellow]")
            elif len(parts) < 2:
                console.print("[yellow]Usage: :save <path>[/yellow]")
            else:
                out_path = Path(parts[1])
                try:
                    out_path.write_text(current_xml)
                except OSError as e:
                    # An unwritable path should not throw away the session.
                    err.print(f"[red]Could not save to {out_path}:[/red] {e}")
                else:
                    console.print(f"[green]✓[/green] Saved to {out_path}")
            continue

        if text == ":run":
            if not current_xml or not current_dataset_id:
                console.print("[yellow]No query built yet.[/yellow]")
                continue
            client = WonderClient()
            try:
                response_xml = client.query_from_xml(current_dataset_id, current_xml)
            except RuntimeError as e:
                err.print(f"[red]Error from CDC WONDER:[/red] {e}")
                continue
            _output_response(client, response_xml, "table", None, False)
            continue

        # Anything else is free text for the builder.
        _build_turn(text)

    console.print("[dim]Bye.[/dim]")
805
+
806
+
807
+ # ── topics ────────────────────────────────────────────────────────────────────
808
+
809
+
810
@app.command("topics")
def cmd_topics():
    """List all dataset topics and dataset counts."""
    # The docstring above is left verbatim: Typer uses it as the command's help text.
    from collections import Counter

    catalog = _get_catalog()
    counts = Counter(ds.topic for ds in catalog.datasets())

    # Display colour per topic; topics not listed here fall back to white.
    topic_colors = {
        "Mortality": "red",
        "Infant Mortality": "orange3",
        "Fetal Deaths": "dark_orange",
        "Natality": "green",
        "Cancer": "bright_magenta",
        "Infectious Disease": "cyan",
        "STI / Sexual Health": "bright_cyan",
        "Tuberculosis": "yellow",
        "HIV/AIDS": "bright_red",
        "Vaccine Safety": "magenta",
        "Environment": "blue",
        "Population": "dim",
    }

    table = Table(
        box=box.ROUNDED, show_header=True, header_style="bold cyan", border_style="dim"
    )
    table.add_column("Topic", ratio=1)
    table.add_column("Datasets", justify="right", width=9)
    table.add_column("Filter command", style="dim")

    # most_common() yields topics by descending count, ties in first-seen order.
    for topic, count in counts.most_common():
        style = topic_colors.get(topic, "white")
        table.add_row(
            Text(topic, style=style),
            str(count),
            f'pulse datasets --topic "{topic}"',
        )

    total = sum(counts.values())
    console.print()
    console.print(table)
    console.print(f"\n[dim]{total} total datasets[/dim]\n")
851
+
852
+
853
+ # ── list-queries ──────────────────────────────────────────────────────────────
854
+
855
+
856
@app.command("list-queries")
def cmd_list_queries(
    dataset_id: Annotated[Optional[str], typer.Option("--dataset", "-d")] = None,
    json_out: Annotated[bool, typer.Option("--json")] = False,
):
    """List all bundled example queries."""
    # NOTE(review): the docstring above is presumably surfaced by Typer as the
    # command's help text, so it is kept short and user-facing.
    selected = _get_catalog().queries()

    # Optional case-insensitive filter on the dataset identifier.
    if dataset_id:
        wanted = dataset_id.upper()
        selected = [q for q in selected if q.dataset_id.upper() == wanted]

    # Machine-readable mode: dump the selected queries as JSON on stdout and
    # skip the Rich table entirely.
    if json_out:
        payload = [
            {
                "filename": q.filename,
                "dataset_id": q.dataset_id,
                "description": q.description,
                "groupings": q.groupings,
                "year_range": q.year_range,
            }
            for q in selected
        ]
        print(json.dumps(payload, indent=2))
        return

    table = Table(
        box=box.ROUNDED,
        show_header=True,
        header_style="bold cyan",
        border_style="dim",
    )
    # (header, extra add_column keyword arguments) for each column, in order.
    column_specs = (
        ("Dataset", {"width": 9, "style": "yellow"}),
        ("File", {}),
        ("Description", {}),
        ("Groups By", {}),
        ("Years", {"width": 12}),
    )
    for heading, options in column_specs:
        table.add_column(heading, **options)

    for q in selected:
        groups = ", ".join(q.groupings)
        table.add_row(q.dataset_id, q.filename, q.description, groups, q.year_range)

    console.print()
    console.print(table)
    console.print(
        f"\n[dim]{len(selected)} bundled queries · Run: [bold]pulse run <filename>[/bold][/dim]\n"
    )
909
+
910
+
911
+ # ── helpers ───────────────────────────────────────────────────────────────────
912
+
913
+
914
def _emit_or_save_text(text: str, output: Optional[Path]) -> None:
    """Write *text* to *output* as UTF-8, or print it when no path is given."""
    if output:
        # Explicit encoding so the saved file does not depend on the locale.
        output.write_text(text, encoding="utf-8")
    else:
        print(text)


def _save_rows_as_csv(output: Path, headers, data) -> None:
    """Write *headers* followed by the *data* rows to *output* as UTF-8 CSV."""
    # newline="" is required by the csv module: the writer emits its own
    # "\r\n" terminators, and default text-mode newline translation would
    # turn them into "\r\r\n" on Windows.
    with output.open("w", encoding="utf-8", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(headers)
        writer.writerows(data)


def _output_response(
    client: WonderClient,
    response_xml: str,
    format: str,
    output: Optional[Path],
    no_totals: bool,
) -> None:
    """Render a query response in the requested format.

    Args:
        client: Used to convert the response via ``to_records``,
            ``to_arrays`` and ``parse_rows``.
        response_xml: The raw response XML text.
        format: One of ``"table"``, ``"csv"``, ``"json"`` or ``"xml"``.
        output: Optional file path. For xml/json/csv the result is written
            there instead of being printed; for ``"table"`` the table is
            still printed and the rows are additionally saved as CSV.
        no_totals: When true, rows whose parsed row object has ``is_total``
            set are dropped from csv/table output.

    Raises:
        typer.Exit: With exit code 1 when *format* is not recognised.
    """
    # Reject an unknown format up front, before doing any parsing work.
    if format not in ("table", "csv", "json", "xml"):
        err.print(f"[red]Unknown format: {format!r}. Use: table|csv|json|xml[/red]")
        raise typer.Exit(1)

    if format == "xml":
        _emit_or_save_text(response_xml, output)
        return

    if format == "json":
        records = client.to_records(response_xml)
        _emit_or_save_text(json.dumps(records, indent=2), output)
        return

    headers, data = client.to_arrays(response_xml)
    rows = client.parse_rows(response_xml)

    if no_totals:
        data = [row for row, parsed in zip(data, rows) if not parsed.is_total]

    if format == "csv":
        if output:
            _save_rows_as_csv(output, headers, data)
        else:
            buf = io.StringIO()
            writer = csv.writer(buf)
            writer.writerow(headers)
            writer.writerows(data)
            # The buffer already ends with a line terminator.
            print(buf.getvalue(), end="")
        return

    # Only "table" remains at this point.
    if not data:
        console.print("[yellow]No data returned.[/yellow]")
        return

    t = Table(
        box=box.ROUNDED, show_header=True, header_style="bold", border_style="dim"
    )
    for h in headers:
        t.add_column(h)
    for row, parsed in zip(data, rows):
        # Total rows are emphasised only when they are kept. With no_totals
        # the filtered data no longer lines up with rows, so the flag must
        # not be consulted in that case.
        style = "bold" if parsed.is_total and not no_totals else None
        t.add_row(*[str(v) if v is not None else "—" for v in row], style=style)
    console.print(t)
    console.print(f"[dim]{len(data)} rows[/dim]")

    if output:
        _save_rows_as_csv(output, headers, data)
        console.print(f"[green]✓[/green] Saved to {output}")
981
+
982
+
983
# Invoke the CLI application object when this module is executed directly
# as a script.
if __name__ == "__main__":
    app()