astra-tools 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
astra/cli.py ADDED
@@ -0,0 +1,1241 @@
1
+ """Command-line interface for ASTRA."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import click
12
+ from rich.console import Console
13
+ from rich.table import Table
14
+ from rich.tree import Tree
15
+
16
+ from astra.helpers import (
17
+ _collect_node_decisions,
18
+ create_universe_from_defaults,
19
+ get_analysis_decisions,
20
+ get_decisions,
21
+ get_inputs,
22
+ get_outputs,
23
+ load_yaml,
24
+ save_yaml,
25
+ )
26
+ from astra.validation.schema import (
27
+ validate_analysis_schema,
28
+ validate_universe_schema,
29
+ )
30
+ from astra.validation.semantic import validate_analysis_file, validate_universe_file
31
+
32
+ console = Console()  # module-level Rich console shared by every command in this file
33
+
34
+
35
def find_analysis_file(start_path: Path | None = None) -> Path | None:
    """Find the nearest ``astra.yaml`` at or above a starting directory.

    Args:
        start_path: Directory to start searching from. Defaults to the
            current working directory.

    Returns:
        The path of the first ``astra.yaml`` file found while walking from
        ``start_path`` up to and including the filesystem root, or ``None``
        when no such file exists.
    """
    if start_path is None:
        start_path = Path.cwd()

    # Resolve to an absolute path so that ``parents`` reaches the real root.
    current = start_path.resolve()

    # ``parents`` ends with the filesystem root, so a spec placed at the root
    # is considered as well.
    for folder in (current, *current.parents):
        astra_file = folder / "astra.yaml"
        # Only a regular file counts; a directory named astra.yaml cannot be
        # loaded as a specification.
        if astra_file.is_file():
            return astra_file

    return None
49
+
50
+
51
+ def _require_analysis(analysis: Path | None, start_path: Path | None = None) -> Path:
52
+ """Find or validate analysis file, exit with error if not found."""
53
+ if analysis is not None:
54
+ return analysis
55
+ found = find_analysis_file(start_path)
56
+ if found is None:
57
+ console.print("[red]Error:[/red] No astra.yaml found.")
58
+ raise SystemExit(1)
59
+ return found
60
+
61
+
62
@click.group()
@click.version_option(package_name="astra-tools")
def main() -> None:
    """ASTRA - Agentic Schema for Transparent Research Analysis CLI."""
    # Root command group: it does no work itself; subcommands and sub-groups
    # are attached to it via @main.command() / @main.group().
67
+
68
+
69
@main.command()
@click.argument("directory", type=click.Path(path_type=Path), default=".")
@click.option("--no-git", is_flag=True, help="Don't initialize git repository")
def init(directory: Path, no_git: bool) -> None:
    """Create a minimal ASTRA analysis scaffold.

    Creates astra.yaml, universes/baseline.yaml, and .gitignore.

    DIRECTORY is the project folder to create (default: current directory).

    For full agentic scaffolding (Claude Code config, venv, HPC),
    use 'prism init' instead.

    Examples:
        astra init my-analysis
        astra init my-analysis --no-git
    """
    # NOTE: the docstring above is what click shows as the command's --help text.

    # A path that exists but is not a directory can never become a project
    # folder; report it cleanly instead of crashing later in iterdir()/mkdir().
    if directory.exists() and not directory.is_dir():
        console.print(
            f"[red]Error:[/red] [cyan]{directory}[/cyan] exists and is not a directory."
        )
        raise SystemExit(1)

    # Refuse to re-initialize an existing ASTRA project.
    if (directory / "astra.yaml").exists():
        console.print(
            f"[red]Error:[/red] [cyan]{directory}[/cyan] is already an ASTRA project "
            f"(astra.yaml exists)."
        )
        console.print(
            "Use [cyan]astra validate[/cyan] to check it, or delete astra.yaml to re-init."
        )
        raise SystemExit(1)

    # A named target must be new or empty; the current directory is exempt
    # from the emptiness check so an existing folder can be scaffolded in place.
    if directory != Path("."):
        if directory.exists() and any(directory.iterdir()):
            console.print(
                f"[red]Error:[/red] [cyan]{directory}[/cyan] already exists and is not empty. "
                "Please specify an empty or non-existing directory."
            )
            raise SystemExit(1)
        directory.mkdir(parents=True, exist_ok=True)

    # Create directory structure
    for subdir in ("universes", "outputs", "src"):
        (directory / subdir).mkdir(parents=True, exist_ok=True)

    # Create .gitignore, but never clobber one the user already has (possible
    # when scaffolding inside the current, non-empty directory).
    gitignore_path = directory / ".gitignore"
    if not gitignore_path.exists():
        gitignore_lines = (
            "# ASTRA Analysis",
            "outputs/",
            "__pycache__/",
            "*.py[cod]",
            ".venv/",
            ".ipynb_checkpoints/",
            ".DS_Store",
        )
        gitignore_path.write_text("\n".join(gitignore_lines) + "\n", encoding="utf-8")

    # Create boilerplate astra.yaml (and the baseline universe)
    _create_boilerplate_astra_yaml(directory)

    # Initialize git repository unless --no-git was given
    _init_git_repo(directory, no_git)

    console.print(f"[green]✓[/green] Created ASTRA analysis scaffold: [cyan]{directory}[/cyan]")
    console.print("\nFor full agentic scaffolding, use [cyan]prism init[/cyan] instead.")
132
+
133
+
134
# Writes two starter files under `directory`: astra.yaml (the analysis template
# below) and universes/baseline.yaml. write_text() does not create parent
# directories, so `directory/universes` must already exist when this is called.
+ def _create_boilerplate_astra_yaml(directory: Path) -> None:
135
+ """Create boilerplate astra.yaml with TODOs."""
136
+ name = directory.name if directory != Path(".") else "My Analysis"  # Path(".").name is "", hence the placeholder title
137
+
138
# NOTE(review): `name` is interpolated unescaped into a double-quoted YAML
# scalar; a directory name containing `"` or `\` would yield invalid YAML.
+ astra_yaml = f"""# ASTRA Analysis Specification
139
+
140
+ version: "1.0"
141
+ name: "{name}"
142
+ description: |
143
+ TODO: Describe the goal of this analysis.
144
+
145
+ inputs:
146
+ - id: primary_data
147
+ type: data
148
+ description: "TODO: Describe your primary data source"
149
+
150
+ outputs:
151
+ - id: main_result
152
+ type: metric
153
+ description: "TODO: Describe your primary output metric"
154
+ recipe:
155
+ command: python src/main.py
156
+
157
+ - id: conclusion
158
+ type: report
159
+ description: "Summary of analysis findings"
160
+ recipe:
161
+ command: python src/main.py
162
+ inputs: [main_result]
163
+
164
+ decisions:
165
+ example_method:
166
+ label: "Example Method Choice"
167
+ rationale: "TODO: Explain why this decision matters"
168
+ default: option_a
169
+ options:
170
+ option_a:
171
+ label: "Option A"
172
+ description: "TODO: Describe option A"
173
+ option_b:
174
+ label: "Option B"
175
+ description: "TODO: Describe option B"
176
+ """
177
+ (directory / "astra.yaml").write_text(astra_yaml)
178
+
179
+ # Create baseline universe
180
# The universe selects option_a for example_method, matching the `default`
# declared for that decision in the template above.
+ baseline_universe = """# Baseline Universe
181
+ # Default configuration using standard practices
182
+
183
+ id: baseline
184
+ description: "Default configuration using standard practices"
185
+
186
+ decisions:
187
+ example_method: option_a
188
+ """
189
+ (directory / "universes" / "baseline.yaml").write_text(baseline_universe)
190
+
191
+
192
+ def _init_git_repo(directory: Path, no_git: bool) -> None:
193
+ """Initialize git repository if requested."""
194
+ if no_git or (directory / ".git").exists():
195
+ return
196
+
197
+ try:
198
+ subprocess.run(
199
+ ["git", "init"],
200
+ cwd=directory,
201
+ capture_output=True,
202
+ check=True,
203
+ )
204
+ console.print("[green]✓[/green] Initialized git repository")
205
+ # Try to create initial commit
206
+ try:
207
+ subprocess.run(["git", "add", "."], cwd=directory, capture_output=True, check=True)
208
+ subprocess.run(
209
+ ["git", "commit", "-m", "Initial ASTRA analysis structure"],
210
+ cwd=directory,
211
+ capture_output=True,
212
+ check=True,
213
+ )
214
+ except subprocess.CalledProcessError:
215
+ pass # Commit failed, but repo is initialized
216
+ except (subprocess.CalledProcessError, FileNotFoundError):
217
+ pass # Git not available
218
+
219
+
220
@main.command()
@click.argument("file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file for universe validation",
)
@click.option(
    "--verify-evidence",
    "-e",
    is_flag=True,
    help="Verify evidence quotes exist in source papers (requires papers to be cached)",
)
@click.option(
    "--skip-evidence",
    is_flag=True,
    help="Skip evidence verification even if prior insights are present",
)
def validate(file: Path, analysis: Path | None, verify_evidence: bool, skip_evidence: bool) -> None:
    """Validate an ASTRA specification file.

    FILE can be an analysis (astra.yaml) or universe file.
    For universe files, use --analysis to specify the analysis file.

    Evidence verification (--verify-evidence) checks that quotes in prior insights
    actually exist in the source papers. Papers must be cached first using
    'astra paper add'.
    """
    # A file is treated as a universe when it lives in a "universes" folder or
    # has "universe" in its stem; everything else is treated as an analysis.
    is_universe = file.parent.name == "universes" or "universe" in file.stem.lower()

    # Universe files are validated against an analysis; locate one if not given.
    if is_universe and analysis is None:
        analysis = find_analysis_file(file.parent)
        if analysis is None:
            console.print("[red]Error:[/red] Universe validation requires an analysis file.")
            console.print("Use --analysis to specify the analysis file.")
            raise SystemExit(1)

    console.print(f"Validating [cyan]{file}[/cyan]...")

    # Stage 1: schema validation.
    schema_validator = validate_universe_schema if is_universe else validate_analysis_schema
    schema_errors = schema_validator(file)
    if schema_errors:
        console.print("\n[red]Schema validation errors:[/red]")
        for schema_err in schema_errors:
            console.print(f" • {schema_err}")
        raise SystemExit(1)
    console.print("[green]✓[/green] Schema validation passed")

    # Stage 2: semantic validation.
    if is_universe:
        assert analysis is not None
        semantic_errors = validate_universe_file(file, analysis)
    else:
        semantic_errors = validate_analysis_file(file)
    if semantic_errors:
        console.print("\n[red]Semantic validation errors:[/red]")
        for semantic_err in semantic_errors:
            console.print(f" • {semantic_err}")
        raise SystemExit(1)
    console.print("[green]✓[/green] Semantic validation passed")

    # Stage 3: evidence verification, only for analysis files and only when
    # not explicitly skipped.
    prior_insights = {}
    if not (is_universe or skip_evidence):
        prior_insights = load_yaml(file).get("prior_insights", {})

    if prior_insights:
        if verify_evidence:
            console.print("\n[bold]Verifying evidence...[/bold]")
            _verify_insights_evidence(prior_insights)
        else:
            # Not verifying: just hint that there is evidence that could be checked.
            evidence_count = 0
            for insight in prior_insights.values():
                evidence_count += len(insight.get("evidence", []))
            if evidence_count > 0:
                console.print(
                    f"\n[dim]Note: {len(prior_insights)} prior insight(s) with "
                    f"{evidence_count} evidence item(s) found.[/dim]"
                )
                console.print(
                    "[dim]Run with --verify-evidence to verify quotes exist in papers.[/dim]"
                )

    console.print("\n[green]Validation successful![/green]")
315
+
316
+
317
def _verify_insights_evidence(prior_insights: dict[str, Any]) -> None:
    """Verify evidence for all prior insights and print a summary.

    Each evidence item is counted as verified (including cache hits), skipped
    or failed. Failed items are listed individually.

    Args:
        prior_insights: The ``prior_insights`` mapping loaded from an analysis
            file, passed through to ``verify_all_insights``.

    Raises:
        SystemExit: With status 1 when at least one evidence item failed.
    """
    from astra.papers.cache import PaperCache
    from astra.verification.cache import VerificationCache
    from astra.verification.core import VerificationStatus, verify_all_insights

    paper_cache = PaperCache()
    verification_cache = VerificationCache()

    results = verify_all_insights(prior_insights, paper_cache, verification_cache)

    verified_count = 0
    cached_count = 0
    skipped_count = 0
    failed_count = 0

    for insight_id, result in results.items():
        for ev_result in result.evidence_results:
            status = ev_result.status
            if status in (VerificationStatus.VERIFIED, VerificationStatus.CACHED):
                verified_count += 1
                if status == VerificationStatus.CACHED:
                    cached_count += 1
            elif status == VerificationStatus.SKIPPED:
                skipped_count += 1
            else:
                failed_count += 1
                # ERROR gets a warning marker; every other failure a red cross.
                if status == VerificationStatus.ERROR:
                    icon = "[yellow]![/yellow]"
                else:
                    icon = "[red]✗[/red]"
                # The backslash escapes the bracket so Rich prints the insight
                # id literally instead of parsing "[id]" as a markup tag.
                console.print(
                    f" {icon} \\[{insight_id}] {ev_result.evidence_id}: {ev_result.message}"
                )

    # Summary: only show the green check when nothing failed.
    total = verified_count + skipped_count + failed_count
    summary_icon = "[red]✗[/red]" if failed_count else "[green]✓[/green]"
    if cached_count > 0:
        console.print(
            f"{summary_icon} Evidence: {verified_count}/{total} verified "
            f"({cached_count} from cache), {skipped_count} skipped"
        )
    else:
        console.print(
            f"{summary_icon} Evidence: {verified_count}/{total} verified, {skipped_count} skipped"
        )

    if failed_count:
        console.print(f"\n[red]Error:[/red] {failed_count} evidence item(s) failed verification")
        console.print("\nTo fix:")
        console.print(" 1. Check that quotes are exact copies from the paper")
        console.print(" 2. Verify the DOI and version are correct")
        console.print(" 3. Ensure the paper is cached: astra paper add <doi>")
        raise SystemExit(1)
373
+
374
+
375
@main.command()
@click.option(
    "--file",
    "-f",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file (default: astra.yaml in current/parent dir)",
)
@click.option("--decisions", "-d", is_flag=True, help="Show decision details")
@click.option("--inputs", "-i", is_flag=True, help="Show input details")
@click.option("--outputs", "-o", is_flag=True, help="Show output details")
def info(
    file: Path | None,
    decisions: bool,
    inputs: bool,
    outputs: bool,
) -> None:
    """Show information about an analysis."""
    file = _require_analysis(file)
    data = load_yaml(file)

    # Header: name, version and (when present) the free-text description.
    console.print(f"\n[bold]{data.get('name', 'Unknown')}[/bold]")
    console.print(f"Version: {data.get('version', 'Unknown')}")
    description = data.get("description")
    if description:
        console.print(f"\n{description}")

    # One-line summary of how many inputs/outputs/decisions are declared.
    input_list = get_inputs(data)
    output_list = get_outputs(data)
    decision_dict = get_decisions(data)
    console.print(
        f"\n[dim]Inputs: {len(input_list)} | "
        f"Outputs: {len(output_list)} | "
        f"Decisions: {len(decision_dict)}[/dim]"
    )

    # With no section flag given, every section is shown.
    show_all = not any((decisions, inputs, outputs))

    if inputs or show_all:
        console.print("\n[bold]Inputs:[/bold]")
        input_table = Table(show_header=True)
        for heading in ("ID", "Type", "Description"):
            input_table.add_column(heading)
        for inp in input_list:
            input_table.add_row(
                inp.get("id", ""), inp.get("type", ""), inp.get("description", "")
            )
        console.print(input_table)

    if outputs or show_all:
        console.print("\n[bold]Outputs:[/bold]")
        output_table = Table(show_header=True)
        for heading in ("ID", "Type", "Recipe", "Description"):
            output_table.add_column(heading)
        for out in output_list:
            recipe = out.get("recipe")
            # Show the recipe command, "yes" for a recipe without one, or a
            # dim dash when the output has no recipe at all.
            recipe_str = recipe.get("command", "yes") if recipe else "[dim]-[/dim]"
            output_table.add_row(
                out.get("id", ""),
                out.get("type", ""),
                recipe_str,
                out.get("description", ""),
            )
        console.print(output_table)

    # Decisions are rendered as trees, recursing into sub-analyses.
    if decisions or show_all:
        console.print("\n[bold]Decisions:[/bold]")
        decision_tree = get_analysis_decisions(data)
        _display_decisions(decision_tree.get("decisions", {}))
        _display_analysis_decisions(decision_tree.get("analyses", {}))
455
+
456
+
457
def _display_decisions(decisions: dict[str, Any], indent: str = "") -> None:
    """Print each decision as its own Rich tree.

    Args:
        decisions: Mapping of decision id to decision definition. The keys
            ``label``, ``tags``, ``rationale``, ``options`` and ``default``
            are read when present.
        indent: Text prepended to every tree's root label.
    """
    for decision_id, decision in decisions.items():
        tree = Tree(f"{indent}[cyan]{decision_id}[/cyan]: {decision.get('label', '')}")
        tags = decision.get("tags") or []
        if tags:
            tree.add(f"[dim]Tags:[/dim] {', '.join(tags)}")
        if decision.get("rationale"):
            tree.add(f"[dim]Rationale:[/dim] {decision['rationale']}")

        options_branch = tree.add("[dim]Options:[/dim]")
        # ``or {}`` tolerates ``options: null`` the same way tags are handled.
        options = decision.get("options") or {}
        default = decision.get("default")
        for option_id, option in options.items():
            # An option declared with an empty body loads as None.
            option = option or {}
            default_marker = " [yellow](default)[/yellow]" if option_id == default else ""
            option_text = f"{option_id}: {option.get('label', '')}{default_marker}"
            if option.get("description"):
                option_text += f" - [dim]{option['description']}[/dim]"
            options_branch.add(option_text)

        console.print(tree)
        console.print()
479
+
480
+
481
def _display_analysis_decisions(analyses: dict[str, Any], depth: int = 0) -> None:
    """Print decisions grouped by sub-analysis, recursing into nested analyses.

    ``depth`` is the nesting level; it pads the heading and sets the indent
    handed to ``_display_decisions`` for that level.
    """
    heading_pad = " " * depth
    decision_indent = " " * (depth + 1)
    for analysis_id in analyses:
        subtree = analyses[analysis_id]
        console.print(f"\n [bold magenta]{heading_pad}Analysis: {analysis_id}[/bold magenta]")
        _display_decisions(subtree.get("decisions", {}), indent=decision_indent)
        _display_analysis_decisions(subtree.get("analyses", {}), depth + 1)
487
+
488
+
489
@main.group()
def universe() -> None:
    """Universe management commands."""
    # Container group only; the actual commands are registered with
    # @universe.command(...).
493
+
494
+
495
@universe.command("generate")
@click.option("--name", "-n", default="baseline", help="Universe name/ID")
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file (default: universes/<name>.yaml)",
)
@click.option("--description", "-d", help="Universe description")
def generate_universe(
    name: str,
    analysis: Path | None,
    output: Path | None,
    description: str | None,
) -> None:
    """Generate a universe from analysis defaults."""
    analysis_path = _require_analysis(analysis)
    spec = load_yaml(analysis_path)

    # Every decision in the whole analysis tree needs a default, otherwise no
    # universe can be derived from defaults.
    without_default: list[str] = []
    _check_missing_defaults(spec, without_default, "")
    if without_default:
        console.print("[red]Error:[/red] Some decisions don't have defaults:")
        for qualified_id in without_default:
            console.print(f" • {qualified_id}")
        raise SystemExit(1)

    generated = create_universe_from_defaults(spec, name, description)

    # Default location is universes/<name>.yaml next to the analysis file.
    target = output if output is not None else analysis_path.parent / "universes" / f"{name}.yaml"
    target.parent.mkdir(parents=True, exist_ok=True)
    save_yaml(generated, target)

    console.print(f"[green]✓[/green] Generated universe at [cyan]{target}[/cyan]")
    console.print("\nDecisions:")
    _print_universe_decisions(generated)
540
+
541
+
542
def _check_missing_defaults(node: dict[str, Any], missing: list[str], prefix: str = "") -> None:
    """Append to ``missing`` the id of every decision whose default is unset.

    Ids of decisions inside sub-analyses are prefixed with their dotted
    analysis path (``<analysis_id>.<decision_id>``). ``missing`` is mutated
    in place; nothing is returned.
    """
    node_decisions = _collect_node_decisions(node)
    missing.extend(
        f"{prefix}{decision_id}"
        for decision_id, decision in node_decisions.items()
        if decision.get("default") is None
    )

    children = node.get("analyses") or {}
    for analysis_id in children:
        _check_missing_defaults(children[analysis_id], missing, f"{prefix}{analysis_id}.")
549
+
550
+
551
def _print_universe_decisions(uni: dict[str, Any], indent: str = " ") -> None:
    """Print a universe's chosen options, then recurse into its sub-analyses.

    Each nesting level extends ``indent`` by one more space.
    """
    chosen = uni.get("decisions") or {}
    for decision_id in chosen:
        console.print(f"{indent}{decision_id}: {chosen[decision_id]}")

    deeper_indent = indent + " "
    nested = uni.get("analyses") or {}
    for analysis_id in nested:
        console.print(f"{indent}[magenta]{analysis_id}:[/magenta]")
        _print_universe_decisions(nested[analysis_id], deeper_indent)
558
+
559
+
560
@universe.command("check")
@click.argument("universe_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
def check_universe(universe_file: Path, analysis: Path | None) -> None:
    """Check a universe against its analysis constraints."""
    # Without --analysis, the search starts in the universe file's folder.
    analysis_path = _require_analysis(analysis, universe_file.parent)
    problems = validate_universe_file(universe_file, analysis_path)

    if not problems:
        console.print("[green]✓[/green] Universe is valid")
        return

    console.print("[red]Universe validation errors:[/red]")
    for problem in problems:
        console.print(f" • {problem}")
    raise SystemExit(1)
580
+
581
+
582
@main.command()
@click.option(
    "--file",
    "-f",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["mermaid", "ascii"]),
    default="ascii",
    help="Output format",
)
def viz(file: Path | None, fmt: str) -> None:
    """Visualize the decision space."""
    file = _require_analysis(file)
    spec = load_yaml(file)

    # "mermaid" selects the diagram source; anything else is the ASCII tree.
    render = _viz_mermaid if fmt == "mermaid" else _viz_ascii
    render(spec)
605
+
606
+
607
def _viz_ascii(data: dict[str, Any]) -> None:
    """Print the analysis' decisions as a tree rooted at the analysis name."""
    title = data.get("name", "Unknown")
    root = Tree(f"[bold]{title}[/bold]")
    _viz_ascii_node(root, data)
    console.print(root)
612
+
613
+
614
def _viz_ascii_node(parent_tree: Tree, node: dict[str, Any]) -> None:
    """Recursively add a node's decisions and sub-analyses to a Rich tree.

    Each decision becomes a branch (with its tags in brackets) and each option
    a leaf, marked ``[default]`` when it is the decision's default and followed
    by its ``incompatible_with`` / ``requires`` constraints when present.

    Args:
        parent_tree: Tree (or branch) that receives the new branches.
        node: Analysis node; its ``analyses`` key, when present, is recursed into.
    """
    decisions = _collect_node_decisions(node)
    for decision_id, decision in decisions.items():
        tags = decision.get("tags") or []
        # Literal square brackets must be backslash-escaped, otherwise Rich
        # treats "[...]" as a markup tag and the text is not displayed.
        tag_str = f" \\[{', '.join(tags)}]" if tags else ""
        branch = parent_tree.add(f"[cyan]{decision_id}[/cyan]{tag_str}")

        # ``or {}`` tolerates ``options: null``.
        options = decision.get("options") or {}
        default = decision.get("default")
        for option_id, option in options.items():
            # An option declared with an empty body loads as None.
            option = option or {}
            # Escaped for the same reason: "[default]" is a valid Rich style.
            default_marker = " \\[default]" if option_id == default else ""
            constraints = []
            if option.get("incompatible_with"):
                constraints.append(f"\u2717 {', '.join(option['incompatible_with'])}")
            if option.get("requires"):
                constraints.append(f"\u2192 {', '.join(option['requires'])}")

            option_text = f"{option_id}: {option.get('label', '')}{default_marker}"
            if constraints:
                option_text += f" [dim]({'; '.join(constraints)})[/dim]"
            branch.add(option_text)

    for analysis_id, sub_node in (node.get("analyses") or {}).items():
        sub_tree = parent_tree.add(f"[bold magenta]{analysis_id}[/bold magenta]")
        _viz_ascii_node(sub_tree, sub_node)
640
+
641
+
642
# Mermaid class applied to default options. It must not be called "default":
# Mermaid applies a class of that name to every node without its own class.
_MERMAID_DEFAULT_CLASS = "defaultOption"


def _mermaid_label(text: Any) -> str:
    """Return ``text`` as a double-quoted Mermaid label.

    Quoting lets labels contain brackets and parentheses; embedded double
    quotes are replaced with Mermaid's ``#quot;`` entity.
    """
    return '"' + str(text).replace('"', "#quot;") + '"'


def _viz_mermaid(data: dict[str, Any]) -> None:
    """Print a Mermaid ``graph TD`` diagram of the analysis' decisions.

    Args:
        data: Loaded analysis specification (the root analysis node).
    """
    lines = ["graph TD"]

    _viz_mermaid_node(lines, data, "root")

    lines.append("")
    lines.append(f" classDef {_MERMAID_DEFAULT_CLASS} fill:#90EE90")

    # Emit the diagram source verbatim: with Rich markup enabled, bracketed
    # labels such as "[label]" would be parsed as style tags and dropped, and
    # long lines would be re-wrapped.
    console.print("\n".join(lines), markup=False, highlight=False, soft_wrap=True)


def _viz_mermaid_node(lines: list[str], node: dict[str, Any], node_prefix: str) -> None:
    """Recursively append Mermaid statements for one analysis node to ``lines``.

    Every non-root node that has decisions or sub-analyses is wrapped in a
    ``subgraph``. Node ids are built as ``<prefix>__<decision>`` for decisions
    and ``<prefix>__<decision>_<option>`` for options.

    Args:
        lines: Output list, extended in place.
        node: Analysis node to render.
        node_prefix: Id prefix for this node (``"root"`` at the top level).
    """
    decisions = _collect_node_decisions(node)
    sub_analyses = node.get("analyses") or {}

    # If this node has decisions or sub-analyses, wrap it in a subgraph.
    wrap_in_subgraph = bool(decisions or sub_analyses) and node_prefix != "root"
    if wrap_in_subgraph:
        lines.append(f" subgraph {node_prefix}[{_mermaid_label(node_prefix)}]")

    for decision_id, decision in decisions.items():
        qualified = f"{node_prefix}__{decision_id}"
        lines.append(f" {qualified}[{_mermaid_label(decision.get('label', decision_id))}]")

        default = decision.get("default")
        # ``or {}`` tolerates ``options: null``.
        for option_id, option in (decision.get("options") or {}).items():
            # An option declared with an empty body loads as None.
            option = option or {}
            node_id = f"{qualified}_{option_id}"
            style = f":::{_MERMAID_DEFAULT_CLASS}" if option_id == default else ""
            lines.append(f" {node_id}(({_mermaid_label(option.get('label', option_id))})){style}")
            lines.append(f" {qualified} --> {node_id}")

            # A "decision.option" reference maps onto the same
            # <prefix>__<decision>_<option> id scheme used for option nodes.
            for ref in option.get("incompatible_with") or []:
                target = f"{node_prefix}__{ref.replace('.', '_')}"
                lines.append(f" {node_id} -.->|incompatible| {target}")

            for ref in option.get("requires") or []:
                target = f"{node_prefix}__{ref.replace('.', '_')}"
                lines.append(f" {node_id} -->|requires| {target}")

    for analysis_id, sub_node in sub_analyses.items():
        _viz_mermaid_node(lines, sub_node, f"{node_prefix}__{analysis_id}")

    if wrap_in_subgraph:
        lines.append(" end")
691
+
692
+
693
@main.group()
def schema() -> None:
    """Schema commands."""
    # Container group only; commands are registered with @schema.command(...).
697
+
698
+
699
@schema.command("export")
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    default="schemas",
    help="Output directory (default: schemas/)",
)
def schema_export(output: Path) -> None:
    """Export LinkML schemas to files."""
    import shutil

    from astra.datamodel import SCHEMA_DIRECTORY

    output.mkdir(parents=True, exist_ok=True)

    # Copy every bundled *.yaml schema, in name order, into the output folder.
    sources = sorted(SCHEMA_DIRECTORY.glob("*.yaml"))
    for source in sources:
        shutil.copy2(source, output / source.name)
    exported = [source.name for source in sources]

    console.print(f"[green]✓[/green] Exported schemas to [cyan]{output}/[/cyan]")
    for filename in exported:
        console.print(f" • {output}/{filename}")
723
+
724
+
725
@schema.command("show")
@click.argument("schema_type", type=click.Choice(["analysis", "universe", "insights"]))
def schema_show(schema_type: str) -> None:
    """Print a LinkML schema to stdout."""
    from astra.datamodel import SCHEMA_DIRECTORY

    # Maps the CLI choice to the schema's file name; note that "insights"
    # is stored as insight.yaml (singular).
    name_map = {
        "analysis": "analysis.yaml",
        "universe": "universe.yaml",
        "insights": "insight.yaml",
    }
    schema_path = SCHEMA_DIRECTORY / name_map[schema_type]

    # Emit the file verbatim so the output can be redirected or piped: Rich
    # markup parsing would drop bracketed text, and highlighting/wrapping
    # would alter the YAML.
    console.print(
        schema_path.read_text(encoding="utf-8"),
        markup=False,
        highlight=False,
        soft_wrap=True,
    )
738
+
739
+
740
+ # =============================================================================
741
+ # Paper commands
742
+ # =============================================================================
743
+
744
+
745
@main.group()
def paper() -> None:
    """Paper management commands for evidence verification."""
    # Container group only; commands are registered with @paper.command(...).
749
+
750
+
751
@paper.command("add")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option(
    "--pdf",
    type=click.Path(exists=True, path_type=Path),
    help="Use local PDF instead of downloading",
)
def paper_add(doi: str, version: int | None, pdf: Path | None) -> None:
    """Add a paper to the cache by DOI.

    DOI can be any valid DOI. For arXiv papers, use the format:
        10.48550/arXiv.1706.03762

    Examples:
        astra paper add 10.48550/arXiv.1706.03762 --version 7
        astra paper add 10.1038/s41586-023-06221-2
        astra paper add 10.1234/example --pdf ./local_paper.pdf
    """
    from astra.papers.cache import PaperCache
    from astra.papers.download import download_paper

    cache = PaperCache()

    # Already cached: report where it lives and stop.
    existing = cache.get(doi, version) if cache.has(doi, version) else None
    if existing:
        console.print(f"[yellow]Paper already cached:[/yellow] {doi}")
        console.print(f" Path: {existing.pdf_path}")
        if existing.metadata.title:
            console.print(f" Title: {existing.metadata.title}")
        return

    # Local PDF supplied: register it without any download.
    if pdf:
        console.print(f"Adding paper from local file: [cyan]{pdf}[/cyan]")
        entry = cache.add_from_file(doi, pdf, version=version)
        console.print("[green]✓[/green] Paper added to cache")
        console.print(f" DOI: {doi}")
        if version:
            console.print(f" Version: {version}")
        console.print(f" Path: {entry.pdf_path}")
        console.print(f" SHA-256: {entry.metadata.sha256[:16]}...")
        return

    # Otherwise download the paper and cache the received content.
    console.print(f"Downloading paper: [cyan]{doi}[/cyan]")
    if version:
        console.print(f" Version: {version}")

    result = download_paper(doi, version)

    if not result.success:
        console.print(f"[red]Error:[/red] {result.error}")
        raise SystemExit(1)

    if result.content is None:
        console.print("[red]Error:[/red] No content received")
        raise SystemExit(1)

    entry = cache.add(
        doi=doi,
        pdf_content=result.content,
        version=version,
        title=result.title,
        authors=result.authors,
        source_url=result.url,
    )

    console.print("[green]✓[/green] Paper downloaded and cached")
    console.print(f" DOI: {doi}")
    if version:
        console.print(f" Version: {version}")
    if entry.metadata.title:
        console.print(f" Title: {entry.metadata.title}")
    console.print(f" Path: {entry.pdf_path}")
    console.print(f" SHA-256: {entry.metadata.sha256[:16]}...")
827
+
828
+
829
@paper.command("list")
def paper_list() -> None:
    """List all cached papers."""
    from astra.papers.cache import PaperCache

    papers = PaperCache().list_papers()
    if not papers:
        console.print("[dim]No papers cached[/dim]")
        return

    table = Table(show_header=True, expand=True)
    table.add_column("DOI", no_wrap=True)
    table.add_column("Ver", no_wrap=True)
    table.add_column("Title", ratio=2)
    table.add_column("Retrieved", no_wrap=True)

    for entry in papers:
        meta = entry.metadata
        table.add_row(
            meta.doi,
            str(meta.version) if meta.version else "-",
            meta.title or "[dim](unknown)[/dim]",
            # Only the first 10 characters of the retrieval timestamp are shown.
            meta.retrieved_at[:10] if meta.retrieved_at else "-",
        )

    console.print(table)
    console.print(f"\n[dim]{len(papers)} paper(s) cached[/dim]")
856
+
857
+
858
@paper.command("show")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_show(doi: str, version: int | None) -> None:
    """Show details of a cached paper."""
    from astra.papers.cache import PaperCache

    entry = PaperCache().get(doi, version)

    if not entry:
        console.print(f"[red]Error:[/red] Paper not found in cache: {doi}")
        if version:
            console.print(f" (version {version})")
        console.print("\nUse [cyan]astra paper add[/cyan] to download the paper first.")
        raise SystemExit(1)

    def _field(label: str, value: Any) -> None:
        # One "Label: value" line with the label in bold.
        console.print(f"[bold]{label}:[/bold] {value}")

    meta = entry.metadata
    console.print(f"\n[bold]DOI:[/bold] {meta.doi}")
    # Optional fields are printed only when they hold a value.
    if meta.version:
        _field("Version", meta.version)
    if meta.title:
        _field("Title", meta.title)
    if meta.authors:
        _field("Authors", ", ".join(meta.authors))
    _field("SHA-256", meta.sha256)
    _field("Retrieved", meta.retrieved_at)
    if meta.source_url:
        _field("Source", meta.source_url)
    _field("Path", entry.pdf_path)
888
+
889
+
890
@paper.command("path")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_path(doi: str, version: int | None) -> None:
    """Print the path to a cached paper's PDF.

    Useful for piping to other tools or agents that need to read the PDF.
    """
    from astra.papers.cache import PaperCache

    cache = PaperCache()
    path = cache.get_path(doi, version)

    if not path:
        # stdout is reserved for the path, so the error goes to stderr and
        # cannot end up in a pipe or command substitution.
        Console(stderr=True).print(f"[red]Error:[/red] Paper not found: {doi}")
        raise SystemExit(1)

    # Print just the path (no formatting) for easy piping
    print(path)
909
+
910
+
911
@paper.command("remove")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_remove(doi: str, version: int | None) -> None:
    """Remove a paper from the cache."""
    from astra.papers.cache import PaperCache

    cache = PaperCache()

    if cache.has(doi, version):
        cache.remove(doi, version)
        console.print("[green]✓[/green] Paper removed from cache")
        return

    # Nothing cached under this DOI/version: report and exit non-zero.
    console.print(f"[red]Error:[/red] Paper not found: {doi}")
    raise SystemExit(1)
926
+
927
+
928
@paper.command("fetch-metadata")
@click.argument("doi", required=False)
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option("--all", "fetch_all", is_flag=True, help="Fetch metadata for all cached papers")
def paper_fetch_metadata(doi: str | None, version: int | None, fetch_all: bool) -> None:
    """Fetch metadata (title, authors) for cached papers.

    Uses DOI content negotiation to retrieve metadata from DOI.org.

    Examples:

        astra paper fetch-metadata 10.48550/arXiv.1706.03762

        astra paper fetch-metadata --all
    """
    from astra.papers.cache import PaperCache
    from astra.papers.download import fetch_doi_metadata

    cache = PaperCache()

    if not fetch_all:
        # --- Single-paper mode: a DOI is mandatory and must already be cached.
        if not doi:
            console.print("[red]Error:[/red] Provide a DOI or use --all")
            raise SystemExit(1)

        if not cache.has(doi, version):
            console.print(f"[red]Error:[/red] Paper not found in cache: {doi}")
            raise SystemExit(1)

        console.print(f"Fetching metadata for {doi}...")
        fetched = fetch_doi_metadata(doi)

        # Neither a title nor authors came back: treat as a failure.
        if not (fetched.title or fetched.authors):
            console.print("[yellow]⚠[/yellow] No metadata found for this DOI")
            raise SystemExit(1)

        cache.update_metadata(doi, version, title=fetched.title, authors=fetched.authors)

        console.print("[green]✓[/green] Metadata updated:")
        if fetched.title:
            console.print(f"  Title: {fetched.title}")
        if fetched.authors:
            console.print(f"  Authors: {', '.join(fetched.authors)}")
        return

    # --- Bulk mode (--all): walk every cached paper; any DOI argument is unused.
    cached_papers = cache.list_papers()
    if not cached_papers:
        console.print("[dim]No papers cached[/dim]")
        return

    refreshed = 0
    for entry in cached_papers:
        known = entry.metadata

        # Entries that already have both fields are left untouched.
        if known.title and known.authors:
            continue

        # Progress line stays open (end=" ") so the outcome lands on the same row.
        console.print(f"Fetching metadata for {known.doi}...", end=" ")
        fetched = fetch_doi_metadata(known.doi)

        if not (fetched.title or fetched.authors):
            console.print("[yellow]⚠[/yellow] No metadata found")
            continue

        cache.update_metadata(
            known.doi,
            known.version,
            title=fetched.title,
            authors=fetched.authors,
        )
        console.print(f"[green]✓[/green] {fetched.title or '(no title)'}")
        refreshed += 1

    console.print(f"\n[dim]Updated {refreshed} paper(s)[/dim]")
1000
+
1001
+
1002
def _verify_quotes_abort(
    doi: str, version: int | None, message: str, total: int = 0
) -> SystemExit:
    """Print the fatal-error JSON payload for ``verify-quotes``.

    The payload has the same top-level keys as a successful run, plus an
    ``error`` key, so consumers can always parse stdout as JSON.

    Args:
        doi: DOI passed on the command line (echoed back in the payload).
        version: Paper version passed on the command line, if any.
        message: Human-readable description of what went wrong.
        total: Number of quotes that were requested, when already known.

    Returns:
        A ``SystemExit(2)`` instance for the caller to ``raise``; returning it
        (rather than raising here) keeps the control flow visible at call sites.
    """
    print(
        json.dumps(
            {
                "doi": doi,
                "version": version,
                "results": [],
                "summary": {"total": total, "verified": 0, "not_found": 0, "errors": 1},
                "error": message,
            }
        )
    )
    return SystemExit(2)


@paper.command("verify-quotes")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_verify_quotes(doi: str, version: int | None) -> None:
    """Verify multiple quotes from a cached paper in a single operation.

    Reads quote list from stdin as JSON. Extracts PDF text once and
    verifies all quotes against it.

    Input format (stdin):
        {"quotes": [{"text": "...", "page": N, "prefix": "...", "suffix": "..."}, ...]}

    Output format (stdout, JSON):
        {"doi": "...", "results": [...], "summary": {...}}

    Exit codes:
        0 - All quotes verified
        1 - Some quotes not found
        2 - Error (paper not cached, invalid input, malformed quote entries, etc.)
    """
    from astra.papers.cache import PaperCache
    from astra.verification.core import VerificationStatus, verify_quote_in_pdf
    from astra.verification.pdf import extract_text_from_pdf

    # --- Read and validate the JSON request from stdin.
    input_data = sys.stdin.read()
    if not input_data.strip():
        raise _verify_quotes_abort(doi, version, "No input provided on stdin")

    try:
        data = json.loads(input_data)
    except json.JSONDecodeError as e:
        raise _verify_quotes_abort(doi, version, f"Invalid JSON input: {e}")

    # Syntactically valid JSON can still have the wrong shape (e.g. a bare
    # list or `"quotes": null`); reject it with a JSON error, not a traceback.
    if not isinstance(data, dict):
        raise _verify_quotes_abort(
            doi, version, 'Invalid input: expected a JSON object with a "quotes" list'
        )
    quotes = data.get("quotes", [])
    if not isinstance(quotes, list):
        raise _verify_quotes_abort(doi, version, 'Invalid input: "quotes" must be a list')

    # --- Locate the paper in the cache.
    cached_paper = PaperCache().get(doi, version)
    if not cached_paper:
        raise _verify_quotes_abort(
            doi, version, f"Paper not in cache: {doi}", total=len(quotes)
        )

    # --- Extract text from the PDF once; every quote is checked against it.
    try:
        pdf = extract_text_from_pdf(cached_paper.pdf_path)
    except Exception as e:  # CLI boundary: report any extraction failure as JSON
        raise _verify_quotes_abort(
            doi, version, f"Failed to extract text from PDF: {e}", total=len(quotes)
        )

    # --- Verify each quote, tracking every outcome category separately.
    results: list[dict[str, Any]] = []
    verified_count = 0
    not_found_count = 0
    error_count = 0

    for idx, quote_data in enumerate(quotes):
        # Work out whether this entry is usable before touching the PDF text.
        entry_error: str | None = None
        quote_text: Any = ""
        if not isinstance(quote_data, dict):
            entry_error = "Quote entry must be a JSON object"
        else:
            quote_text = quote_data.get("text", "")
            if not quote_text:
                entry_error = "Empty quote text"
            elif not isinstance(quote_text, str):
                entry_error = "Quote text must be a string"

        if entry_error is not None:
            results.append(
                {
                    "index": idx,
                    "text": "",
                    "status": "error",
                    "found_pages": [],
                    "message": entry_error,
                }
            )
            # Count it, so the summary and exit code reflect the failure.
            error_count += 1
            continue

        status, found_pages, message = verify_quote_in_pdf(
            quote_text,
            pdf,
            quote_data.get("page"),
            quote_data.get("prefix"),
            quote_data.get("suffix"),
        )

        # Long quotes are truncated in the report to keep the output compact.
        display_text = quote_text[:50] + "..." if len(quote_text) > 50 else quote_text

        results.append(
            {
                "index": idx,
                "text": display_text,
                "status": status.value,
                "found_pages": found_pages,
                "message": message,
            }
        )

        # Any status other than VERIFIED is reported as "not found".
        if status == VerificationStatus.VERIFIED:
            verified_count += 1
        else:
            not_found_count += 1

    # --- Emit the report.
    print(
        json.dumps(
            {
                "doi": doi,
                "version": version,
                "results": results,
                "summary": {
                    "total": len(quotes),
                    "verified": verified_count,
                    "not_found": not_found_count,
                    "errors": error_count,
                },
            }
        )
    )

    # Exit 0 only when every quote verified. A missing quote keeps exit code 1;
    # a run whose only failures are malformed entries reports code 2.
    if not_found_count > 0:
        raise SystemExit(1)
    if error_count > 0:
        raise SystemExit(2)
    raise SystemExit(0)
1155
+
1156
+
1157
@paper.command("verify-quote")
@click.argument("doi")
@click.option("--quote", "-q", required=True, help="Exact quote text to verify")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option("--page", "-p", type=int, help="Expected page number (1-indexed)")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def paper_verify_quote(
    doi: str, quote: str, version: int | None, page: int | None, output_json: bool
) -> None:
    """Verify a quote exists in a cached paper.

    Searches for the exact quote in the paper's text. Uses fuzzy matching
    to handle minor OCR/extraction differences.

    Exit codes:
        0 - Quote verified (found in paper)
        1 - Quote not found
        2 - Error (paper not cached, etc.)
    """
    from astra.papers.cache import PaperCache
    from astra.verification.core import VerificationStatus, verify_quote_in_pdf
    from astra.verification.pdf import extract_text_from_pdf

    cached_paper = PaperCache().get(doi, version)

    # --- Error: the paper has not been downloaded into the cache.
    if not cached_paper:
        if not output_json:
            console.print(f"[red]Error:[/red] Paper not in cache: {doi}")
            console.print("Use [cyan]astra paper add[/cyan] first.")
        else:
            missing_payload = {
                "status": "error",
                "message": f"Paper not in cache: {doi}",
                "found_pages": [],
                "expected_page": page,
            }
            print(json.dumps(missing_payload))
        raise SystemExit(2)

    # --- Error: the cached PDF could not be turned into text.
    try:
        pdf = extract_text_from_pdf(cached_paper.pdf_path)
    except Exception as e:
        failure_text = f"Failed to extract text from PDF: {e}"
        if not output_json:
            console.print(f"[red]Error:[/red] {failure_text}")
        else:
            extract_payload = {
                "status": "error",
                "message": failure_text,
                "found_pages": [],
                "expected_page": page,
            }
            print(json.dumps(extract_payload))
        raise SystemExit(2)

    status, found_pages, message = verify_quote_in_pdf(quote, pdf, page)
    # Anything other than VERIFIED is presented as "not found".
    is_verified = status == VerificationStatus.VERIFIED

    if output_json:
        # Key order here intentionally differs from the error payloads above.
        outcome_payload = {
            "status": status.value,
            "found_pages": found_pages,
            "expected_page": page,
            "message": message,
        }
        print(json.dumps(outcome_payload))
    elif is_verified:
        console.print(f"[green]✓ Verified[/green] {message}")
    else:
        console.print(f"[red]✗ Not found[/red] {message}")

    exit_code = 0 if is_verified else 1
    raise SystemExit(exit_code)
1238
+
1239
+
1240
# Script entry point: call main() only when this module is executed directly,
# never as a side effect of importing it.
if __name__ == "__main__":
    main()