astra-tools 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- astra/cli.py +1241 -0
- astra/helpers.py +584 -0
- astra/papers/__init__.py +15 -0
- astra/papers/cache.py +346 -0
- astra/papers/download.py +404 -0
- astra/validation/__init__.py +31 -0
- astra/validation/schema.py +135 -0
- astra/validation/semantic.py +992 -0
- astra/verification/__init__.py +39 -0
- astra/verification/cache.py +234 -0
- astra/verification/core.py +427 -0
- astra/verification/pdf.py +389 -0
- astra_tools-0.2.2.dist-info/METADATA +22 -0
- astra_tools-0.2.2.dist-info/RECORD +17 -0
- astra_tools-0.2.2.dist-info/WHEEL +4 -0
- astra_tools-0.2.2.dist-info/entry_points.txt +2 -0
- astra_tools-0.2.2.dist-info/licenses/LICENSE +29 -0
astra/cli.py
ADDED
|
@@ -0,0 +1,1241 @@
|
|
|
1
|
+
"""Command-line interface for ASTRA."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.table import Table
|
|
14
|
+
from rich.tree import Tree
|
|
15
|
+
|
|
16
|
+
from astra.helpers import (
|
|
17
|
+
_collect_node_decisions,
|
|
18
|
+
create_universe_from_defaults,
|
|
19
|
+
get_analysis_decisions,
|
|
20
|
+
get_decisions,
|
|
21
|
+
get_inputs,
|
|
22
|
+
get_outputs,
|
|
23
|
+
load_yaml,
|
|
24
|
+
save_yaml,
|
|
25
|
+
)
|
|
26
|
+
from astra.validation.schema import (
|
|
27
|
+
validate_analysis_schema,
|
|
28
|
+
validate_universe_schema,
|
|
29
|
+
)
|
|
30
|
+
from astra.validation.semantic import validate_analysis_file, validate_universe_file
|
|
31
|
+
|
|
32
|
+
# Module-wide Rich console; every command below writes its terminal output through it.
console = Console()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def find_analysis_file(start_path: Path | None = None) -> Path | None:
|
|
36
|
+
"""Find the astra.yaml file in the current or parent directories."""
|
|
37
|
+
if start_path is None:
|
|
38
|
+
start_path = Path.cwd()
|
|
39
|
+
|
|
40
|
+
# Resolve to absolute path to ensure parent traversal works correctly
|
|
41
|
+
current = start_path.resolve()
|
|
42
|
+
while current != current.parent:
|
|
43
|
+
astra_file = current / "astra.yaml"
|
|
44
|
+
if astra_file.exists():
|
|
45
|
+
return astra_file
|
|
46
|
+
current = current.parent
|
|
47
|
+
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _require_analysis(analysis: Path | None, start_path: Path | None = None) -> Path:
|
|
52
|
+
"""Find or validate analysis file, exit with error if not found."""
|
|
53
|
+
if analysis is not None:
|
|
54
|
+
return analysis
|
|
55
|
+
found = find_analysis_file(start_path)
|
|
56
|
+
if found is None:
|
|
57
|
+
console.print("[red]Error:[/red] No astra.yaml found.")
|
|
58
|
+
raise SystemExit(1)
|
|
59
|
+
return found
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@click.group()
@click.version_option(package_name="astra-tools")
def main() -> None:
    """ASTRA - Agentic Schema for Transparent Research Analysis CLI."""
    # Root command group: it does no work itself, the commands and sub-groups
    # below attach to it through ``@main.command()`` / ``@main.group()``.
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@main.command()
@click.argument("directory", type=click.Path(path_type=Path), default=".")
@click.option("--no-git", is_flag=True, help="Don't initialize git repository")
def init(directory: Path, no_git: bool) -> None:
    """Create a minimal ASTRA analysis scaffold.

    Creates astra.yaml, universes/baseline.yaml, and .gitignore.

    DIRECTORY is the project folder to create (default: current directory).

    For full agentic scaffolding (Claude Code config, venv, HPC),
    use 'prism init' instead.

    Examples:
        astra init my-analysis
        astra init my-analysis --no-git
    """
    # Refuse to touch a folder that already holds an ASTRA project
    if (directory / "astra.yaml").exists():
        console.print(
            f"[red]Error:[/red] [cyan]{directory}[/cyan] is already an ASTRA project "
            f"(astra.yaml exists)."
        )
        console.print(
            "Use [cyan]astra validate[/cyan] to check it, or delete astra.yaml to re-init."
        )
        raise SystemExit(1)

    # A regular file at the target path can never become a project folder; report
    # it cleanly instead of letting iterdir()/mkdir() fail with a traceback.
    if directory.exists() and not directory.is_dir():
        console.print(
            f"[red]Error:[/red] [cyan]{directory}[/cyan] exists and is not a directory."
        )
        raise SystemExit(1)

    # Only the current directory may be non-empty; any other target has to be
    # new or empty so that existing content is never mixed with the scaffold.
    if directory != Path("."):
        if directory.exists() and any(directory.iterdir()):
            console.print(
                f"[red]Error:[/red] [cyan]{directory}[/cyan] already exists and is not empty. "
                "Please specify an empty or non-existing directory."
            )
            raise SystemExit(1)
        directory.mkdir(parents=True, exist_ok=True)

    # Create directory structure
    for folder in ("universes", "outputs", "src"):
        (directory / folder).mkdir(parents=True, exist_ok=True)

    # Create .gitignore
    gitignore = """# ASTRA Analysis
outputs/
__pycache__/
*.py[cod]
.venv/
.ipynb_checkpoints/
.DS_Store
"""
    gitignore_path = directory / ".gitignore"
    if gitignore_path.exists():
        # Initialising inside an existing folder must not wipe the user's ignore
        # rules: keep them and append the ASTRA section once.
        existing = gitignore_path.read_text()
        if "# ASTRA Analysis" not in existing:
            if existing and not existing.endswith("\n"):
                existing += "\n"
            spacer = "\n" if existing else ""
            gitignore_path.write_text(f"{existing}{spacer}{gitignore}")
    else:
        gitignore_path.write_text(gitignore)

    # Create boilerplate astra.yaml (and the baseline universe)
    _create_boilerplate_astra_yaml(directory)

    # Initialize git repository unless --no-git was given
    _init_git_repo(directory, no_git)

    # Print success message
    console.print(f"[green]✓[/green] Created ASTRA analysis scaffold: [cyan]{directory}[/cyan]")
    console.print("\nFor full agentic scaffolding, use [cyan]prism init[/cyan] instead.")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _create_boilerplate_astra_yaml(directory: Path) -> None:
|
|
135
|
+
"""Create boilerplate astra.yaml with TODOs."""
|
|
136
|
+
name = directory.name if directory != Path(".") else "My Analysis"
|
|
137
|
+
|
|
138
|
+
astra_yaml = f"""# ASTRA Analysis Specification
|
|
139
|
+
|
|
140
|
+
version: "1.0"
|
|
141
|
+
name: "{name}"
|
|
142
|
+
description: |
|
|
143
|
+
TODO: Describe the goal of this analysis.
|
|
144
|
+
|
|
145
|
+
inputs:
|
|
146
|
+
- id: primary_data
|
|
147
|
+
type: data
|
|
148
|
+
description: "TODO: Describe your primary data source"
|
|
149
|
+
|
|
150
|
+
outputs:
|
|
151
|
+
- id: main_result
|
|
152
|
+
type: metric
|
|
153
|
+
description: "TODO: Describe your primary output metric"
|
|
154
|
+
recipe:
|
|
155
|
+
command: python src/main.py
|
|
156
|
+
|
|
157
|
+
- id: conclusion
|
|
158
|
+
type: report
|
|
159
|
+
description: "Summary of analysis findings"
|
|
160
|
+
recipe:
|
|
161
|
+
command: python src/main.py
|
|
162
|
+
inputs: [main_result]
|
|
163
|
+
|
|
164
|
+
decisions:
|
|
165
|
+
example_method:
|
|
166
|
+
label: "Example Method Choice"
|
|
167
|
+
rationale: "TODO: Explain why this decision matters"
|
|
168
|
+
default: option_a
|
|
169
|
+
options:
|
|
170
|
+
option_a:
|
|
171
|
+
label: "Option A"
|
|
172
|
+
description: "TODO: Describe option A"
|
|
173
|
+
option_b:
|
|
174
|
+
label: "Option B"
|
|
175
|
+
description: "TODO: Describe option B"
|
|
176
|
+
"""
|
|
177
|
+
(directory / "astra.yaml").write_text(astra_yaml)
|
|
178
|
+
|
|
179
|
+
# Create baseline universe
|
|
180
|
+
baseline_universe = """# Baseline Universe
|
|
181
|
+
# Default configuration using standard practices
|
|
182
|
+
|
|
183
|
+
id: baseline
|
|
184
|
+
description: "Default configuration using standard practices"
|
|
185
|
+
|
|
186
|
+
decisions:
|
|
187
|
+
example_method: option_a
|
|
188
|
+
"""
|
|
189
|
+
(directory / "universes" / "baseline.yaml").write_text(baseline_universe)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _init_git_repo(directory: Path, no_git: bool) -> None:
|
|
193
|
+
"""Initialize git repository if requested."""
|
|
194
|
+
if no_git or (directory / ".git").exists():
|
|
195
|
+
return
|
|
196
|
+
|
|
197
|
+
try:
|
|
198
|
+
subprocess.run(
|
|
199
|
+
["git", "init"],
|
|
200
|
+
cwd=directory,
|
|
201
|
+
capture_output=True,
|
|
202
|
+
check=True,
|
|
203
|
+
)
|
|
204
|
+
console.print("[green]✓[/green] Initialized git repository")
|
|
205
|
+
# Try to create initial commit
|
|
206
|
+
try:
|
|
207
|
+
subprocess.run(["git", "add", "."], cwd=directory, capture_output=True, check=True)
|
|
208
|
+
subprocess.run(
|
|
209
|
+
["git", "commit", "-m", "Initial ASTRA analysis structure"],
|
|
210
|
+
cwd=directory,
|
|
211
|
+
capture_output=True,
|
|
212
|
+
check=True,
|
|
213
|
+
)
|
|
214
|
+
except subprocess.CalledProcessError:
|
|
215
|
+
pass # Commit failed, but repo is initialized
|
|
216
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
217
|
+
pass # Git not available
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@main.command()
@click.argument("file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file for universe validation",
)
@click.option(
    "--verify-evidence",
    "-e",
    is_flag=True,
    help="Verify evidence quotes exist in source papers (requires papers to be cached)",
)
@click.option(
    "--skip-evidence",
    is_flag=True,
    help="Skip evidence verification even if prior insights are present",
)
def validate(file: Path, analysis: Path | None, verify_evidence: bool, skip_evidence: bool) -> None:
    """Validate an ASTRA specification file.

    FILE can be an analysis (astra.yaml) or universe file.
    For universe files, use --analysis to specify the analysis file.

    Evidence verification (--verify-evidence) checks that quotes in prior insights
    actually exist in the source papers. Papers must be cached first using
    'astra paper add'.
    """
    # Determine file type: a universe is recognised by its file name or by living
    # in a folder called "universes".
    is_universe = "universe" in file.stem.lower() or file.parent.name == "universes"

    if is_universe and analysis is None:
        # Try to find the analysis file next to / above the universe file
        analysis = find_analysis_file(file.parent)
        if analysis is None:
            console.print("[red]Error:[/red] Universe validation requires an analysis file.")
            console.print("Use --analysis to specify the analysis file.")
            raise SystemExit(1)

    console.print(f"Validating [cyan]{file}[/cyan]...")

    # Schema validation
    if is_universe:
        schema_errors = validate_universe_schema(file)
    else:
        schema_errors = validate_analysis_schema(file)

    if schema_errors:
        console.print("\n[red]Schema validation errors:[/red]")
        for schema_err in schema_errors:
            console.print(f" • {schema_err}")
        raise SystemExit(1)

    console.print("[green]✓[/green] Schema validation passed")

    # Semantic validation
    if is_universe:
        # Guaranteed by the lookup above; the assert only narrows the type
        assert analysis is not None
        semantic_errors = validate_universe_file(file, analysis)
    else:
        semantic_errors = validate_analysis_file(file)

    if semantic_errors:
        console.print("\n[red]Semantic validation errors:[/red]")
        for semantic_err in semantic_errors:
            console.print(f" • {semantic_err}")
        raise SystemExit(1)

    console.print("[green]✓[/green] Semantic validation passed")

    # Evidence verification (for analysis files with prior insights)
    if not is_universe and not skip_evidence:
        data = load_yaml(file)
        # ``or {}`` / ``or []`` also cover keys that are present but set to null,
        # which would otherwise crash the counting below.
        prior_insights = data.get("prior_insights") or {}

        if prior_insights:
            if not verify_evidence:
                # Show hint about evidence verification
                evidence_count = sum(
                    len((insight or {}).get("evidence") or [])
                    for insight in prior_insights.values()
                )
                if evidence_count > 0:
                    console.print(
                        f"\n[dim]Note: {len(prior_insights)} prior insight(s) with "
                        f"{evidence_count} evidence item(s) found.[/dim]"
                    )
                    console.print(
                        "[dim]Run with --verify-evidence to verify quotes exist in papers.[/dim]"
                    )
            else:
                console.print("\n[bold]Verifying evidence...[/bold]")
                _verify_insights_evidence(prior_insights)

    console.print("\n[green]Validation successful![/green]")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _verify_insights_evidence(prior_insights: dict[str, Any]) -> None:
    """Verify the evidence of all prior insights and report the outcome.

    Every evidence item that is neither verified, cached nor skipped is printed
    as a problem, followed by a one-line summary.

    Args:
        prior_insights: The ``prior_insights`` mapping of an analysis file.

    Raises:
        SystemExit: With status 1 when at least one evidence item failed.
    """
    from rich.markup import escape

    from astra.papers.cache import PaperCache
    from astra.verification.cache import VerificationCache
    from astra.verification.core import VerificationStatus, verify_all_insights

    paper_cache = PaperCache()
    verification_cache = VerificationCache()

    results = verify_all_insights(prior_insights, paper_cache, verification_cache)

    verified_count = 0
    cached_count = 0
    skipped_count = 0
    failed_count = 0

    for insight_id, result in results.items():
        for ev_result in result.evidence_results:
            status = ev_result.status
            if status in (VerificationStatus.VERIFIED, VerificationStatus.CACHED):
                verified_count += 1
                if status == VerificationStatus.CACHED:
                    cached_count += 1
            elif status == VerificationStatus.SKIPPED:
                skipped_count += 1
            else:
                failed_count += 1
                if status == VerificationStatus.ERROR:
                    icon = "[yellow]![/yellow]"
                else:
                    icon = "[red]✗[/red]"
                # Rich would read "[insight_id]" as a style tag and drop it from
                # the output, so the bracketed id is escaped first.
                insight_label = escape(f"[{insight_id}]")
                console.print(
                    f" {icon} {insight_label} {ev_result.evidence_id}: {ev_result.message}"
                )

    # Summary: the leading icon reflects the overall outcome instead of always
    # showing a green check mark.
    total = verified_count + skipped_count + failed_count
    summary_icon = "[red]✗[/red]" if failed_count else "[green]✓[/green]"
    if cached_count > 0:
        console.print(
            f"{summary_icon} Evidence: {verified_count}/{total} verified "
            f"({cached_count} from cache), {skipped_count} skipped"
        )
    else:
        console.print(
            f"{summary_icon} Evidence: {verified_count}/{total} verified, {skipped_count} skipped"
        )

    if failed_count:
        console.print(f"\n[red]Error:[/red] {failed_count} evidence item(s) failed verification")
        console.print("\nTo fix:")
        console.print(" 1. Check that quotes are exact copies from the paper")
        console.print(" 2. Verify the DOI and version are correct")
        console.print(" 3. Ensure the paper is cached: astra paper add <doi>")
        raise SystemExit(1)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
@main.command()
@click.option(
    "--file",
    "-f",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file (default: astra.yaml in current/parent dir)",
)
@click.option("--decisions", "-d", is_flag=True, help="Show decision details")
@click.option("--inputs", "-i", is_flag=True, help="Show input details")
@click.option("--outputs", "-o", is_flag=True, help="Show output details")
def info(
    file: Path | None,
    decisions: bool,
    inputs: bool,
    outputs: bool,
) -> None:
    """Show information about an analysis."""
    file = _require_analysis(file)
    data = load_yaml(file)

    # Title block: name, version and the optional description
    console.print(f"\n[bold]{data.get('name', 'Unknown')}[/bold]")
    console.print(f"Version: {data.get('version', 'Unknown')}")
    if data.get("description"):
        console.print(f"\n{data['description']}")

    # One-line overview of how much the analysis declares
    input_list = get_inputs(data)
    output_list = get_outputs(data)
    decision_dict = get_decisions(data)
    console.print(
        f"\n[dim]Inputs: {len(input_list)} | "
        f"Outputs: {len(output_list)} | "
        f"Decisions: {len(decision_dict)}[/dim]"
    )

    # Without any section flag, every section is shown
    show_all = not any((decisions, inputs, outputs))

    if show_all or inputs:
        console.print("\n[bold]Inputs:[/bold]")
        input_table = Table(show_header=True)
        for heading in ("ID", "Type", "Description"):
            input_table.add_column(heading)
        for entry in input_list:
            input_table.add_row(
                entry.get("id", ""),
                entry.get("type", ""),
                entry.get("description", ""),
            )
        console.print(input_table)

    if show_all or outputs:
        console.print("\n[bold]Outputs:[/bold]")
        output_table = Table(show_header=True)
        for heading in ("ID", "Type", "Recipe", "Description"):
            output_table.add_column(heading)
        for entry in output_list:
            recipe = entry.get("recipe")
            # Show the recipe command, "yes" for a recipe without one, a dash otherwise
            recipe_cell = recipe.get("command", "yes") if recipe else "[dim]-[/dim]"
            output_table.add_row(
                entry.get("id", ""),
                entry.get("type", ""),
                recipe_cell,
                entry.get("description", ""),
            )
        console.print(output_table)

    # Decisions are rendered as trees, top level first, then per sub-analysis
    if show_all or decisions:
        console.print("\n[bold]Decisions:[/bold]")
        decision_tree = get_analysis_decisions(data)
        _display_decisions(decision_tree.get("decisions", {}))
        _display_analysis_decisions(decision_tree.get("analyses", {}))
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def _display_decisions(decisions: dict[str, Any], indent: str = "") -> None:
    """Print one Rich tree per decision, each followed by an empty line.

    Args:
        decisions: Mapping of decision id to its definition.
        indent: Text placed in front of each tree's root label.
    """

    def _option_line(option_id: str, option: dict[str, Any], default: Any) -> str:
        # "<id>: <label>", flagged when it is the default, plus an optional description
        line = f"{option_id}: {option.get('label', '')}"
        if option_id == default:
            line += " [yellow](default)[/yellow]"
        if option.get("description"):
            line += f" - [dim]{option['description']}[/dim]"
        return line

    for decision_id, decision in decisions.items():
        root = Tree(f"{indent}[cyan]{decision_id}[/cyan]: {decision.get('label', '')}")

        tag_list = decision.get("tags") or []
        if tag_list:
            root.add(f"[dim]Tags:[/dim] {', '.join(tag_list)}")
        if decision.get("rationale"):
            root.add(f"[dim]Rationale:[/dim] {decision['rationale']}")

        options_branch = root.add("[dim]Options:[/dim]")
        available = decision.get("options", {})
        chosen_default = decision.get("default")
        for option_id, option in available.items():
            options_branch.add(_option_line(option_id, option, chosen_default))

        console.print(root)
        console.print()
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def _display_analysis_decisions(analyses: dict[str, Any], depth: int = 0) -> None:
    """Print the decisions of every sub-analysis, descending depth-first.

    Args:
        analyses: Mapping of sub-analysis id to a node with optional
            ``decisions`` and ``analyses`` entries.
        depth: Current nesting level; it widens the padding of the output.
    """
    for analysis_id, analysis_tree in analyses.items():
        padding = " " * depth
        console.print(f"\n [bold magenta]{padding}Analysis: {analysis_id}[/bold magenta]")

        own_decisions = analysis_tree.get("decisions", {})
        _display_decisions(own_decisions, indent=" " * (depth + 1))

        nested = analysis_tree.get("analyses", {})
        _display_analysis_decisions(nested, depth + 1)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@main.group()
def universe() -> None:
    """Universe management commands."""
    # Pure grouping node: the "generate" and "check" sub-commands attach to it.
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
@universe.command("generate")
@click.option("--name", "-n", default="baseline", help="Universe name/ID")
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file (default: universes/<name>.yaml)",
)
@click.option("--description", "-d", help="Universe description")
def generate_universe(
    name: str,
    analysis: Path | None,
    output: Path | None,
    description: str | None,
) -> None:
    """Generate a universe from analysis defaults."""
    analysis_path = _require_analysis(analysis)
    data = load_yaml(analysis_path)

    # A universe can only be derived when every decision in the tree has a default
    without_default: list[str] = []
    _check_missing_defaults(data, without_default, "")
    if without_default:
        console.print("[red]Error:[/red] Some decisions don't have defaults:")
        for qualified_id in without_default:
            console.print(f" • {qualified_id}")
        raise SystemExit(1)

    uni = create_universe_from_defaults(data, name, description)

    # Fall back to universes/<name>.yaml next to the analysis file
    target = output if output is not None else analysis_path.parent / "universes" / f"{name}.yaml"
    target.parent.mkdir(parents=True, exist_ok=True)
    save_yaml(uni, target)

    console.print(f"[green]✓[/green] Generated universe at [cyan]{target}[/cyan]")
    console.print("\nDecisions:")
    _print_universe_decisions(uni)
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _check_missing_defaults(node: dict[str, Any], missing: list[str], prefix: str = "") -> None:
    """Collect the ids of all decisions in ``node``'s tree that lack a default.

    Args:
        node: Analysis node; its sub-analyses are read from ``node["analyses"]``.
        missing: Output list, extended in place with dotted decision ids.
        prefix: Dotted path of the enclosing analyses, prepended to every id.
    """
    missing.extend(
        f"{prefix}{decision_id}"
        for decision_id, decision in _collect_node_decisions(node).items()
        if decision.get("default") is None
    )

    children = node.get("analyses") or {}
    for child_id, child in children.items():
        _check_missing_defaults(child, missing, f"{prefix}{child_id}.")
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _print_universe_decisions(uni: dict[str, Any], indent: str = " ") -> None:
    """Print the chosen option of every decision, nested by sub-analysis.

    Args:
        uni: Universe node with optional ``decisions`` and ``analyses`` entries.
        indent: Prefix for every printed line; it grows with each nesting level.
    """
    choices = uni.get("decisions") or {}
    for decision_id in choices:
        console.print(f"{indent}{decision_id}: {choices[decision_id]}")

    nested = uni.get("analyses") or {}
    deeper = indent + " "
    for analysis_id, sub_universe in nested.items():
        console.print(f"{indent}[magenta]{analysis_id}:[/magenta]")
        _print_universe_decisions(sub_universe, deeper)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
@universe.command("check")
@click.argument("universe_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--analysis",
    "-a",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
def check_universe(universe_file: Path, analysis: Path | None) -> None:
    """Check a universe against its analysis constraints."""
    # Without --analysis, the lookup starts in the universe file's own folder
    analysis_path = _require_analysis(analysis, universe_file.parent)
    problems = validate_universe_file(universe_file, analysis_path)

    if not problems:
        console.print("[green]✓[/green] Universe is valid")
        return

    console.print("[red]Universe validation errors:[/red]")
    for problem in problems:
        console.print(f" • {problem}")
    raise SystemExit(1)
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
@main.command()
@click.option(
    "--file",
    "-f",
    type=click.Path(exists=True, path_type=Path),
    help="Analysis file",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["mermaid", "ascii"]),
    default="ascii",
    help="Output format",
)
def viz(file: Path | None, fmt: str) -> None:
    """Visualize the decision space."""
    file = _require_analysis(file)
    data = load_yaml(file)

    # click restricts ``fmt`` to the two choices, so anything but "mermaid" is "ascii"
    render = _viz_mermaid if fmt == "mermaid" else _viz_ascii
    render(data)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def _viz_ascii(data: dict[str, Any]) -> None:
    """Print the analysis' decision space as a Rich tree rooted at its name."""
    title = data.get("name", "Unknown")
    root = Tree(f"[bold]{title}[/bold]")
    # The recursive helper fills the tree in place
    _viz_ascii_node(root, data)
    console.print(root)
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _viz_ascii_node(parent_tree: Tree, node: dict[str, Any]) -> None:
    """Recursively add a node's decisions and sub-analyses to a Rich tree.

    Args:
        parent_tree: Tree (or branch) that receives one child per decision and
            one child per sub-analysis.
        node: Analysis node; decisions come from ``_collect_node_decisions`` and
            sub-analyses from ``node["analyses"]``.
    """
    from rich.markup import escape

    decisions = _collect_node_decisions(node)
    for decision_id, decision in decisions.items():
        tags = decision.get("tags") or []
        # Rich parses "[...]" in labels as style markup and silently drops it, so
        # literal brackets have to be escaped to show up in the output.
        tag_str = " " + escape(f"[{', '.join(tags)}]") if tags else ""
        branch = parent_tree.add(f"[cyan]{decision_id}[/cyan]{tag_str}")

        options = decision.get("options", {})
        default = decision.get("default")
        for option_id, option in options.items():
            # "\[" renders a literal "[default]" instead of applying the
            # "default" style to the rest of the label.
            default_marker = " \\[default]" if option_id == default else ""
            constraints = []
            if option.get("incompatible_with"):
                constraints.append(f"\u2717 {', '.join(option['incompatible_with'])}")
            if option.get("requires"):
                constraints.append(f"\u2192 {', '.join(option['requires'])}")

            option_text = f"{option_id}: {option.get('label', '')}{default_marker}"
            if constraints:
                option_text += f" [dim]({'; '.join(constraints)})[/dim]"
            branch.add(option_text)

    for analysis_id, sub_node in (node.get("analyses") or {}).items():
        sub_tree = parent_tree.add(f"[bold magenta]{analysis_id}[/bold magenta]")
        _viz_ascii_node(sub_tree, sub_node)
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _viz_mermaid(data: dict[str, Any]) -> None:
    """Print a Mermaid ``graph TD`` diagram of the analysis' decision space.

    Args:
        data: Loaded analysis document; it is passed to ``_viz_mermaid_node`` as
            the root node.
    """
    lines = ["graph TD"]

    _viz_mermaid_node(lines, data, "root")

    lines.append("")
    lines.append(" classDef default fill:#90EE90")

    # The diagram is meant to be copied verbatim, so Rich must not interpret
    # "id[label]" as markup, replace emoji codes, colourise, or re-wrap long lines.
    console.print(
        "\n".join(lines),
        markup=False,
        emoji=False,
        highlight=False,
        soft_wrap=True,
    )
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _viz_mermaid_node(lines: list[str], node: dict[str, Any], node_prefix: str) -> None:
    """Recursively append the Mermaid statements for one analysis node.

    Args:
        lines: Output list of Mermaid source lines, extended in place.
        node: Analysis node; decisions come from ``_collect_node_decisions`` and
            sub-analyses from ``node["analyses"]``.
        node_prefix: Identifier prefix for everything emitted for this node;
            ``"root"`` marks the top level, which gets no surrounding subgraph.
    """

    def _quoted(text: Any) -> str:
        # Quoted labels may contain brackets and parentheses; a literal double
        # quote has to be written as the "#quot;" entity inside them.
        return '"' + str(text).replace('"', "#quot;") + '"'

    decisions = _collect_node_decisions(node)
    sub_analyses = node.get("analyses") or {}

    # Every non-root node with decisions or sub-analyses is wrapped in a subgraph
    in_subgraph = bool(decisions or sub_analyses) and node_prefix != "root"
    if in_subgraph:
        lines.append(f" subgraph {node_prefix}[{_quoted(node_prefix)}]")

    for decision_id, decision in decisions.items():
        qualified = f"{node_prefix}__{decision_id}"
        lines.append(f" {qualified}[{_quoted(decision.get('label', decision_id))}]")

        options = decision.get("options", {})
        default = decision.get("default")
        for option_id, option in options.items():
            node_id = f"{qualified}_{option_id}"
            # The default option is tagged with the "default" class for styling
            style = ":::default" if option_id == default else ""
            lines.append(f" {node_id}(({_quoted(option.get('label', option_id))})){style}")
            lines.append(f" {qualified} --> {node_id}")

            # References use "decision.option"; node ids use "_" as the separator
            for ref in option.get("incompatible_with") or []:
                target = f"{node_prefix}__{ref.replace('.', '_')}"
                lines.append(f" {node_id} -.->|incompatible| {target}")

            for ref in option.get("requires") or []:
                target = f"{node_prefix}__{ref.replace('.', '_')}"
                lines.append(f" {node_id} -->|requires| {target}")

    for analysis_id, sub_node in sub_analyses.items():
        _viz_mermaid_node(lines, sub_node, f"{node_prefix}__{analysis_id}")

    if in_subgraph:
        lines.append(" end")
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
@main.group()
def schema() -> None:
    """Schema commands."""
    # Pure grouping node: the "export" and "show" sub-commands attach to it.
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
@schema.command("export")
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    default="schemas",
    help="Output directory (default: schemas/)",
)
def schema_export(output: Path) -> None:
    """Export LinkML schemas to files."""
    import shutil

    from astra.datamodel import SCHEMA_DIRECTORY

    output.mkdir(parents=True, exist_ok=True)

    # Copy every bundled *.yaml schema, in name order, into the output folder
    schema_files = sorted(SCHEMA_DIRECTORY.glob("*.yaml"))
    for source in schema_files:
        shutil.copy2(source, output / source.name)

    console.print(f"[green]✓[/green] Exported schemas to [cyan]{output}/[/cyan]")
    for source in schema_files:
        console.print(f" • {output}/{source.name}")
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
@schema.command("show")
@click.argument("schema_type", type=click.Choice(["analysis", "universe", "insights"]))
def schema_show(schema_type: str) -> None:
    """Print a LinkML schema to stdout."""
    from astra.datamodel import SCHEMA_DIRECTORY

    # Command-line name -> schema file name (note: "insights" maps to insight.yaml)
    name_map = {
        "analysis": "analysis.yaml",
        "universe": "universe.yaml",
        "insights": "insight.yaml",
    }
    schema_path = SCHEMA_DIRECTORY / name_map[schema_type]

    # Emit the schema verbatim: with Rich's defaults, bracketed YAML text would be
    # taken for markup, ":name:" codes for emoji, and long lines re-wrapped.
    console.print(
        schema_path.read_text(),
        markup=False,
        emoji=False,
        highlight=False,
        soft_wrap=True,
    )
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
# =============================================================================
|
|
741
|
+
# Paper commands
|
|
742
|
+
# =============================================================================
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
@main.group()
def paper() -> None:
    """Paper management commands for evidence verification."""
    # Pure grouping node: the paper sub-commands (such as "add") attach to it.
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
@paper.command("add")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option(
    "--pdf",
    type=click.Path(exists=True, path_type=Path),
    help="Use local PDF instead of downloading",
)
def paper_add(doi: str, version: int | None, pdf: Path | None) -> None:
    """Add a paper to the cache by DOI.

    DOI can be any valid DOI. For arXiv papers, use the format:
    10.48550/arXiv.1706.03762

    Examples:
        astra paper add 10.48550/arXiv.1706.03762 --version 7
        astra paper add 10.1038/s41586-023-06221-2
        astra paper add 10.1234/example --pdf ./local_paper.pdf
    """
    from astra.papers.cache import PaperCache
    from astra.papers.download import download_paper

    store = PaperCache()

    # Nothing to do when the cache already holds a retrievable entry. If the
    # cache claims to have it but cannot return it, fall through and re-add.
    existing = store.get(doi, version) if store.has(doi, version) else None
    if existing:
        console.print(f"[yellow]Paper already cached:[/yellow] {doi}")
        console.print(f"  Path: {existing.pdf_path}")
        if existing.metadata.title:
            console.print(f"  Title: {existing.metadata.title}")
        return

    if not pdf:
        # No local file supplied: download the PDF and cache it together with
        # whatever metadata the download result carries.
        console.print(f"Downloading paper: [cyan]{doi}[/cyan]")
        if version:
            console.print(f"  Version: {version}")

        fetched = download_paper(doi, version)

        if not fetched.success:
            console.print(f"[red]Error:[/red] {fetched.error}")
            raise SystemExit(1)

        if fetched.content is None:
            console.print("[red]Error:[/red] No content received")
            raise SystemExit(1)

        entry = store.add(
            doi=doi,
            pdf_content=fetched.content,
            version=version,
            title=fetched.title,
            authors=fetched.authors,
            source_url=fetched.url,
        )

        console.print("[green]✓[/green] Paper downloaded and cached")
        console.print(f"  DOI: {doi}")
        if version:
            console.print(f"  Version: {version}")
        if entry.metadata.title:
            console.print(f"  Title: {entry.metadata.title}")
        console.print(f"  Path: {entry.pdf_path}")
        console.print(f"  SHA-256: {entry.metadata.sha256[:16]}...")
        return

    # A local PDF was supplied: register it in the cache instead of downloading.
    console.print(f"Adding paper from local file: [cyan]{pdf}[/cyan]")
    entry = store.add_from_file(doi, pdf, version=version)
    console.print("[green]✓[/green] Paper added to cache")
    console.print(f"  DOI: {doi}")
    if version:
        console.print(f"  Version: {version}")
    console.print(f"  Path: {entry.pdf_path}")
    console.print(f"  SHA-256: {entry.metadata.sha256[:16]}...")
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
@paper.command("list")
def paper_list() -> None:
    """List all cached papers."""
    from rich.markup import escape

    from astra.papers.cache import PaperCache

    cache = PaperCache()
    papers = cache.list_papers()

    if not papers:
        console.print("[dim]No papers cached[/dim]")
        return

    table = Table(show_header=True, expand=True)
    table.add_column("DOI", no_wrap=True)
    table.add_column("Ver", no_wrap=True)
    table.add_column("Title", ratio=2)
    table.add_column("Retrieved", no_wrap=True)

    for paper in papers:
        meta = paper.metadata
        version_str = str(meta.version) if meta.version else "-"
        # String cells are parsed as Rich markup. Titles and DOIs come from
        # external metadata and may contain square brackets (for example
        # "[Retracted] ..."), so escape them to keep the text intact and avoid
        # MarkupError. The placeholder is the only cell that intentionally
        # carries markup.
        title = escape(meta.title) if meta.title else "[dim](unknown)[/dim]"
        # Keep only the first 10 characters of the stored timestamp
        # (presumably the date part of an ISO-8601 string - TODO confirm).
        retrieved = meta.retrieved_at[:10] if meta.retrieved_at else "-"
        table.add_row(escape(meta.doi), version_str, title, retrieved)

    console.print(table)
    console.print(f"\n[dim]{len(papers)} paper(s) cached[/dim]")
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
@paper.command("show")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_show(doi: str, version: int | None) -> None:
    """Show details of a cached paper."""
    from rich.markup import escape

    from astra.papers.cache import PaperCache

    cache = PaperCache()
    paper = cache.get(doi, version)

    if not paper:
        console.print(f"[red]Error:[/red] Paper not found in cache: {escape(doi)}")
        if version:
            console.print(f"  (version {version})")
        console.print("\nUse [cyan]astra paper add[/cyan] to download the paper first.")
        raise SystemExit(1)

    # Every interpolated value below is escaped: these strings originate from
    # user input or external metadata and would otherwise be parsed as Rich
    # markup, dropping bracketed text or raising MarkupError on a stray "[/x]".
    meta = paper.metadata
    console.print(f"\n[bold]DOI:[/bold] {escape(meta.doi)}")
    if meta.version:
        console.print(f"[bold]Version:[/bold] {meta.version}")
    if meta.title:
        console.print(f"[bold]Title:[/bold] {escape(meta.title)}")
    if meta.authors:
        console.print(f"[bold]Authors:[/bold] {escape(', '.join(meta.authors))}")
    console.print(f"[bold]SHA-256:[/bold] {meta.sha256}")
    console.print(f"[bold]Retrieved:[/bold] {meta.retrieved_at}")
    if meta.source_url:
        console.print(f"[bold]Source:[/bold] {escape(meta.source_url)}")
    console.print(f"[bold]Path:[/bold] {escape(str(paper.pdf_path))}")
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
@paper.command("path")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_path(doi: str, version: int | None) -> None:
    """Print the path to a cached paper's PDF.

    Useful for piping to other tools or agents that need to read the PDF.
    """
    from astra.papers.cache import PaperCache

    pdf_location = PaperCache().get_path(doi, version)

    if pdf_location:
        # Plain ``print`` on purpose: the bare path, with no Rich styling or
        # wrapping, so the output can be consumed directly by other tools.
        print(pdf_location)
        return

    console.print(f"[red]Error:[/red] Paper not found: {doi}")
    raise SystemExit(1)
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
@paper.command("remove")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_remove(doi: str, version: int | None) -> None:
    """Remove a paper from the cache."""
    from astra.papers.cache import PaperCache

    store = PaperCache()

    # Only attempt the removal when the cache reports the entry as present;
    # otherwise report the miss and exit with status 1.
    if store.has(doi, version):
        store.remove(doi, version)
        console.print("[green]✓[/green] Paper removed from cache")
        return

    console.print(f"[red]Error:[/red] Paper not found: {doi}")
    raise SystemExit(1)
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
@paper.command("fetch-metadata")
@click.argument("doi", required=False)
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option("--all", "fetch_all", is_flag=True, help="Fetch metadata for all cached papers")
def paper_fetch_metadata(doi: str | None, version: int | None, fetch_all: bool) -> None:
    """Fetch metadata (title, authors) for cached papers.

    Uses DOI content negotiation to retrieve metadata from DOI.org.

    Examples:

        astra paper fetch-metadata 10.48550/arXiv.1706.03762

        astra paper fetch-metadata --all
    """
    from astra.papers.cache import PaperCache
    from astra.papers.download import fetch_doi_metadata

    store = PaperCache()

    if not fetch_all:
        # Single-paper mode: a DOI is mandatory and must already be cached.
        if not doi:
            console.print("[red]Error:[/red] Provide a DOI or use --all")
            raise SystemExit(1)

        if not store.has(doi, version):
            console.print(f"[red]Error:[/red] Paper not found in cache: {doi}")
            raise SystemExit(1)

        console.print(f"Fetching metadata for {doi}...")
        fetched = fetch_doi_metadata(doi)

        if not (fetched.title or fetched.authors):
            console.print("[yellow]⚠[/yellow] No metadata found for this DOI")
            raise SystemExit(1)

        store.update_metadata(doi, version, title=fetched.title, authors=fetched.authors)

        console.print("[green]✓[/green] Metadata updated:")
        if fetched.title:
            console.print(f"  Title: {fetched.title}")
        if fetched.authors:
            console.print(f"  Authors: {', '.join(fetched.authors)}")
        return

    # Batch mode (--all): any positional DOI is ignored. Only entries that
    # lack a title or authors are looked up.
    entries = store.list_papers()
    if not entries:
        console.print("[dim]No papers cached[/dim]")
        return

    refreshed = 0
    for entry in entries:
        known = entry.metadata
        if known.title and known.authors:
            continue

        # ``end=" "`` keeps the outcome marker on the same line as the prompt.
        console.print(f"Fetching metadata for {known.doi}...", end=" ")
        fetched = fetch_doi_metadata(known.doi)

        if not (fetched.title or fetched.authors):
            console.print("[yellow]⚠[/yellow] No metadata found")
            continue

        store.update_metadata(
            known.doi,
            known.version,
            title=fetched.title,
            authors=fetched.authors,
        )
        console.print(f"[green]✓[/green] {fetched.title or '(no title)'}")
        refreshed += 1

    console.print(f"\n[dim]Updated {refreshed} paper(s)[/dim]")
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
@paper.command("verify-quotes")
@click.argument("doi")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
def paper_verify_quotes(doi: str, version: int | None) -> None:
    """Verify multiple quotes from a cached paper in a single operation.

    Reads quote list from stdin as JSON. Extracts PDF text once and
    verifies all quotes against it.

    Input format (stdin):
        {"quotes": [{"text": "...", "page": N, "prefix": "...", "suffix": "..."}, ...]}

    Output format (stdout, JSON):
        {"doi": "...", "results": [...], "summary": {...}}

    Exit codes:
        0 - All quotes verified
        1 - Some quotes not found or invalid
        2 - Error (paper not cached, invalid input, etc.)
    """
    from astra.papers.cache import PaperCache
    from astra.verification.core import VerificationStatus, verify_quote_in_pdf
    from astra.verification.pdf import extract_text_from_pdf

    def error_payload(message: str, total: int = 0) -> str:
        """Build the JSON envelope used for failures of the whole operation."""
        return json.dumps(
            {
                "doi": doi,
                "version": version,
                "results": [],
                "summary": {"total": total, "verified": 0, "not_found": 0, "errors": 1},
                "error": message,
            }
        )

    # Read and validate the JSON input from stdin.
    input_data = sys.stdin.read()
    if not input_data.strip():
        print(error_payload("No input provided on stdin"))
        raise SystemExit(2)

    try:
        data = json.loads(input_data)
    except json.JSONDecodeError as e:
        print(error_payload(f"Invalid JSON input: {e}"))
        raise SystemExit(2)

    # json.loads accepts any JSON value. Reject everything except an object
    # whose "quotes" (if present) is a list, instead of crashing with a
    # traceback and breaking the JSON-on-stdout contract.
    quotes = data.get("quotes", []) if isinstance(data, dict) else None
    if not isinstance(quotes, list):
        print(error_payload('Invalid input: expected a JSON object with a "quotes" list'))
        raise SystemExit(2)

    # Get paper from cache.
    cache = PaperCache()
    cached_paper = cache.get(doi, version)

    if not cached_paper:
        print(error_payload(f"Paper not in cache: {doi}", total=len(quotes)))
        raise SystemExit(2)

    # Extract text from the PDF once; every quote is checked against it.
    try:
        pdf = extract_text_from_pdf(cached_paper.pdf_path)
    except Exception as e:
        # Deliberately broad: any extraction failure is reported as JSON.
        print(error_payload(f"Failed to extract text from PDF: {e}", total=len(quotes)))
        raise SystemExit(2)

    results: list[dict[str, Any]] = []
    verified_count = 0
    not_found_count = 0
    error_count = 0

    for idx, quote_data in enumerate(quotes):
        # Per-entry validation: a malformed entry becomes an "error" result
        # and must not abort verification of the remaining quotes.
        if not isinstance(quote_data, dict):
            entry_error: str | None = "Quote entry must be a JSON object"
            quote_text: Any = None
        else:
            quote_text = quote_data.get("text", "")
            if not quote_text:
                entry_error = "Empty quote text"
            elif not isinstance(quote_text, str):
                entry_error = "Quote text must be a string"
            else:
                entry_error = None

        if entry_error is not None:
            results.append(
                {
                    "index": idx,
                    "text": "",
                    "status": "error",
                    "found_pages": [],
                    "message": entry_error,
                }
            )
            error_count += 1
            continue

        status, found_pages, message = verify_quote_in_pdf(
            quote_text,
            pdf,
            quote_data.get("page"),
            quote_data.get("prefix"),
            quote_data.get("suffix"),
        )

        # Echo at most the first 50 characters so the output stays compact.
        display_text = quote_text[:50] + "..." if len(quote_text) > 50 else quote_text

        results.append(
            {
                "index": idx,
                "text": display_text,
                "status": status.value,
                "found_pages": found_pages,
                "message": message,
            }
        )

        # Any status other than VERIFIED is tallied as "not found".
        if status == VerificationStatus.VERIFIED:
            verified_count += 1
        else:
            not_found_count += 1

    # total == verified + not_found + errors
    output = {
        "doi": doi,
        "version": version,
        "results": results,
        "summary": {
            "total": len(quotes),
            "verified": verified_count,
            "not_found": not_found_count,
            "errors": error_count,
        },
    }
    print(json.dumps(output))

    # Exit 0 only when every supplied quote was verified; invalid entries
    # count as failures just like quotes that were not found.
    if not_found_count > 0 or error_count > 0:
        raise SystemExit(1)
    raise SystemExit(0)
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
@paper.command("verify-quote")
@click.argument("doi")
@click.option("--quote", "-q", required=True, help="Exact quote text to verify")
@click.option("--version", "-v", type=int, help="Paper version (for arXiv papers)")
@click.option("--page", "-p", type=int, help="Expected page number (1-indexed)")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def paper_verify_quote(
    doi: str, quote: str, version: int | None, page: int | None, output_json: bool
) -> None:
    """Verify a quote exists in a cached paper.

    Searches for the exact quote in the paper's text. Uses fuzzy matching
    to handle minor OCR/extraction differences.

    Exit codes:
        0 - Quote verified (found in paper)
        1 - Quote not found
        2 - Error (paper not cached, etc.)
    """
    from astra.papers.cache import PaperCache
    from astra.verification.core import VerificationStatus, verify_quote_in_pdf
    from astra.verification.pdf import extract_text_from_pdf

    def bail(reason: str, *hints: str) -> None:
        """Report an operational failure in the selected format, then exit 2."""
        if output_json:
            print(
                json.dumps(
                    {
                        "status": "error",
                        "message": reason,
                        "found_pages": [],
                        "expected_page": page,
                    }
                )
            )
        else:
            console.print(f"[red]Error:[/red] {reason}")
            for hint in hints:
                console.print(hint)
        raise SystemExit(2)

    cached_paper = PaperCache().get(doi, version)
    if not cached_paper:
        bail(f"Paper not in cache: {doi}", "Use [cyan]astra paper add[/cyan] first.")

    try:
        pdf = extract_text_from_pdf(cached_paper.pdf_path)
    except Exception as e:
        # Broad on purpose: any extraction failure is a reported error (exit 2).
        bail(f"Failed to extract text from PDF: {e}")

    outcome, pages, detail = verify_quote_in_pdf(quote, pdf, page)
    is_verified = outcome == VerificationStatus.VERIFIED

    if output_json:
        print(
            json.dumps(
                {
                    "status": outcome.value,
                    "found_pages": pages,
                    "expected_page": page,
                    "message": detail,
                }
            )
        )
    elif is_verified:
        console.print(f"[green]✓ Verified[/green] {detail}")
    else:
        console.print(f"[red]✗ Not found[/red] {detail}")

    # The exit status mirrors the verification outcome for scripting use.
    raise SystemExit(0 if is_verified else 1)
|
|
1238
|
+
|
|
1239
|
+
|
|
1240
|
+
# Run the click entry point only when this module is executed as a script,
# never on import.
if __name__ == "__main__":
    main()
|