ase-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ase/__init__.py +21 -0
- ase/adapters/__init__.py +14 -0
- ase/adapters/contract.py +28 -0
- ase/adapters/frameworks/__init__.py +17 -0
- ase/adapters/frameworks/base.py +259 -0
- ase/adapters/frameworks/langgraph.py +19 -0
- ase/adapters/frameworks/mcp.py +68 -0
- ase/adapters/frameworks/openai_agents.py +19 -0
- ase/adapters/frameworks/pydantic_ai.py +19 -0
- ase/adapters/io.py +50 -0
- ase/adapters/model.py +89 -0
- ase/adapters/protocol.py +72 -0
- ase/adapters/replay.py +261 -0
- ase/cli/__init__.py +7 -0
- ase/cli/_trace_outputs.py +40 -0
- ase/cli/adapter_cmd.py +38 -0
- ase/cli/certify_cmd.py +74 -0
- ase/cli/compare.py +145 -0
- ase/cli/doctor_cmd.py +45 -0
- ase/cli/examples_cmd.py +27 -0
- ase/cli/history_cmd.py +126 -0
- ase/cli/import_cmd.py +34 -0
- ase/cli/main.py +134 -0
- ase/cli/replay_cmd.py +48 -0
- ase/cli/report.py +115 -0
- ase/cli/spec_cmd.py +53 -0
- ase/cli/test_cmd.py +121 -0
- ase/config/env_loader.py +71 -0
- ase/config/loader.py +82 -0
- ase/config/model.py +51 -0
- ase/conformance/__init__.py +7 -0
- ase/conformance/matrix.py +111 -0
- ase/conformance/model.py +91 -0
- ase/conformance/schema.py +37 -0
- ase/conformance/service.py +194 -0
- ase/core/engine.py +348 -0
- ase/errors.py +59 -0
- ase/evaluation/__init__.py +7 -0
- ase/evaluation/base.py +63 -0
- ase/evaluation/consistency.py +79 -0
- ase/evaluation/correctness.py +117 -0
- ase/evaluation/efficiency.py +145 -0
- ase/evaluation/engine.py +182 -0
- ase/evaluation/policy.py +134 -0
- ase/evaluation/scoring.py +64 -0
- ase/evaluation/trace_summary.py +36 -0
- ase/examples_matrix.py +118 -0
- ase/reporting/__init__.py +7 -0
- ase/reporting/json_report.py +45 -0
- ase/reporting/junit.py +38 -0
- ase/reporting/markdown.py +32 -0
- ase/reporting/terminal.py +66 -0
- ase/scenario/__init__.py +7 -0
- ase/scenario/model.py +294 -0
- ase/scenario/parser.py +40 -0
- ase/storage/__init__.py +7 -0
- ase/storage/trace_store.py +136 -0
- ase/trace/__init__.py +7 -0
- ase/trace/builder.py +175 -0
- ase/trace/model.py +264 -0
- ase/trace/otel_export.py +75 -0
- ase/trace/otel_import.py +96 -0
- ase/trace/redaction.py +10 -0
- ase/trace/serializer.py +50 -0
- ase_python-0.1.0.dist-info/METADATA +184 -0
- ase_python-0.1.0.dist-info/RECORD +69 -0
- ase_python-0.1.0.dist-info/WHEEL +4 -0
- ase_python-0.1.0.dist-info/entry_points.txt +2 -0
- ase_python-0.1.0.dist-info/licenses/LICENSE +105 -0
ase/cli/history_cmd.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""ase history — list and inspect stored trace runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import datetime
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich import box
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.table import Table
|
|
13
|
+
|
|
14
|
+
from ase.storage.trace_store import TraceStore
|
|
15
|
+
from ase.trace.model import Trace
|
|
16
|
+
|
|
17
|
+
_console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def run(
    scenario: Annotated[str | None, typer.Option("--scenario", "-s")] = None,
    status: Annotated[str | None, typer.Option("--status")] = None,
    limit: Annotated[int, typer.Option("--limit", "-n")] = 20,
    trace_id: Annotated[str | None, typer.Option("--trace-id")] = None,
) -> None:
    """List or inspect stored trace runs from the persistent trace store.

    With ``--trace-id`` a single trace is shown in detail; otherwise recent
    runs are listed, optionally filtered by scenario and status.
    """
    coroutine = _run(scenario, status, limit, trace_id)
    asyncio.run(coroutine)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def _run(
    scenario: str | None,
    status: str | None,
    limit: int,
    trace_id: str | None,
) -> None:
    """Open the trace store, dispatch to detail or list view, and close it.

    The store is closed in a ``finally`` block so its resources are released
    even when ``_show_trace`` raises ``typer.Exit`` for a missing trace —
    previously the close call was skipped on that path.
    """
    store = TraceStore()
    await store.setup()
    try:
        if trace_id:
            await _show_trace(store, trace_id)
        else:
            await _list_traces(store, scenario, status, limit)
    finally:
        await store.close()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def _show_trace(store: TraceStore, trace_id: str) -> None:
    """Fetch and display one trace with runtime and evaluation metadata."""
    trace = await store.get_trace(trace_id)
    if trace is None:
        _console.print(f"[red]Trace not found: {trace_id}[/red]")
        raise typer.Exit(code=1)
    # Always-present header lines, printed in a fixed order.
    for line in (
        f"\n[bold]Trace:[/bold] {trace.trace_id}",
        f"Scenario: {trace.scenario_id} — {trace.scenario_name}",
        f"Execution: [bold]{trace.status.value}[/bold]",
        f"Evaluation: [bold]{_evaluation_status(trace)}[/bold]",
        f"Runtime: [bold]{_runtime_label(trace)}[/bold]",
    ):
        _console.print(line)
    # Optional sections appear only when the trace carries them.
    if trace.certification_level is not None:
        _console.print(f"Certified: {trace.certification_level.value}")
    if trace.evaluation is not None:
        _console.print(f"ASE Score: {trace.evaluation.ase_score:.2f}")
    _console.print(f"Duration: {trace.metrics.total_duration_ms:.0f}ms")
    _console.print(f"Tool calls: {trace.metrics.total_tool_calls}")
    if trace.error_message:
        _console.print(f"\n[red]Error:[/red] {trace.error_message}")
    if trace.stderr_output:
        _console.print(f"\n[yellow]Stderr:[/yellow]\n{trace.stderr_output}")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
async def _list_traces(
    store: TraceStore,
    scenario: str | None,
    status: str | None,
    limit: int,
) -> None:
    """Display a summary table of recent runs.

    Optional columns (runtime mode, evaluation status, certification level,
    score, start time) are read with ``.get`` — the original mixed direct
    indexing with ``.get``, so a row persisted without one of those keys
    would raise ``KeyError`` instead of rendering a placeholder.
    """
    rows = await store.list_traces(scenario_id=scenario, status=status, limit=limit)
    if not rows:
        _console.print("[yellow]No traces found.[/yellow]")
        return
    table = Table(title="Trace History", box=box.SIMPLE_HEAD, expand=False)
    table.add_column("Trace ID", style="dim", no_wrap=True)
    table.add_column("Scenario", style="bold")
    table.add_column("Runtime")
    table.add_column("Evaluation")
    table.add_column("Execution")
    table.add_column("Certified")
    table.add_column("Score", justify="right")
    table.add_column("Started At")
    for row in rows:
        score = row.get("ase_score")
        table.add_row(
            row["trace_id"][:26],
            row["scenario_id"],
            row.get("runtime_mode") or "unknown",
            row.get("evaluation_status") or "unknown",
            row["status"],
            row.get("certification_level") or "—",
            f"{score:.2f}" if score is not None else "—",
            _ms_to_str(row.get("started_at_ms")),
        )
    _console.print()
    _console.print(table)
    _console.print(f"\n[dim]Showing {len(rows)} of last {limit} runs.[/dim]\n")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _ms_to_str(ms: float | None) -> str:
    """Render millisecond timestamps as local human-readable strings.

    Returns an em dash when the timestamp is missing.
    """
    if ms is None:
        return "—"
    moment = datetime.datetime.fromtimestamp(ms / 1000)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _evaluation_status(trace: Trace) -> str:
    """Return a human-readable evaluation outcome for a trace."""
    evaluation = trace.evaluation
    if evaluation is None:
        # No evaluation was attached to this trace at all.
        return "unknown"
    return "passed" if evaluation.passed else "failed"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _runtime_label(trace: Trace) -> str:
    """Return a compact runtime label for trace detail views.

    Format is ``mode (framework)`` when a framework is recorded, plain
    ``mode`` otherwise, and ``unknown`` when provenance is absent.
    """
    provenance = trace.runtime_provenance
    if provenance is None:
        return "unknown"
    if not provenance.framework:
        return provenance.mode
    return f"{provenance.mode} ({provenance.framework})"
|
ase/cli/import_cmd.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""ase import — convert external trace formats into native ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from ase.cli._trace_outputs import write_trace_artifacts
|
|
12
|
+
from ase.config.model import OutputFormat
|
|
13
|
+
from ase.trace.otel_import import read_otel_trace
|
|
14
|
+
|
|
15
|
+
app = typer.Typer(help="Import external trace formats into native ASE traces.")
|
|
16
|
+
_console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@app.command("otel")
|
|
20
|
+
def otel(
|
|
21
|
+
trace_file: Annotated[Path, typer.Argument(help="OTEL-like JSON trace file.")],
|
|
22
|
+
trace_out: Annotated[
|
|
23
|
+
Path | None,
|
|
24
|
+
typer.Option("--trace-out", help="Write native trace JSON here."),
|
|
25
|
+
] = None,
|
|
26
|
+
output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
|
|
27
|
+
out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
|
|
28
|
+
) -> None:
|
|
29
|
+
"""Import one OTEL-like JSON trace into native ASE format."""
|
|
30
|
+
trace = read_otel_trace(trace_file)
|
|
31
|
+
write_trace_artifacts(trace, trace_out=trace_out, output=output, out_file=out_file)
|
|
32
|
+
_console.print(f"trace_id: {trace.trace_id}")
|
|
33
|
+
if trace_out is not None:
|
|
34
|
+
_console.print(f"trace_out: {trace_out}")
|
ase/cli/main.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""ASE CLI entry point.
|
|
2
|
+
|
|
3
|
+
Defines the top-level Typer app and registers all sub-commands.
|
|
4
|
+
Run via:
|
|
5
|
+
ase watch | test | compare | report | init | history | doctor
|
|
6
|
+
ase spec | baseline | adapter | replay | import | certify
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from importlib import import_module
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Annotated
|
|
14
|
+
|
|
15
|
+
import typer
|
|
16
|
+
|
|
17
|
+
from ase.config.env_loader import load_local_dotenv
|
|
18
|
+
from ase.config.model import OutputFormat
|
|
19
|
+
|
|
20
|
+
# Load .env from the project root (or any parent directory) before any command
|
|
21
|
+
# runs. Keeping this in-repo avoids CLI boot failures caused by broken optional
|
|
22
|
+
# dotenv/plugin installs in the user's environment.
|
|
23
|
+
load_local_dotenv()
|
|
24
|
+
|
|
25
|
+
app = typer.Typer(
|
|
26
|
+
name="ase",
|
|
27
|
+
help="Agent Simulation Engine — pytest for AI agent tool calls.",
|
|
28
|
+
no_args_is_help=True,
|
|
29
|
+
pretty_exceptions_enable=False,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _run_test(
    scenario: Annotated[
        list[Path] | None,
        typer.Argument(help="Scenario YAML file(s) or directories to run."),
    ] = None,
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to ase.yaml config file."),
    ] = None,
    output: Annotated[
        OutputFormat | None,
        typer.Option("--output", "-o", help="Output format."),
    ] = None,
    out_file: Annotated[
        Path | None,
        typer.Option("--out-file", "-f", help="Write report to this file."),
    ] = None,
    fail_fast: Annotated[
        bool,
        typer.Option("--fail-fast", help="Stop after first failed scenario."),
    ] = False,
    workers: Annotated[
        int,
        typer.Option("--workers", "-w", help="Max concurrent scenarios."),
    ] = 4,
    debug: Annotated[
        bool,
        typer.Option("--debug", help="Stream agent stdout/stderr live to terminal."),
    ] = False,
) -> None:
    """Lazy-load ase test so non-proxy commands do not import mitmproxy."""
    from ase.cli.test_cmd import run as _test_run

    _test_run(scenario, config, output, out_file, fail_fast, workers, debug)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _run_watch(
    ctx: typer.Context,
    command: Annotated[
        list[str] | None,
        typer.Argument(help="Agent command to run, e.g. python agent.py"),
    ] = None,
    port: Annotated[
        int,
        typer.Option("--port", "-p", help="Proxy port to listen on."),
    ] = 0,
    timeout: Annotated[
        int,
        typer.Option("--timeout", "-t", help="Max seconds to wait for agent."),
    ] = 120,
) -> None:
    """Lazy-load ase watch so the proxy stack is optional at import time."""
    from ase.cli.watch import run as _watch_run

    _watch_run(ctx, command, port, timeout)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _run_doctor() -> None:
    """Lazy-load ase doctor so core CLI boot is resilient to optional modules."""
    from ase.cli.doctor_cmd import run as _doctor_run

    _doctor_run()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _register_command(name: str, module_path: str, attr: str = "run") -> None:
    """Register one command only when its module is importable in this env.

    A failed import means the command's optional dependencies are missing;
    the command is simply left unregistered rather than breaking CLI boot.
    """
    try:
        command_fn = getattr(import_module(module_path), attr)
    except ImportError:
        return
    app.command(name)(command_fn)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _register_typer(name: str, module_path: str, attr: str = "app") -> None:
    """Register one Typer sub-app only when its module is importable.

    Mirrors `_register_command`, but mounts a whole command group.
    """
    try:
        sub_app = getattr(import_module(module_path), attr)
    except ImportError:
        return
    app.add_typer(sub_app, name=name)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
app.command("watch")(_run_watch)
|
|
113
|
+
app.command("test")(_run_test)
|
|
114
|
+
app.command("doctor")(_run_doctor)
|
|
115
|
+
_register_command("compare", "ase.cli.compare")
|
|
116
|
+
_register_command("report", "ase.cli.report")
|
|
117
|
+
_register_command("certify", "ase.cli.certify_cmd")
|
|
118
|
+
_register_command("replay", "ase.cli.replay_cmd")
|
|
119
|
+
_register_command("init", "ase.cli.init_cmd")
|
|
120
|
+
_register_command("history", "ase.cli.history_cmd")
|
|
121
|
+
_register_typer("spec", "ase.cli.spec_cmd")
|
|
122
|
+
_register_typer("baseline", "ase.cli.baseline_cmd")
|
|
123
|
+
_register_typer("adapter", "ase.cli.adapter_cmd")
|
|
124
|
+
_register_typer("examples", "ase.cli.examples_cmd")
|
|
125
|
+
_register_typer("import", "ase.cli.import_cmd")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def main() -> None:
    """Entry point called by the `ase` console script."""
    app()


# Support direct execution (`python -m` / script run) as well as the
# installed console-script entry point.
if __name__ == "__main__":
    main()
|
ase/cli/replay_cmd.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""ase replay — convert adapter events into native ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from ase.adapters.protocol import read_and_verify
|
|
12
|
+
from ase.adapters.replay import trace_from_adapter_events
|
|
13
|
+
from ase.cli._trace_outputs import write_trace_artifacts
|
|
14
|
+
from ase.config.model import OutputFormat
|
|
15
|
+
from ase.errors import AdapterProtocolError
|
|
16
|
+
|
|
17
|
+
_console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def run(
    event_file: Annotated[Path, typer.Argument(help="JSONL adapter event file.")],
    scenario_id: Annotated[
        str,
        typer.Option("--scenario-id", help="Scenario id for the replayed trace."),
    ],
    scenario_name: Annotated[
        str,
        typer.Option(
            "--scenario-name",
            help="Scenario name for the replayed trace.",
        ),
    ],
    trace_out: Annotated[
        Path | None,
        typer.Option("--trace-out", help="Write native trace JSON here."),
    ] = None,
    output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
    out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
) -> None:
    """Replay one validated adapter event stream into a native ASE trace.

    Raises:
        AdapterProtocolError: when the event stream fails verification. The
            verifier's error details are included in the message, matching
            the behavior of `ase test` (test_cmd.py), so operators can see
            what is wrong without re-running.
    """
    events, result = read_and_verify(event_file)
    if not result.passed:
        details = ", ".join(result.errors)
        raise AdapterProtocolError(f"adapter event stream failed verification: {details}")
    trace = trace_from_adapter_events(events, scenario_id, scenario_name)
    write_trace_artifacts(trace, trace_out=trace_out, output=output, out_file=out_file)
    _console.print(f"trace_id: {trace.trace_id}")
    if trace_out is not None:
        _console.print(f"trace_out: {trace_out}")
|
ase/cli/report.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""ase report — render one saved trace for operators and CI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from ase.config.model import OutputFormat
|
|
13
|
+
from ase.errors import CLIError, TraceSerializationError
|
|
14
|
+
from ase.trace.model import Trace
|
|
15
|
+
|
|
16
|
+
_console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run(
    trace_file: Annotated[Path, typer.Argument(help="Native ASE trace JSON file.")],
    output: Annotated[
        OutputFormat,
        typer.Option("--output", "-o", help="Output format."),
    ] = OutputFormat.TERMINAL,
    out_file: Annotated[
        Path | None,
        typer.Option("--out-file", "-f", help="Write the rendered report to this file."),
    ] = None,
) -> None:
    """Render a trace in a compact operator-facing or machine-readable format."""
    rendered = _render_trace(_load_trace(trace_file), output)
    if out_file is None:
        _console.print(rendered)
        return
    # Files always end with exactly one trailing newline.
    text = rendered if rendered.endswith("\n") else rendered + "\n"
    out_file.write_text(text, encoding="utf-8")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _load_trace(path: Path) -> Trace:
    """Load one native ASE trace with contextual parse errors.

    Raises:
        TraceSerializationError: when the file cannot be read or parsed.
    """
    try:
        raw = path.read_text(encoding="utf-8")
    except OSError as exc:
        raise TraceSerializationError(f"failed to read trace file {path}: {exc}") from exc
    try:
        return Trace.model_validate_json(raw)
    except Exception as exc:  # noqa: BLE001
        raise TraceSerializationError(f"failed to parse trace file {path}: {exc}") from exc
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _render_trace(trace: Trace, output: OutputFormat) -> str:
    """Choose the stable renderer for the requested output format.

    Raises:
        CLIError: for a format this command does not support.
    """
    renderers = {
        OutputFormat.JSON: lambda t: json.dumps(t.model_dump(mode="json"), indent=2),
        OutputFormat.MARKDOWN: _to_markdown,
        OutputFormat.OTEL_JSON: _to_otel_json,
        OutputFormat.TERMINAL: _to_terminal_text,
    }
    renderer = renderers.get(output)
    if renderer is None:
        raise CLIError(f"unsupported report output format: {output}")
    return renderer(trace)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _to_terminal_text(trace: Trace) -> str:
    """Summarize the key execution, runtime, and evaluation facts for operators."""
    runtime = trace.runtime_provenance
    mode = runtime.mode if runtime else "unknown"
    framework = runtime.framework if runtime and runtime.framework else "unknown"
    lines = [
        f"trace_id: {trace.trace_id}",
        f"scenario: {trace.scenario_id}",
        f"status: {trace.status}",
        f"runtime_mode: {mode}",
        f"framework: {framework}",
        f"tool_calls: {trace.metrics.total_tool_calls}",
        f"llm_calls: {trace.metrics.total_llm_calls}",
    ]
    evaluation = trace.evaluation
    if evaluation is not None:
        verdict = "passed" if evaluation.passed else "failed"
        lines.append(f"evaluation: {verdict}")
        lines.append(f"ase_score: {evaluation.ase_score:.2f}")
        lines.append(
            f"assertions: {evaluation.passed_count} passed / "
            f"{evaluation.failed_count} failed / "
            f"{evaluation.total} total"
        )
    return "\n".join(lines)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _to_markdown(trace: Trace) -> str:
    """Render a short Markdown report suitable for CI summaries and PR comments."""
    runtime = trace.runtime_provenance
    mode = runtime.mode if runtime else "unknown"
    framework = runtime.framework if runtime and runtime.framework else "unknown"
    lines = [
        "# ASE Trace Report",
        "",
        f"- Trace ID: `{trace.trace_id}`",
        f"- Scenario: `{trace.scenario_id}`",
        f"- Status: `{trace.status}`",
        f"- Runtime: `{mode}`",
        f"- Framework: `{framework}`",
        f"- Tool calls: `{trace.metrics.total_tool_calls}`",
    ]
    if trace.evaluation is not None:
        lines.append(f"- ASE score: `{trace.evaluation.ase_score:.2f}`")
    return "\n".join(lines)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _to_otel_json(trace: Trace) -> str:
    """Delegate OTEL-like export to the trace interoperability layer."""
    # Imported lazily so the report command works without the export module.
    from ase.trace.otel_export import to_otel_dict

    payload = to_otel_dict(trace)
    return json.dumps(payload, indent=2)
|
ase/cli/spec_cmd.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""ase spec — validate scenarios and expose their JSON schema."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from ase.scenario.parser import parse_file, schema_path
|
|
13
|
+
|
|
14
|
+
app = typer.Typer(help="Validate ASE scenarios and inspect the public schema.")
|
|
15
|
+
_console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@app.command("validate")
|
|
19
|
+
def validate(
|
|
20
|
+
scenario: Annotated[Path, typer.Argument(help="Scenario YAML file to validate.")],
|
|
21
|
+
) -> None:
|
|
22
|
+
"""Validate one scenario file against ASE's current scenario model."""
|
|
23
|
+
config = parse_file(scenario)
|
|
24
|
+
_console.print(f"valid: {config.scenario_id}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.command("print-schema")
|
|
28
|
+
def print_schema(
|
|
29
|
+
kind: Annotated[
|
|
30
|
+
str,
|
|
31
|
+
typer.Option("--kind", help="scenario | conformance-manifest | conformance-result"),
|
|
32
|
+
] = "scenario",
|
|
33
|
+
path: Annotated[bool, typer.Option("--path", help="Print the schema file path only.")] = False,
|
|
34
|
+
) -> None:
|
|
35
|
+
"""Print the public scenario schema or its repo path."""
|
|
36
|
+
target = _schema_path(kind)
|
|
37
|
+
if path:
|
|
38
|
+
_console.print(str(target))
|
|
39
|
+
return
|
|
40
|
+
payload = json.loads(target.read_text(encoding="utf-8"))
|
|
41
|
+
_console.print(json.dumps(payload, indent=2))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _schema_path(kind: str) -> Path:
    """Resolve public schema aliases onto repo paths.

    Raises:
        typer.BadParameter: for an unrecognized schema kind.
    """
    if kind == "scenario":
        return schema_path()
    filenames = {
        "conformance-manifest": "ase_conformance_manifest.schema.json",
        "conformance-result": "ase_conformance_result.schema.json",
    }
    if kind not in filenames:
        raise typer.BadParameter(f"unknown schema kind: {kind}")
    return schema_path().parent / filenames[kind]
|
ase/cli/test_cmd.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""ase test — run adapter-backed scenarios through ASE's evaluation flow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from ase.adapters.protocol import read_and_verify
|
|
13
|
+
from ase.adapters.replay import trace_from_adapter_events
|
|
14
|
+
from ase.config.model import OutputFormat
|
|
15
|
+
from ase.errors import CLIError
|
|
16
|
+
from ase.evaluation.engine import EvaluationEngine
|
|
17
|
+
from ase.evaluation.trace_summary import attach_summary
|
|
18
|
+
from ase.scenario.model import AgentRuntimeMode, ScenarioConfig
|
|
19
|
+
from ase.scenario.parser import parse_file
|
|
20
|
+
from ase.storage.trace_store import TraceStore
|
|
21
|
+
from ase.trace.model import TraceStatus
|
|
22
|
+
|
|
23
|
+
_console = Console()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def run(
    scenario: Annotated[list[Path] | None, typer.Argument()] = None,
    config: Annotated[Path | None, typer.Option("--config", "-c")] = None,
    output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
    out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
    fail_fast: Annotated[bool, typer.Option("--fail-fast")] = False,
    workers: Annotated[int, typer.Option("--workers", "-w")] = 4,
    debug: Annotated[bool, typer.Option("--debug")] = False,
) -> None:
    """Run adapter-backed scenarios through replay, evaluation, and persistence."""
    # These options are accepted for CLI compatibility but are not wired into
    # the recovered adapter-only test path yet.
    del config, output, out_file, workers, debug
    scenario_paths = scenario or []
    if not scenario_paths:
        # Explain the failure instead of exiting silently with code 1.
        _console.print("[red]No scenario files given. Pass one or more scenario YAML paths.[/red]")
        raise typer.Exit(code=1)
    store = TraceStore()
    _run_all(scenario_paths, store, fail_fast=fail_fast)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _run_all(paths: list[Path], store: TraceStore, *, fail_fast: bool) -> None:
    """Execute all requested scenarios and stop early only when requested.

    The trace store is closed in a ``finally`` block so an unexpected
    (non-CLIError) exception from a scenario run cannot leak the store —
    previously the close call was skipped on that path.

    Raises:
        typer.Exit: with code 1 when any scenario failed.
    """
    import asyncio

    asyncio.run(store.setup())
    failures = 0
    try:
        for path in paths:
            try:
                _run_one(path, store)
            except CLIError as exc:
                failures += 1
                _console.print(f"[red]{exc}[/red]")
                if fail_fast:
                    break
    finally:
        asyncio.run(store.close())
    if failures:
        raise typer.Exit(code=1)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _run_one(path: Path, store: TraceStore) -> None:
    """Execute one adapter scenario end to end and persist its trace.

    Order of operations: parse the scenario, run the agent with the event
    sink exported, verify the emitted adapter events, replay them into a
    native trace, evaluate, persist, then render a summary.

    Raises:
        CLIError: for non-adapter scenarios, a failed event-stream
            verification, a non-zero agent exit, or a failed evaluation.
    """
    scenario = parse_file(path)
    # This recovered path supports adapter-mode scenarios only.
    if scenario.runtime_mode != AgentRuntimeMode.ADAPTER:
        raise CLIError(f"recovered ase test currently supports adapter mode only: {path}")
    event_path = _event_path(path, scenario)
    result = _run_agent(scenario, event_path)
    events, verification = read_and_verify(event_path)
    if not verification.passed:
        details = ", ".join(verification.errors)
        raise CLIError(f"adapter event stream failed verification: {details}")
    trace = trace_from_adapter_events(events, scenario.scenario_id, scenario.name)
    # Empty stderr is normalized to None so it is not persisted as "".
    trace.stderr_output = result.stderr.strip() or None
    if result.returncode != 0:
        # A failing agent process marks the trace failed even when its
        # event stream parsed and verified cleanly.
        trace.status = TraceStatus.FAILED
        trace.error_message = trace.stderr_output or f"agent exited with code {result.returncode}"
    evaluators = list(scenario.assertions) + list(scenario.policies)
    summary = EvaluationEngine().evaluate(trace, evaluators, {})
    attach_summary(trace, summary)
    import asyncio

    # Persist the trace (with its score) before rendering the summary.
    asyncio.run(store.save_trace(trace, ase_score=summary.ase_score))
    _render_summary(trace, summary)
    if not summary.passed or result.returncode != 0:
        raise CLIError(f"scenario failed: {scenario.scenario_id}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _run_agent(scenario: ScenarioConfig, event_path: Path) -> subprocess.CompletedProcess[str]:
    """Run the scenario agent with the event sink path exported to the process.

    The child inherits the current environment, overlaid with the scenario's
    declared env vars and ``ASE_ADAPTER_EVENT_SOURCE`` pointing at the sink.
    Replaces the inline ``__import__("os")`` hack with a normal import.

    Returns:
        The completed process; ``check=False`` means a non-zero exit is
        reported through ``returncode`` rather than raised.
    """
    import os

    # Start from a clean sink so stale events from a prior run cannot leak in.
    event_path.unlink(missing_ok=True)
    env = {
        **os.environ,
        **dict(scenario.agent.env),
        "ASE_ADAPTER_EVENT_SOURCE": str(event_path),
    }
    return subprocess.run(
        scenario.agent.command,
        cwd=Path.cwd(),
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _event_path(path: Path, scenario: ScenarioConfig) -> Path:
    """Resolve an adapter event file relative to the scenario file location.

    Raises:
        CLIError: when the scenario declares no adapter event source.
    """
    runtime = scenario.agent_runtime
    if runtime is None or not runtime.event_source:
        raise CLIError(f"adapter runtime missing event_source: {path}")
    source = Path(runtime.event_source)
    if source.is_absolute():
        return source
    return path.resolve().parent / source
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _render_summary(trace: object, summary: object) -> None:
    """Print a compact operator-facing outcome for the recovered test path."""
    # getattr with defaults keeps rendering robust against partial objects.
    passed = getattr(summary, "passed", False)
    badge = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
    scenario_id = getattr(trace, "scenario_id", "unknown")
    trace_id = getattr(trace, "trace_id", "unknown")
    score = getattr(summary, "ase_score", 0.0)
    _console.print(f"{badge} {scenario_id} trace={trace_id} ase_score={score:.2f}")