ase-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. ase/__init__.py +21 -0
  2. ase/adapters/__init__.py +14 -0
  3. ase/adapters/contract.py +28 -0
  4. ase/adapters/frameworks/__init__.py +17 -0
  5. ase/adapters/frameworks/base.py +259 -0
  6. ase/adapters/frameworks/langgraph.py +19 -0
  7. ase/adapters/frameworks/mcp.py +68 -0
  8. ase/adapters/frameworks/openai_agents.py +19 -0
  9. ase/adapters/frameworks/pydantic_ai.py +19 -0
  10. ase/adapters/io.py +50 -0
  11. ase/adapters/model.py +89 -0
  12. ase/adapters/protocol.py +72 -0
  13. ase/adapters/replay.py +261 -0
  14. ase/cli/__init__.py +7 -0
  15. ase/cli/_trace_outputs.py +40 -0
  16. ase/cli/adapter_cmd.py +38 -0
  17. ase/cli/certify_cmd.py +74 -0
  18. ase/cli/compare.py +145 -0
  19. ase/cli/doctor_cmd.py +45 -0
  20. ase/cli/examples_cmd.py +27 -0
  21. ase/cli/history_cmd.py +126 -0
  22. ase/cli/import_cmd.py +34 -0
  23. ase/cli/main.py +134 -0
  24. ase/cli/replay_cmd.py +48 -0
  25. ase/cli/report.py +115 -0
  26. ase/cli/spec_cmd.py +53 -0
  27. ase/cli/test_cmd.py +121 -0
  28. ase/config/env_loader.py +71 -0
  29. ase/config/loader.py +82 -0
  30. ase/config/model.py +51 -0
  31. ase/conformance/__init__.py +7 -0
  32. ase/conformance/matrix.py +111 -0
  33. ase/conformance/model.py +91 -0
  34. ase/conformance/schema.py +37 -0
  35. ase/conformance/service.py +194 -0
  36. ase/core/engine.py +348 -0
  37. ase/errors.py +59 -0
  38. ase/evaluation/__init__.py +7 -0
  39. ase/evaluation/base.py +63 -0
  40. ase/evaluation/consistency.py +79 -0
  41. ase/evaluation/correctness.py +117 -0
  42. ase/evaluation/efficiency.py +145 -0
  43. ase/evaluation/engine.py +182 -0
  44. ase/evaluation/policy.py +134 -0
  45. ase/evaluation/scoring.py +64 -0
  46. ase/evaluation/trace_summary.py +36 -0
  47. ase/examples_matrix.py +118 -0
  48. ase/reporting/__init__.py +7 -0
  49. ase/reporting/json_report.py +45 -0
  50. ase/reporting/junit.py +38 -0
  51. ase/reporting/markdown.py +32 -0
  52. ase/reporting/terminal.py +66 -0
  53. ase/scenario/__init__.py +7 -0
  54. ase/scenario/model.py +294 -0
  55. ase/scenario/parser.py +40 -0
  56. ase/storage/__init__.py +7 -0
  57. ase/storage/trace_store.py +136 -0
  58. ase/trace/__init__.py +7 -0
  59. ase/trace/builder.py +175 -0
  60. ase/trace/model.py +264 -0
  61. ase/trace/otel_export.py +75 -0
  62. ase/trace/otel_import.py +96 -0
  63. ase/trace/redaction.py +10 -0
  64. ase/trace/serializer.py +50 -0
  65. ase_python-0.1.0.dist-info/METADATA +184 -0
  66. ase_python-0.1.0.dist-info/RECORD +69 -0
  67. ase_python-0.1.0.dist-info/WHEEL +4 -0
  68. ase_python-0.1.0.dist-info/entry_points.txt +2 -0
  69. ase_python-0.1.0.dist-info/licenses/LICENSE +105 -0
ase/cli/history_cmd.py ADDED
@@ -0,0 +1,126 @@
1
+ """ase history — list and inspect stored trace runs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import datetime
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ from rich import box
11
+ from rich.console import Console
12
+ from rich.table import Table
13
+
14
+ from ase.storage.trace_store import TraceStore
15
+ from ase.trace.model import Trace
16
+
17
+ _console = Console()
18
+
19
+
20
def run(
    scenario: Annotated[str | None, typer.Option("--scenario", "-s")] = None,
    status: Annotated[str | None, typer.Option("--status")] = None,
    limit: Annotated[int, typer.Option("--limit", "-n")] = 20,
    trace_id: Annotated[str | None, typer.Option("--trace-id")] = None,
) -> None:
    """List or inspect stored trace runs from the persistent trace store.

    Args:
        scenario: Optional scenario-id filter for the listing view.
        status: Optional execution-status filter for the listing view.
        limit: Maximum number of rows shown in the listing view.
        trace_id: When given, show this one trace instead of the listing.
    """
    # Bridge Typer's synchronous entry point into the async store API.
    asyncio.run(_run(scenario, status, limit, trace_id))
28
+
29
+
30
async def _run(
    scenario: str | None,
    status: str | None,
    limit: int,
    trace_id: str | None,
) -> None:
    """Open the trace store, dispatch to the detail or listing view, then close.

    Args:
        scenario: Optional scenario-id filter (listing view only).
        status: Optional execution-status filter (listing view only).
        limit: Maximum rows for the listing view.
        trace_id: When truthy, show that single trace instead of a listing.
    """
    store = TraceStore()
    await store.setup()
    try:
        if trace_id:
            await _show_trace(store, trace_id)
        else:
            await _list_traces(store, scenario, status, limit)
    finally:
        # Bug fix: _show_trace raises typer.Exit for a missing trace, which
        # previously skipped store.close() and leaked the store connection.
        await store.close()
43
+
44
+
45
async def _show_trace(store: TraceStore, trace_id: str) -> None:
    """Fetch and display one trace with runtime and evaluation metadata."""
    trace = await store.get_trace(trace_id)
    if trace is None:
        _console.print(f"[red]Trace not found: {trace_id}[/red]")
        raise typer.Exit(code=1)
    # Assemble the output lines first, then emit them in order.
    lines = [
        f"\n[bold]Trace:[/bold] {trace.trace_id}",
        f"Scenario: {trace.scenario_id} — {trace.scenario_name}",
        f"Execution: [bold]{trace.status.value}[/bold]",
        f"Evaluation: [bold]{_evaluation_status(trace)}[/bold]",
        f"Runtime: [bold]{_runtime_label(trace)}[/bold]",
    ]
    if trace.certification_level is not None:
        lines.append(f"Certified: {trace.certification_level.value}")
    if trace.evaluation is not None:
        lines.append(f"ASE Score: {trace.evaluation.ase_score:.2f}")
    lines.append(f"Duration: {trace.metrics.total_duration_ms:.0f}ms")
    lines.append(f"Tool calls: {trace.metrics.total_tool_calls}")
    if trace.error_message:
        lines.append(f"\n[red]Error:[/red] {trace.error_message}")
    if trace.stderr_output:
        lines.append(f"\n[yellow]Stderr:[/yellow]\n{trace.stderr_output}")
    for line in lines:
        _console.print(line)
66
+
67
+
68
async def _list_traces(
    store: TraceStore,
    scenario: str | None,
    status: str | None,
    limit: int,
) -> None:
    """Display a summary table of recent runs."""
    rows = await store.list_traces(scenario_id=scenario, status=status, limit=limit)
    if not rows:
        _console.print("[yellow]No traces found.[/yellow]")
        return
    table = Table(title="Trace History", box=box.SIMPLE_HEAD, expand=False)
    table.add_column("Trace ID", style="dim", no_wrap=True)
    table.add_column("Scenario", style="bold")
    # Plain columns share identical settings, so add them in one pass.
    for heading in ("Runtime", "Evaluation", "Execution", "Certified"):
        table.add_column(heading)
    table.add_column("Score", justify="right")
    table.add_column("Started At")
    for entry in rows:
        ase_score = entry["ase_score"]
        score_text = f"{ase_score:.2f}" if ase_score is not None else "—"
        table.add_row(
            entry["trace_id"][:26],
            entry["scenario_id"],
            entry.get("runtime_mode") or "unknown",
            entry["evaluation_status"] or "unknown",
            entry["status"],
            entry.get("certification_level") or "—",
            score_text,
            _ms_to_str(entry["started_at_ms"]),
        )
    _console.print()
    _console.print(table)
    _console.print(f"\n[dim]Showing {len(rows)} of last {limit} runs.[/dim]\n")
103
+
104
+
105
+ def _ms_to_str(ms: float | None) -> str:
106
+ """Render millisecond timestamps as local human-readable strings."""
107
+ if ms is None:
108
+ return "—"
109
+ return datetime.datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
110
+
111
+
112
+ def _evaluation_status(trace: Trace) -> str:
113
+ """Return a human-readable evaluation outcome for a trace."""
114
+ if trace.evaluation is None:
115
+ return "unknown"
116
+ return "passed" if trace.evaluation.passed else "failed"
117
+
118
+
119
+ def _runtime_label(trace: Trace) -> str:
120
+ """Return a compact runtime label for trace detail views."""
121
+ if trace.runtime_provenance is None:
122
+ return "unknown"
123
+ framework = trace.runtime_provenance.framework
124
+ if framework:
125
+ return f"{trace.runtime_provenance.mode} ({framework})"
126
+ return trace.runtime_provenance.mode
ase/cli/import_cmd.py ADDED
@@ -0,0 +1,34 @@
1
+ """ase import — convert external trace formats into native ASE traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Annotated
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ from ase.cli._trace_outputs import write_trace_artifacts
12
+ from ase.config.model import OutputFormat
13
+ from ase.trace.otel_import import read_otel_trace
14
+
15
+ app = typer.Typer(help="Import external trace formats into native ASE traces.")
16
+ _console = Console()
17
+
18
+
19
@app.command("otel")
def otel(
    trace_file: Annotated[Path, typer.Argument(help="OTEL-like JSON trace file.")],
    trace_out: Annotated[
        Path | None,
        typer.Option("--trace-out", help="Write native trace JSON here."),
    ] = None,
    output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
    out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
) -> None:
    """Import one OTEL-like JSON trace into native ASE format.

    Reads *trace_file*, converts it into a native trace, writes the requested
    artifacts via the shared CLI helper, and echoes the resulting trace id
    (plus the output path when --trace-out was given) for scripting.
    """
    trace = read_otel_trace(trace_file)
    write_trace_artifacts(trace, trace_out=trace_out, output=output, out_file=out_file)
    # Echo key outputs so shell pipelines can capture them.
    _console.print(f"trace_id: {trace.trace_id}")
    if trace_out is not None:
        _console.print(f"trace_out: {trace_out}")
ase/cli/main.py ADDED
@@ -0,0 +1,134 @@
1
+ """ASE CLI entry point.
2
+
3
+ Defines the top-level Typer app and registers all sub-commands.
4
+ Run via:
5
+ ase watch | test | compare | report | init | history | doctor
6
+ ase spec | baseline | adapter | replay | import | certify
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from importlib import import_module
12
+ from pathlib import Path
13
+ from typing import Annotated
14
+
15
+ import typer
16
+
17
+ from ase.config.env_loader import load_local_dotenv
18
+ from ase.config.model import OutputFormat
19
+
20
+ # Load .env from the project root (or any parent directory) before any command
21
+ # runs. Keeping this in-repo avoids CLI boot failures caused by broken optional
22
+ # dotenv/plugin installs in the user's environment.
23
+ load_local_dotenv()
24
+
25
+ app = typer.Typer(
26
+ name="ase",
27
+ help="Agent Simulation Engine — pytest for AI agent tool calls.",
28
+ no_args_is_help=True,
29
+ pretty_exceptions_enable=False,
30
+ )
31
+
32
+
33
def _run_test(
    scenario: Annotated[
        list[Path] | None,
        typer.Argument(help="Scenario YAML file(s) or directories to run."),
    ] = None,
    config: Annotated[
        Path | None,
        typer.Option("--config", "-c", help="Path to ase.yaml config file."),
    ] = None,
    output: Annotated[
        OutputFormat | None,
        typer.Option("--output", "-o", help="Output format."),
    ] = None,
    out_file: Annotated[
        Path | None,
        typer.Option("--out-file", "-f", help="Write report to this file."),
    ] = None,
    fail_fast: Annotated[
        bool,
        typer.Option("--fail-fast", help="Stop after first failed scenario."),
    ] = False,
    workers: Annotated[
        int,
        typer.Option("--workers", "-w", help="Max concurrent scenarios."),
    ] = 4,
    debug: Annotated[
        bool,
        typer.Option("--debug", help="Stream agent stdout/stderr live to terminal."),
    ] = False,
) -> None:
    """Lazy-load `ase test` so non-proxy commands do not import mitmproxy.

    Thin Typer shim: the real implementation is imported inside the body so
    that merely booting the CLI never pulls in test_cmd's dependency stack.
    All parameters are forwarded positionally to ase.cli.test_cmd.run.
    """
    from ase.cli.test_cmd import run

    run(scenario, config, output, out_file, fail_fast, workers, debug)
66
+
67
+
68
def _run_watch(
    ctx: typer.Context,
    command: Annotated[
        list[str] | None,
        typer.Argument(help="Agent command to run, e.g. python agent.py"),
    ] = None,
    port: Annotated[
        int,
        typer.Option("--port", "-p", help="Proxy port to listen on."),
    ] = 0,
    timeout: Annotated[
        int,
        typer.Option("--timeout", "-t", help="Max seconds to wait for agent."),
    ] = 120,
) -> None:
    """Lazy-load `ase watch` so the proxy stack is optional at import time.

    Thin Typer shim mirroring _run_test: the watch implementation (and its
    proxy dependencies) is only imported when the command actually runs.
    Note: port default 0 — presumably means "pick a free port"; confirm
    against ase.cli.watch.
    """
    from ase.cli.watch import run

    run(ctx, command, port, timeout)
86
+
87
+
88
def _run_doctor() -> None:
    """Lazy-load `ase doctor` so core CLI boot is resilient to optional modules."""
    # Imported at call time, mirroring the other lazy command shims.
    from ase.cli.doctor_cmd import run as doctor_run

    doctor_run()
92
+
93
+
94
def _register_command(name: str, module_path: str, attr: str = "run") -> None:
    """Register one command only when its module is importable in this env."""
    try:
        module = import_module(module_path)
    except ImportError:
        # Optional module not installed in this environment — skip silently.
        return
    command_fn = getattr(module, attr)
    app.command(name)(command_fn)
101
+
102
+
103
def _register_typer(name: str, module_path: str, attr: str = "app") -> None:
    """Register one Typer sub-app only when its module is importable."""
    try:
        module = import_module(module_path)
    except ImportError:
        # Optional module not installed in this environment — skip silently.
        return
    sub_app = getattr(module, attr)
    app.add_typer(sub_app, name=name)
110
+
111
+
112
+ app.command("watch")(_run_watch)
113
+ app.command("test")(_run_test)
114
+ app.command("doctor")(_run_doctor)
115
+ _register_command("compare", "ase.cli.compare")
116
+ _register_command("report", "ase.cli.report")
117
+ _register_command("certify", "ase.cli.certify_cmd")
118
+ _register_command("replay", "ase.cli.replay_cmd")
119
+ _register_command("init", "ase.cli.init_cmd")
120
+ _register_command("history", "ase.cli.history_cmd")
121
+ _register_typer("spec", "ase.cli.spec_cmd")
122
+ _register_typer("baseline", "ase.cli.baseline_cmd")
123
+ _register_typer("adapter", "ase.cli.adapter_cmd")
124
+ _register_typer("examples", "ase.cli.examples_cmd")
125
+ _register_typer("import", "ase.cli.import_cmd")
126
+
127
+
128
def main() -> None:
    """Entry point called by the `ase` console script."""
    app()


if __name__ == "__main__":
    # Allow running the CLI directly (e.g. `python -m ase.cli.main`).
    main()
ase/cli/replay_cmd.py ADDED
@@ -0,0 +1,48 @@
1
+ """ase replay — convert adapter events into native ASE traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Annotated
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ from ase.adapters.protocol import read_and_verify
12
+ from ase.adapters.replay import trace_from_adapter_events
13
+ from ase.cli._trace_outputs import write_trace_artifacts
14
+ from ase.config.model import OutputFormat
15
+ from ase.errors import AdapterProtocolError
16
+
17
+ _console = Console()
18
+
19
+
20
def run(
    event_file: Annotated[Path, typer.Argument(help="JSONL adapter event file.")],
    scenario_id: Annotated[
        str,
        typer.Option("--scenario-id", help="Scenario id for the replayed trace."),
    ],
    scenario_name: Annotated[
        str,
        typer.Option(
            "--scenario-name",
            help="Scenario name for the replayed trace.",
        ),
    ],
    trace_out: Annotated[
        Path | None,
        typer.Option("--trace-out", help="Write native trace JSON here."),
    ] = None,
    output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
    out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
) -> None:
    """Replay one validated adapter event stream into a native ASE trace.

    Verifies the JSONL event stream first and refuses to replay an invalid
    one, then converts the events to a native trace and writes the requested
    artifacts.

    Raises:
        AdapterProtocolError: when the event stream fails protocol verification.
    """
    events, result = read_and_verify(event_file)
    if not result.passed:
        raise AdapterProtocolError("adapter event stream failed verification")
    trace = trace_from_adapter_events(events, scenario_id, scenario_name)
    write_trace_artifacts(trace, trace_out=trace_out, output=output, out_file=out_file)
    # Echo key outputs so shell scripts can capture them.
    _console.print(f"trace_id: {trace.trace_id}")
    if trace_out is not None:
        _console.print(f"trace_out: {trace_out}")
ase/cli/report.py ADDED
@@ -0,0 +1,115 @@
1
+ """ase report — render one saved trace for operators and CI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ from rich.console import Console
11
+
12
+ from ase.config.model import OutputFormat
13
+ from ase.errors import CLIError, TraceSerializationError
14
+ from ase.trace.model import Trace
15
+
16
+ _console = Console()
17
+
18
+
19
def run(
    trace_file: Annotated[Path, typer.Argument(help="Native ASE trace JSON file.")],
    output: Annotated[
        OutputFormat,
        typer.Option("--output", "-o", help="Output format."),
    ] = OutputFormat.TERMINAL,
    out_file: Annotated[
        Path | None,
        typer.Option("--out-file", "-f", help="Write the rendered report to this file."),
    ] = None,
) -> None:
    """Render a trace in a compact operator-facing or machine-readable format.

    Raises:
        TraceSerializationError: when the trace file cannot be read or parsed.
        CLIError: when the requested output format has no renderer.
    """
    trace = _load_trace(trace_file)
    rendered = _render_trace(trace, output)
    if out_file is not None:
        # Ensure the written file ends with exactly one trailing newline.
        suffix = "\n" if not rendered.endswith("\n") else ""
        out_file.write_text(rendered + suffix, encoding="utf-8")
        return
    _console.print(rendered)
38
+
39
+
40
def _load_trace(path: Path) -> Trace:
    """Load one native ASE trace with contextual parse errors."""
    try:
        raw = path.read_text(encoding="utf-8")
        return Trace.model_validate_json(raw)
    except OSError as exc:
        # Filesystem-level failure: missing file, permissions, etc.
        raise TraceSerializationError(f"failed to read trace file {path}: {exc}") from exc
    except Exception as exc:  # noqa: BLE001
        # Anything else (invalid JSON, schema mismatch) is a parse failure.
        raise TraceSerializationError(f"failed to parse trace file {path}: {exc}") from exc
48
+
49
+
50
def _render_trace(trace: Trace, output: OutputFormat) -> str:
    """Choose the stable renderer for the requested output format."""
    renderers = {
        OutputFormat.JSON: lambda t: json.dumps(t.model_dump(mode="json"), indent=2),
        OutputFormat.MARKDOWN: _to_markdown,
        OutputFormat.OTEL_JSON: _to_otel_json,
        OutputFormat.TERMINAL: _to_terminal_text,
    }
    renderer = renderers.get(output)
    if renderer is None:
        raise CLIError(f"unsupported report output format: {output}")
    return renderer(trace)
61
+
62
+
63
+ def _to_terminal_text(trace: Trace) -> str:
64
+ """Summarize the key execution, runtime, and evaluation facts for operators."""
65
+ evaluation = trace.evaluation
66
+ runtime = trace.runtime_provenance
67
+ lines = [
68
+ f"trace_id: {trace.trace_id}",
69
+ f"scenario: {trace.scenario_id}",
70
+ f"status: {trace.status}",
71
+ f"runtime_mode: {runtime.mode if runtime else 'unknown'}",
72
+ f"framework: {runtime.framework if runtime and runtime.framework else 'unknown'}",
73
+ f"tool_calls: {trace.metrics.total_tool_calls}",
74
+ f"llm_calls: {trace.metrics.total_llm_calls}",
75
+ ]
76
+ if evaluation is not None:
77
+ assertions = (
78
+ f"{evaluation.passed_count} passed / "
79
+ f"{evaluation.failed_count} failed / "
80
+ f"{evaluation.total} total"
81
+ )
82
+ lines.extend(
83
+ [
84
+ f"evaluation: {'passed' if evaluation.passed else 'failed'}",
85
+ f"ase_score: {evaluation.ase_score:.2f}",
86
+ f"assertions: {assertions}",
87
+ ]
88
+ )
89
+ return "\n".join(lines)
90
+
91
+
92
+ def _to_markdown(trace: Trace) -> str:
93
+ """Render a short Markdown report suitable for CI summaries and PR comments."""
94
+ evaluation = trace.evaluation
95
+ runtime = trace.runtime_provenance
96
+ lines = [
97
+ "# ASE Trace Report",
98
+ "",
99
+ f"- Trace ID: `{trace.trace_id}`",
100
+ f"- Scenario: `{trace.scenario_id}`",
101
+ f"- Status: `{trace.status}`",
102
+ f"- Runtime: `{runtime.mode if runtime else 'unknown'}`",
103
+ f"- Framework: `{runtime.framework if runtime and runtime.framework else 'unknown'}`",
104
+ f"- Tool calls: `{trace.metrics.total_tool_calls}`",
105
+ ]
106
+ if evaluation is not None:
107
+ lines.append(f"- ASE score: `{evaluation.ase_score:.2f}`")
108
+ return "\n".join(lines)
109
+
110
+
111
def _to_otel_json(trace: Trace) -> str:
    """Delegate OTEL-like export to the trace interoperability layer."""
    # Imported lazily so report rendering works without the OTEL export stack.
    from ase.trace.otel_export import to_otel_dict

    payload = to_otel_dict(trace)
    return json.dumps(payload, indent=2)
ase/cli/spec_cmd.py ADDED
@@ -0,0 +1,53 @@
1
+ """ase spec — validate scenarios and expose their JSON schema."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ from rich.console import Console
11
+
12
+ from ase.scenario.parser import parse_file, schema_path
13
+
14
+ app = typer.Typer(help="Validate ASE scenarios and inspect the public schema.")
15
+ _console = Console()
16
+
17
+
18
@app.command("validate")
def validate(
    scenario: Annotated[Path, typer.Argument(help="Scenario YAML file to validate.")],
) -> None:
    """Validate one scenario file against ASE's current scenario model.

    parse_file raises on invalid input; on success the scenario id is echoed.
    """
    config = parse_file(scenario)
    _console.print(f"valid: {config.scenario_id}")
25
+
26
+
27
@app.command("print-schema")
def print_schema(
    kind: Annotated[
        str,
        typer.Option("--kind", help="scenario | conformance-manifest | conformance-result"),
    ] = "scenario",
    path: Annotated[bool, typer.Option("--path", help="Print the schema file path only.")] = False,
) -> None:
    """Print the public scenario schema or its repo path.

    With --path, only the resolved schema file location is printed; otherwise
    the schema JSON is re-serialized with a stable two-space indent.
    """
    target = _schema_path(kind)
    if path:
        _console.print(str(target))
        return
    # Round-trip through json so the output formatting is deterministic.
    payload = json.loads(target.read_text(encoding="utf-8"))
    _console.print(json.dumps(payload, indent=2))
42
+
43
+
44
def _schema_path(kind: str) -> Path:
    """Resolve public schema aliases onto repo paths."""
    if kind == "scenario":
        return schema_path()
    # Conformance schemas live next to the scenario schema.
    root = schema_path().parent
    aliases = {
        "conformance-manifest": "ase_conformance_manifest.schema.json",
        "conformance-result": "ase_conformance_result.schema.json",
    }
    if kind in aliases:
        return root / aliases[kind]
    raise typer.BadParameter(f"unknown schema kind: {kind}")
ase/cli/test_cmd.py ADDED
@@ -0,0 +1,121 @@
1
+ """ase test — run adapter-backed scenarios through ASE's evaluation flow."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ from rich.console import Console
11
+
12
+ from ase.adapters.protocol import read_and_verify
13
+ from ase.adapters.replay import trace_from_adapter_events
14
+ from ase.config.model import OutputFormat
15
+ from ase.errors import CLIError
16
+ from ase.evaluation.engine import EvaluationEngine
17
+ from ase.evaluation.trace_summary import attach_summary
18
+ from ase.scenario.model import AgentRuntimeMode, ScenarioConfig
19
+ from ase.scenario.parser import parse_file
20
+ from ase.storage.trace_store import TraceStore
21
+ from ase.trace.model import TraceStatus
22
+
23
+ _console = Console()
24
+
25
+
26
def run(
    scenario: Annotated[list[Path] | None, typer.Argument()] = None,
    config: Annotated[Path | None, typer.Option("--config", "-c")] = None,
    output: Annotated[OutputFormat | None, typer.Option("--output", "-o")] = None,
    out_file: Annotated[Path | None, typer.Option("--out-file", "-f")] = None,
    fail_fast: Annotated[bool, typer.Option("--fail-fast")] = False,
    workers: Annotated[int, typer.Option("--workers", "-w")] = 4,
    debug: Annotated[bool, typer.Option("--debug")] = False,
) -> None:
    """Run adapter-backed scenarios through replay, evaluation, and persistence.

    NOTE(review): config/output/out_file/workers/debug are accepted for CLI
    compatibility but are explicitly discarded by this recovered code path —
    confirm whether that is intended.
    """
    del config, output, out_file, workers, debug
    scenario_paths = scenario or []
    if not scenario_paths:
        # No scenarios given: fail rather than silently succeed.
        raise typer.Exit(code=1)
    store = TraceStore()
    _run_all(scenario_paths, store, fail_fast=fail_fast)
42
+
43
+
44
def _run_all(paths: list[Path], store: TraceStore, *, fail_fast: bool) -> None:
    """Execute all requested scenarios and stop early only when requested.

    Args:
        paths: Scenario YAML files to run, in order.
        store: An un-initialized TraceStore; setup/close are handled here.
        fail_fast: Stop at the first failing scenario instead of continuing.

    Raises:
        typer.Exit: with code 1 when any scenario failed.
    """
    import asyncio

    asyncio.run(store.setup())
    failures = 0
    try:
        for path in paths:
            try:
                _run_one(path, store)
            except CLIError as exc:
                failures += 1
                _console.print(f"[red]{exc}[/red]")
                if fail_fast:
                    break
    finally:
        # Bug fix: previously an unexpected (non-CLIError) exception from
        # _run_one skipped store.close() and leaked the store connection.
        asyncio.run(store.close())
    if failures:
        raise typer.Exit(code=1)
61
+
62
+
63
def _run_one(path: Path, store: TraceStore) -> None:
    """Execute one adapter scenario end to end and persist its trace.

    Pipeline: parse scenario -> run agent subprocess -> verify adapter events
    -> replay events into a native trace -> evaluate -> persist -> summarize.

    Raises:
        CLIError: for non-adapter scenarios, failed event verification, or a
            failed scenario (evaluation failed or agent exited non-zero).
    """
    scenario = parse_file(path)
    if scenario.runtime_mode != AgentRuntimeMode.ADAPTER:
        raise CLIError(f"recovered ase test currently supports adapter mode only: {path}")
    event_path = _event_path(path, scenario)
    result = _run_agent(scenario, event_path)
    events, verification = read_and_verify(event_path)
    if not verification.passed:
        details = ", ".join(verification.errors)
        raise CLIError(f"adapter event stream failed verification: {details}")
    trace = trace_from_adapter_events(events, scenario.scenario_id, scenario.name)
    # Keep agent stderr on the trace for later inspection (None when empty).
    trace.stderr_output = result.stderr.strip() or None
    if result.returncode != 0:
        trace.status = TraceStatus.FAILED
        trace.error_message = trace.stderr_output or f"agent exited with code {result.returncode}"
    evaluators = list(scenario.assertions) + list(scenario.policies)
    summary = EvaluationEngine().evaluate(trace, evaluators, {})
    attach_summary(trace, summary)
    import asyncio

    asyncio.run(store.save_trace(trace, ase_score=summary.ase_score))
    _render_summary(trace, summary)
    # A scenario fails when evaluation failed OR the agent exited non-zero.
    if not summary.passed or result.returncode != 0:
        raise CLIError(f"scenario failed: {scenario.scenario_id}")
88
+
89
+
90
+ def _run_agent(scenario: ScenarioConfig, event_path: Path) -> subprocess.CompletedProcess[str]:
91
+ """Run the scenario agent with the event sink path exported to the process."""
92
+ event_path.unlink(missing_ok=True)
93
+ env = dict(scenario.agent.env)
94
+ env.update({"ASE_ADAPTER_EVENT_SOURCE": str(event_path)})
95
+ return subprocess.run(
96
+ scenario.agent.command,
97
+ cwd=Path.cwd(),
98
+ env={**dict(__import__("os").environ), **env},
99
+ capture_output=True,
100
+ text=True,
101
+ check=False,
102
+ )
103
+
104
+
105
+ def _event_path(path: Path, scenario: ScenarioConfig) -> Path:
106
+ """Resolve an adapter event file relative to the scenario file location."""
107
+ runtime = scenario.agent_runtime
108
+ if runtime is None or not runtime.event_source:
109
+ raise CLIError(f"adapter runtime missing event_source: {path}")
110
+ source = Path(runtime.event_source)
111
+ return source if source.is_absolute() else path.resolve().parent / source
112
+
113
+
114
def _render_summary(trace: object, summary: object) -> None:
    """Print a compact operator-facing outcome for the recovered test path."""
    # getattr with defaults keeps this resilient to partially-built objects.
    trace_id = getattr(trace, "trace_id", "unknown")
    scenario_id = getattr(trace, "scenario_id", "unknown")
    score = getattr(summary, "ase_score", 0.0)
    verdict = "[green]PASS[/green]" if getattr(summary, "passed", False) else "[red]FAIL[/red]"
    _console.print(f"{verdict} {scenario_id} trace={trace_id} ase_score={score:.2f}")