ase-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ase/__init__.py +21 -0
- ase/adapters/__init__.py +14 -0
- ase/adapters/contract.py +28 -0
- ase/adapters/frameworks/__init__.py +17 -0
- ase/adapters/frameworks/base.py +259 -0
- ase/adapters/frameworks/langgraph.py +19 -0
- ase/adapters/frameworks/mcp.py +68 -0
- ase/adapters/frameworks/openai_agents.py +19 -0
- ase/adapters/frameworks/pydantic_ai.py +19 -0
- ase/adapters/io.py +50 -0
- ase/adapters/model.py +89 -0
- ase/adapters/protocol.py +72 -0
- ase/adapters/replay.py +261 -0
- ase/cli/__init__.py +7 -0
- ase/cli/_trace_outputs.py +40 -0
- ase/cli/adapter_cmd.py +38 -0
- ase/cli/certify_cmd.py +74 -0
- ase/cli/compare.py +145 -0
- ase/cli/doctor_cmd.py +45 -0
- ase/cli/examples_cmd.py +27 -0
- ase/cli/history_cmd.py +126 -0
- ase/cli/import_cmd.py +34 -0
- ase/cli/main.py +134 -0
- ase/cli/replay_cmd.py +48 -0
- ase/cli/report.py +115 -0
- ase/cli/spec_cmd.py +53 -0
- ase/cli/test_cmd.py +121 -0
- ase/config/env_loader.py +71 -0
- ase/config/loader.py +82 -0
- ase/config/model.py +51 -0
- ase/conformance/__init__.py +7 -0
- ase/conformance/matrix.py +111 -0
- ase/conformance/model.py +91 -0
- ase/conformance/schema.py +37 -0
- ase/conformance/service.py +194 -0
- ase/core/engine.py +348 -0
- ase/errors.py +59 -0
- ase/evaluation/__init__.py +7 -0
- ase/evaluation/base.py +63 -0
- ase/evaluation/consistency.py +79 -0
- ase/evaluation/correctness.py +117 -0
- ase/evaluation/efficiency.py +145 -0
- ase/evaluation/engine.py +182 -0
- ase/evaluation/policy.py +134 -0
- ase/evaluation/scoring.py +64 -0
- ase/evaluation/trace_summary.py +36 -0
- ase/examples_matrix.py +118 -0
- ase/reporting/__init__.py +7 -0
- ase/reporting/json_report.py +45 -0
- ase/reporting/junit.py +38 -0
- ase/reporting/markdown.py +32 -0
- ase/reporting/terminal.py +66 -0
- ase/scenario/__init__.py +7 -0
- ase/scenario/model.py +294 -0
- ase/scenario/parser.py +40 -0
- ase/storage/__init__.py +7 -0
- ase/storage/trace_store.py +136 -0
- ase/trace/__init__.py +7 -0
- ase/trace/builder.py +175 -0
- ase/trace/model.py +264 -0
- ase/trace/otel_export.py +75 -0
- ase/trace/otel_import.py +96 -0
- ase/trace/redaction.py +10 -0
- ase/trace/serializer.py +50 -0
- ase_python-0.1.0.dist-info/METADATA +184 -0
- ase_python-0.1.0.dist-info/RECORD +69 -0
- ase_python-0.1.0.dist-info/WHEEL +4 -0
- ase_python-0.1.0.dist-info/entry_points.txt +2 -0
- ase_python-0.1.0.dist-info/licenses/LICENSE +105 -0
ase/adapters/replay.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""Convert adapter event streams into ASE-native traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ase.adapters.model import AdapterEvent, AdapterEventType
|
|
8
|
+
from ase.trace.builder import TraceBuilder
|
|
9
|
+
from ase.trace.model import (
|
|
10
|
+
AdapterMetadata,
|
|
11
|
+
AgentGraphNode,
|
|
12
|
+
ExternalTraceRef,
|
|
13
|
+
HandoffEdge,
|
|
14
|
+
ProtocolEvent,
|
|
15
|
+
RuntimeProvenance,
|
|
16
|
+
SessionTraceEvent,
|
|
17
|
+
ToolCallEvent,
|
|
18
|
+
ToolCallKind,
|
|
19
|
+
Trace,
|
|
20
|
+
TraceStatus,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def trace_from_adapter_events(
    events: list[AdapterEvent],
    scenario_id: str,
    scenario_name: str,
) -> Trace:
    """Turn a validated adapter event stream into a native ASE trace.

    Replays every event through a fresh ``TraceBuilder``, then lifts
    adapter-level metadata from the stream onto the finished trace root.
    """
    trace_builder = TraceBuilder(scenario_id=scenario_id, scenario_name=scenario_name)
    replayed = _replay_events(trace_builder, events)
    _attach_adapter_metadata(replayed, events)
    return replayed
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _replay_events(builder: TraceBuilder, events: list[AdapterEvent]) -> Trace:
    """Replay adapter events into trace-level tool calls and protocol blocks.

    Walks the stream once, in order. ``tool_start`` events are parked in
    ``pending_tools`` (keyed by span id) until their matching ``tool_end``
    arrives; session, handoff, guardrail, human-feedback, and stream-chunk
    events are preserved as protocol events. The trace status degrades to
    FAILED on error-like tool_end/agent_end statuses, and to ERROR when the
    stream ends with unmatched tool_start spans.
    """
    # tool_start events waiting for their matching tool_end, by span key.
    pending_tools: dict[str, AdapterEvent] = {}
    status = TraceStatus.PASSED
    error_message: str | None = None
    trace = builder.current_trace

    for event in events:
        # Agent identity and external trace refs are tracked for every
        # event, regardless of its type.
        _track_agent_node(trace, event)
        _attach_external_trace_ref(trace, event)
        if event.event_type == AdapterEventType.APPROVAL:
            builder.add_approval(approval=_approval_event(event))
            continue
        if event.event_type == AdapterEventType.TOOL_START:
            _append_tool_protocol_event(trace, event)
            # Fall back to event_id when the adapter assigned no span ids.
            pending_tools[event.span_id or event.event_id] = event
            continue
        if event.event_type == AdapterEventType.TOOL_END:
            _append_tool_call(builder, pending_tools, event)
            _append_tool_protocol_event(trace, event)
            if _is_error_status(event.status):
                status = TraceStatus.FAILED
                # Keep an earlier message when this event carries none.
                error_message = event.message or error_message
            continue
        if event.event_type in {
            AdapterEventType.SESSION_READ,
            AdapterEventType.SESSION_WRITE,
        }:
            trace.session_events.append(_session_event(event))
            trace.protocol_events.append(_protocol_event(event))
            continue
        if event.event_type == AdapterEventType.HANDOFF:
            # No `continue`: a handoff also produces a protocol event below.
            trace.handoff_edges.append(_handoff_edge(event))
        if event.event_type in {
            AdapterEventType.HANDOFF,
            AdapterEventType.GUARDRAIL,
            AdapterEventType.HUMAN_FEEDBACK,
            AdapterEventType.STREAM_CHUNK,
        }:
            trace.protocol_events.append(_protocol_event(event))
        if event.event_type == AdapterEventType.AGENT_END and _is_error_status(event.status):
            status = TraceStatus.FAILED
            error_message = event.message or error_message

    # Only a still-passing trace is downgraded here; an already-failed trace
    # keeps its original failure status and message.
    if pending_tools and status == TraceStatus.PASSED:
        status = TraceStatus.ERROR
        error_message = "adapter event stream ended with unfinished tool spans"
    return builder.finish(status=status, error_message=error_message)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _append_tool_call(
    builder: TraceBuilder,
    pending_tools: dict[str, AdapterEvent],
    event: AdapterEvent,
) -> None:
    """Combine tool_start/tool_end events into one ASE tool call record."""
    key = event.span_id or event.event_id
    # When no matching start was recorded, the end event doubles as its own
    # start, which yields a zero-duration call.
    start_event = pending_tools.pop(key, event)
    elapsed_ms = event.timestamp_ms - start_event.timestamp_ms
    if elapsed_ms < 0.0:
        elapsed_ms = 0.0
    call = ToolCallEvent(
        kind=_tool_call_kind(event.tool_kind),
        method=event.method or "UNKNOWN",
        target=event.target or "unknown",
        payload=start_event.data or start_event.metadata,
        response_status=_response_status(event),
        response_body=_response_body(event),
        duration_ms=elapsed_ms,
    )
    builder.add_tool_call(call)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _tool_call_kind(raw_kind: str | None) -> ToolCallKind:
    """Map adapter tool kinds onto ASE's native tool-call taxonomy."""
    candidate = raw_kind if raw_kind else ToolCallKind.UNKNOWN.value
    try:
        return ToolCallKind(candidate)
    except ValueError:
        # Unrecognized adapter kinds degrade to UNKNOWN instead of failing.
        return ToolCallKind.UNKNOWN
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _track_agent_node(trace: Trace, event: AdapterEvent) -> None:
    """Register agent identities for multi-agent traces."""
    agent_id = event.agent_id
    if not agent_id:
        return
    # De-duplicate: each agent appears in the graph at most once.
    if any(node.agent_id == agent_id for node in trace.agent_graph.nodes):
        return
    node = AgentGraphNode(
        agent_id=agent_id,
        name=event.name,
        parent_agent_id=event.parent_agent_id,
        metadata=event.metadata,
    )
    trace.agent_graph.nodes.append(node)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _attach_adapter_metadata(trace: Trace, events: list[AdapterEvent]) -> None:
    """Lift adapter metadata from the stream onto the trace root."""
    meta = _first_metadata(events)
    adapter_name = str(meta.get("adapter_name", "external-adapter"))
    transport_kind = str(meta.get("transport", "jsonl-stdio"))
    framework = _optional_str(meta.get("framework"))
    adapter_version = _optional_str(meta.get("adapter_version"))
    trace.adapter_metadata = AdapterMetadata(
        name=adapter_name,
        transport=transport_kind,
        framework=framework,
        language=_optional_str(meta.get("language")),
        version=adapter_version,
        source="adapter",
        metadata=dict(meta),
    )
    # Runtime provenance mirrors the adapter metadata so downstream tooling
    # can distinguish adapter-imported traces from natively recorded ones.
    trace.runtime_provenance = RuntimeProvenance(
        mode="adapter",
        framework=framework,
        framework_version=_optional_str(meta.get("framework_version")),
        adapter_name=adapter_name,
        adapter_version=adapter_version,
        event_source=_optional_str(meta.get("event_source")),
        metadata=dict(meta),
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _attach_external_trace_ref(trace: Trace, event: AdapterEvent) -> None:
    """Preserve references to external tracing systems when provided."""
    external = event.metadata.get("external_trace")
    if not isinstance(external, dict):
        return
    system = external.get("system")
    trace_id = external.get("trace_id")
    # Both identifiers are required for a usable reference.
    if not system or not trace_id:
        return
    extra = {
        key: value
        for key, value in external.items()
        if key not in {"system", "trace_id", "url"}
    }
    ref = ExternalTraceRef(
        system=str(system),
        trace_id=str(trace_id),
        url=_optional_str(external.get("url")),
        metadata=extra,
    )
    trace.external_trace_refs.append(ref)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _session_event(event: AdapterEvent) -> SessionTraceEvent:
    """Translate adapter session activity into an ASE session event."""
    # Copy the data mapping so the trace owns its own metadata dict.
    payload = dict(event.data)
    return SessionTraceEvent(
        session_id=event.session_id if event.session_id else "unknown",
        operation=event.event_type.value,
        timestamp_ms=event.timestamp_ms,
        agent_id=event.agent_id,
        key=_optional_str(event.data.get("key")),
        metadata=payload,
    )
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _handoff_edge(event: AdapterEvent) -> HandoffEdge:
    """Translate an adapter handoff event into an ASE handoff edge."""
    source = event.agent_id if event.agent_id else "unknown"
    destination = event.target_agent_id if event.target_agent_id else "unknown"
    return HandoffEdge(
        from_agent_id=source,
        to_agent_id=destination,
        timestamp_ms=event.timestamp_ms,
        label=event.name,
        metadata=dict(event.data),
    )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _protocol_event(event: AdapterEvent) -> ProtocolEvent:
    """Preserve non-tool protocol events for replay and certification."""
    # Event metadata keys override the data/message envelope, matching the
    # precedence of a `{..., **event.metadata}` merge.
    combined: dict[str, Any] = {"data": dict(event.data), "message": event.message}
    combined.update(event.metadata)
    return ProtocolEvent(
        protocol=event.protocol if event.protocol else "adapter",
        event_type=event.event_type.value,
        timestamp_ms=event.timestamp_ms,
        agent_id=event.agent_id,
        metadata=combined,
    )
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _approval_event(event: AdapterEvent) -> Any:
    """Build an approval event compatible with TraceBuilder's API."""
    # Imported lazily, mirroring the module's pattern for deferred imports.
    from ase.trace.model import ApprovalEvent

    # A missing `granted` flag defaults to an approved decision.
    granted = True if event.granted is None else bool(event.granted)
    return ApprovalEvent(
        approval_id=event.approval_id if event.approval_id else "approval",
        actor=event.agent_id if event.agent_id else "adapter",
        granted=granted,
    )
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _response_status(event: AdapterEvent) -> int | None:
|
|
225
|
+
"""Extract a response status code from adapter data when present."""
|
|
226
|
+
raw_status = event.data.get("response_status") or event.data.get("status_code")
|
|
227
|
+
return raw_status if isinstance(raw_status, int) else None
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _response_body(event: AdapterEvent) -> dict[str, Any] | None:
|
|
231
|
+
"""Preserve tool-end payloads and protocol hints for imported traces."""
|
|
232
|
+
body = dict(event.data or {})
|
|
233
|
+
if event.protocol is not None:
|
|
234
|
+
body.setdefault("protocol", event.protocol)
|
|
235
|
+
return body or None
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _append_tool_protocol_event(trace: Trace, event: AdapterEvent) -> None:
    """Preserve non-default tool protocols such as MCP alongside tool calls."""
    if event.protocol is None:
        return
    trace.protocol_events.append(_protocol_event(event))
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _is_error_status(status: str | None) -> bool:
|
|
245
|
+
"""Treat explicit error-like statuses as failed execution outcomes."""
|
|
246
|
+
return (status or "").lower() in {"error", "failed", "failure", "timeout"}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _first_metadata(events: list[AdapterEvent]) -> dict[str, Any]:
|
|
250
|
+
"""Return the first non-empty metadata mapping from the stream."""
|
|
251
|
+
for event in events:
|
|
252
|
+
if event.metadata:
|
|
253
|
+
return dict(event.metadata)
|
|
254
|
+
return {}
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _optional_str(value: Any) -> str | None:
|
|
258
|
+
"""Convert optional values to strings when present."""
|
|
259
|
+
if value is None:
|
|
260
|
+
return None
|
|
261
|
+
return str(value)
|
ase/cli/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Shared helpers for writing native traces and rendered outputs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ase.config.model import OutputFormat
|
|
8
|
+
from ase.reporting import json_report, markdown
|
|
9
|
+
from ase.trace.model import Trace
|
|
10
|
+
from ase.trace.otel_export import to_otel_dict
|
|
11
|
+
from ase.trace.serializer import write_to_file
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def write_trace_artifacts(
    trace: Trace,
    *,
    trace_out: Path | None = None,
    output: OutputFormat | None = None,
    out_file: Path | None = None,
) -> None:
    """Write native traces and optional rendered outputs for CLI workflows."""
    if trace_out is not None:
        write_to_file(trace, trace_out)
    # A rendered artifact requires both a format and a destination.
    if output is not None and out_file is not None:
        rendered = render_trace(trace, output)
        out_file.write_text(rendered + "\n", encoding="utf-8")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def render_trace(trace: Trace, output: OutputFormat) -> str:
    """Render one trace using the requested public output format."""
    if output == OutputFormat.MARKDOWN:
        return markdown.to_string(trace=trace)
    if output == OutputFormat.OTEL_JSON:
        import json

        return json.dumps(to_otel_dict(trace), indent=2)
    # JSON serves both the explicit JSON format and as the fallback for any
    # format without a dedicated renderer here.
    return json_report.to_string(trace=trace)
|
ase/cli/adapter_cmd.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Adapter protocol CLI commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from ase.adapters.protocol import read_and_verify
|
|
12
|
+
from ase.errors import AdapterProtocolError
|
|
13
|
+
|
|
14
|
+
app = typer.Typer(help="Validate ASE adapter event streams.")
|
|
15
|
+
_console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@app.command("verify")
def verify(
    event_file: Annotated[Path, typer.Argument(help="JSONL adapter event file to validate.")],
) -> None:
    """Validate an adapter event file and print its event-family counts.

    Exits with code 1 when the file cannot be parsed as an adapter event
    stream or when verification reports errors; warnings alone do not fail
    the command.
    """
    try:
        _events, result = read_and_verify(event_file)
    except AdapterProtocolError as exc:
        _console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=1) from exc
    _console.print(f"events: {result.total_events}")
    # Sorted for deterministic, diff-friendly output.
    for event_type, count in sorted(result.event_type_counts.items()):
        _console.print(f" {event_type}: {count}")
    for warning in result.warnings:
        _console.print(f"[yellow]{warning}[/yellow]")
    if result.passed:
        _console.print("[green]adapter event stream passed[/green]")
        return
    for error in result.errors:
        _console.print(f"[red]{error}[/red]")
    raise typer.Exit(code=1)
|
ase/cli/certify_cmd.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""ase certify — validate a manifest and emit certification output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from ase.conformance.service import certify_manifest, load_manifest, sign_result
|
|
14
|
+
from ase.errors import ConformanceError
|
|
15
|
+
|
|
16
|
+
_console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run(
    manifest: Annotated[Path, typer.Argument(help="Conformance manifest to certify.")],
    out_file: Annotated[
        Path | None,
        typer.Option("--out-file", "-f", help="Write certification JSON to this file."),
    ] = None,
    signing_key_env: Annotated[
        str | None,
        typer.Option("--signing-key-env", help="Env var containing the HMAC signing key."),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", help="Render all certification checks."),
    ] = False,
) -> None:
    """Run a conformance manifest and emit a certification result.

    Exits with code 1 when the manifest fails to load or certify, or when
    the certification result itself does not pass.
    """
    try:
        loaded = load_manifest(manifest)
        # sign_result receives the env-var NAME; key resolution happens there.
        result = sign_result(certify_manifest(loaded, manifest), signing_key_env=signing_key_env)
    except ConformanceError as exc:
        _console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=1) from exc

    if out_file is not None:
        # The JSON artifact is written even for failing results, so CI can
        # archive it before the non-zero exit below.
        out_file.write_text(json.dumps(result.model_dump(), indent=2) + "\n", encoding="utf-8")
    _render_result(result, verbose)
    if not result.passed:
        raise typer.Exit(code=1)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _render_result(result: object, verbose: bool) -> None:
    """Render a compact certification summary with optional detailed checks.

    Args:
        result: the certification outcome; must be a ``ConformanceResult``.
        verbose: when True, always render the per-check table.

    Raises:
        TypeError: if *result* is not a ``ConformanceResult``. (The previous
            ``assert isinstance`` would be silently stripped under
            ``python -O``, so the guard is now an explicit check.)
    """
    # Imported lazily, matching the module's pattern for deferred imports.
    from ase.conformance.model import ConformanceResult

    if not isinstance(result, ConformanceResult):
        raise TypeError(f"expected ConformanceResult, got {type(result).__name__}")
    status = "[green]passed[/green]" if result.passed else "[red]failed[/red]"
    _console.print(f"framework: {result.framework or 'unknown'}")
    _console.print(f"adapter: {result.adapter_name}")
    _console.print(f"bundle: {result.bundle_family}@{result.bundle_version}")
    _console.print(f"level: {result.certification_level.value}")
    _console.print(f"status: {status}")
    # Successful runs stay quiet unless the operator asked for detail.
    if not verbose and result.passed:
        return
    table = Table(title="Certification Checks")
    table.add_column("Case")
    table.add_column("Check")
    table.add_column("Status")
    table.add_column("Message")
    for check in result.checks:
        table.add_row(
            check.case_id,
            check.check_id,
            "passed" if check.passed else "failed",
            check.message,
        )
    _console.print(table)
|
ase/cli/compare.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""ase compare — diff two saved ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated, Any
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
from ase.config.model import OutputFormat
|
|
13
|
+
from ase.errors import CLIError, TraceSerializationError
|
|
14
|
+
from ase.trace.model import Trace
|
|
15
|
+
|
|
16
|
+
_console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run(
    baseline: Annotated[Path, typer.Argument(help="Baseline native ASE trace JSON file.")],
    candidate: Annotated[Path, typer.Argument(help="Candidate native ASE trace JSON file.")],
    output: Annotated[
        OutputFormat,
        typer.Option("--output", "-o", help="Output format."),
    ] = OutputFormat.TERMINAL,
) -> None:
    """Compare two traces by stable runtime, evaluation, and metric fields.

    Raises:
        CLIError: when *output* is a format this command does not support
            (only terminal, json, and markdown are rendered here).
        TraceSerializationError: when either trace file cannot be read or
            parsed (raised by ``_load_trace``).
    """
    baseline_trace = _load_trace(baseline)
    candidate_trace = _load_trace(candidate)
    diff = _build_diff(baseline_trace, candidate_trace)
    if output == OutputFormat.JSON:
        _console.print(json.dumps(diff, indent=2))
        return
    if output == OutputFormat.MARKDOWN:
        _console.print(_to_markdown(diff))
        return
    # Any remaining non-terminal format is explicitly rejected.
    if output != OutputFormat.TERMINAL:
        raise CLIError(f"unsupported compare output format: {output}")
    _console.print(_to_terminal_text(diff))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _load_trace(path: Path) -> Trace:
    """Load one native ASE trace with contextual parse errors."""
    try:
        raw_json = path.read_text(encoding="utf-8")
    except OSError as exc:
        raise TraceSerializationError(f"failed to read trace file {path}: {exc}") from exc
    try:
        return Trace.model_validate_json(raw_json)
    except Exception as exc:  # noqa: BLE001
        # Any validation/parse failure is wrapped with the offending path.
        raise TraceSerializationError(f"failed to parse trace file {path}: {exc}") from exc
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _build_diff(baseline: Trace, candidate: Trace) -> dict[str, Any]:
    """Produce a small, stable diff for operator comparison workflows.

    The returned keys are consumed verbatim by ``_to_terminal_text`` and
    ``_to_markdown``, so they form a de-facto schema: two-element lists hold
    [baseline, candidate] pairs, and ``ase_score_delta`` is candidate minus
    baseline. Missing evaluation/runtime sections degrade to None/0.0
    rather than raising.
    """
    base_eval = baseline.evaluation
    cand_eval = candidate.evaluation
    base_runtime = baseline.runtime_provenance
    cand_runtime = candidate.runtime_provenance
    # Evaluators failing in the candidate but not the baseline (regressions)…
    added = sorted(
        set(cand_eval.failing_evaluators if cand_eval else [])
        - set(base_eval.failing_evaluators if base_eval else [])
    )
    # …and evaluators fixed in the candidate (improvements).
    removed = sorted(
        set(base_eval.failing_evaluators if base_eval else [])
        - set(cand_eval.failing_evaluators if cand_eval else [])
    )
    return {
        "baseline_trace_id": baseline.trace_id,
        "candidate_trace_id": candidate.trace_id,
        "scenario_ids": [baseline.scenario_id, candidate.scenario_id],
        "runtime_mode": [
            base_runtime.mode if base_runtime else None,
            cand_runtime.mode if cand_runtime else None,
        ],
        "framework": [
            base_runtime.framework if base_runtime else None,
            cand_runtime.framework if cand_runtime else None,
        ],
        "status": [baseline.status, candidate.status],
        "evaluation_passed": [
            base_eval.passed if base_eval else None,
            cand_eval.passed if cand_eval else None,
        ],
        # Missing evaluation counts as a 0.0 score on that side.
        "ase_score_delta": (cand_eval.ase_score if cand_eval else 0.0)
        - (base_eval.ase_score if base_eval else 0.0),
        "failing_evaluators_added": added,
        "failing_evaluators_removed": removed,
        "metrics": {
            "tool_calls": [
                baseline.metrics.total_tool_calls,
                candidate.metrics.total_tool_calls,
            ],
            "llm_calls": [
                baseline.metrics.total_llm_calls,
                candidate.metrics.total_llm_calls,
            ],
            "tokens": [
                baseline.metrics.total_tokens_used,
                candidate.metrics.total_tokens_used,
            ],
            "duration_ms": [
                baseline.metrics.total_duration_ms,
                candidate.metrics.total_duration_ms,
            ],
            "tool_breakdown": [
                baseline.metrics.tool_call_breakdown,
                candidate.metrics.tool_call_breakdown,
            ],
        },
    }
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _to_terminal_text(diff: dict[str, Any]) -> str:
|
|
113
|
+
"""Render a compact diff for direct terminal use."""
|
|
114
|
+
metrics = diff["metrics"]
|
|
115
|
+
return "\n".join(
|
|
116
|
+
[
|
|
117
|
+
f"baseline: {diff['baseline_trace_id']}",
|
|
118
|
+
f"candidate: {diff['candidate_trace_id']}",
|
|
119
|
+
f"scenario_ids: {diff['scenario_ids'][0]} -> {diff['scenario_ids'][1]}",
|
|
120
|
+
f"runtime_mode: {diff['runtime_mode'][0]} -> {diff['runtime_mode'][1]}",
|
|
121
|
+
f"framework: {diff['framework'][0]} -> {diff['framework'][1]}",
|
|
122
|
+
f"status: {diff['status'][0]} -> {diff['status'][1]}",
|
|
123
|
+
f"evaluation: {diff['evaluation_passed'][0]} -> {diff['evaluation_passed'][1]}",
|
|
124
|
+
f"ase_score_delta: {diff['ase_score_delta']:.2f}",
|
|
125
|
+
f"tool_calls: {metrics['tool_calls'][0]} -> {metrics['tool_calls'][1]}",
|
|
126
|
+
f"tokens: {metrics['tokens'][0]} -> {metrics['tokens'][1]}",
|
|
127
|
+
]
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _to_markdown(diff: dict[str, Any]) -> str:
|
|
132
|
+
"""Render a short Markdown diff for CI and review surfaces."""
|
|
133
|
+
metrics = diff["metrics"]
|
|
134
|
+
return "\n".join(
|
|
135
|
+
[
|
|
136
|
+
"# ASE Trace Diff",
|
|
137
|
+
"",
|
|
138
|
+
f"- Baseline: `{diff['baseline_trace_id']}`",
|
|
139
|
+
f"- Candidate: `{diff['candidate_trace_id']}`",
|
|
140
|
+
f"- Status: `{diff['status'][0]}` -> `{diff['status'][1]}`",
|
|
141
|
+
f"- Evaluation: `{diff['evaluation_passed'][0]}` -> `{diff['evaluation_passed'][1]}`",
|
|
142
|
+
f"- ASE score delta: `{diff['ase_score_delta']:.2f}`",
|
|
143
|
+
f"- Tool calls: `{metrics['tool_calls'][0]}` -> `{metrics['tool_calls'][1]}`",
|
|
144
|
+
]
|
|
145
|
+
)
|
ase/cli/doctor_cmd.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""ase doctor — lightweight environment sanity checks.
|
|
2
|
+
|
|
3
|
+
This command exists to give operators a neutral boot-health signal without
|
|
4
|
+
requiring a full scenario run.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import platform
|
|
10
|
+
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.table import Table
|
|
13
|
+
|
|
14
|
+
_console = Console()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def run() -> None:
    """Show whether ASE's core imports and runtime prerequisites are available."""
    report = Table(title="ASE Doctor", expand=False)
    report.add_column("Check", style="bold")
    report.add_column("Status")
    report.add_column("Details")
    for check_name, check_status, details in _rows():
        report.add_row(check_name, check_status, details)
    _console.print(report)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _rows() -> list[tuple[str, str, str]]:
    """Keep doctor output deterministic and safe for clean environment checks."""
    probes = [
        ("trace_model", "ase.trace.model", "Trace"),
        ("config_loader", "ase.config.loader", "load_config"),
        ("evaluation_engine", "ase.evaluation.engine", "EvaluationEngine"),
    ]
    rows: list[tuple[str, str, str]] = [("python", "ok", platform.python_version())]
    for label, module_name, attr_name in probes:
        rows.append((label, *_import_status(module_name, attr_name)))
    return rows
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _import_status(module_name: str, attr_name: str) -> tuple[str, str]:
|
|
39
|
+
"""Convert importability into simple operator-facing status strings."""
|
|
40
|
+
try:
|
|
41
|
+
module = __import__(module_name, fromlist=[attr_name])
|
|
42
|
+
getattr(module, attr_name)
|
|
43
|
+
except (AttributeError, ImportError, ModuleNotFoundError) as exc:
|
|
44
|
+
return ("fail", str(exc))
|
|
45
|
+
return ("ok", "available")
|
ase/cli/examples_cmd.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Public example-matrix CLI commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
|
|
10
|
+
from ase.examples_matrix import run_examples
|
|
11
|
+
|
|
12
|
+
app = typer.Typer(help="Run ASE's public example matrix.")
|
|
13
|
+
_console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@app.command("run")
def run(
    example: Annotated[
        list[str] | None,
        typer.Option("--example", help="Run only the named example(s)."),
    ] = None,
) -> None:
    """Run the supported example matrix with the same commands users run.

    When ``--example`` is omitted, ``run_examples`` receives ``None``;
    filtering behavior is delegated to ``run_examples``. Prints one
    colored pass/fail line per example result.
    """
    results = run_examples(example)
    for result in results:
        status = "[green]passed[/green]" if result.passed else "[red]failed[/red]"
        _console.print(f"{result.example_name}: {status}")
|