ase-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ase/__init__.py +21 -0
- ase/adapters/__init__.py +14 -0
- ase/adapters/contract.py +28 -0
- ase/adapters/frameworks/__init__.py +17 -0
- ase/adapters/frameworks/base.py +259 -0
- ase/adapters/frameworks/langgraph.py +19 -0
- ase/adapters/frameworks/mcp.py +68 -0
- ase/adapters/frameworks/openai_agents.py +19 -0
- ase/adapters/frameworks/pydantic_ai.py +19 -0
- ase/adapters/io.py +50 -0
- ase/adapters/model.py +89 -0
- ase/adapters/protocol.py +72 -0
- ase/adapters/replay.py +261 -0
- ase/cli/__init__.py +7 -0
- ase/cli/_trace_outputs.py +40 -0
- ase/cli/adapter_cmd.py +38 -0
- ase/cli/certify_cmd.py +74 -0
- ase/cli/compare.py +145 -0
- ase/cli/doctor_cmd.py +45 -0
- ase/cli/examples_cmd.py +27 -0
- ase/cli/history_cmd.py +126 -0
- ase/cli/import_cmd.py +34 -0
- ase/cli/main.py +134 -0
- ase/cli/replay_cmd.py +48 -0
- ase/cli/report.py +115 -0
- ase/cli/spec_cmd.py +53 -0
- ase/cli/test_cmd.py +121 -0
- ase/config/env_loader.py +71 -0
- ase/config/loader.py +82 -0
- ase/config/model.py +51 -0
- ase/conformance/__init__.py +7 -0
- ase/conformance/matrix.py +111 -0
- ase/conformance/model.py +91 -0
- ase/conformance/schema.py +37 -0
- ase/conformance/service.py +194 -0
- ase/core/engine.py +348 -0
- ase/errors.py +59 -0
- ase/evaluation/__init__.py +7 -0
- ase/evaluation/base.py +63 -0
- ase/evaluation/consistency.py +79 -0
- ase/evaluation/correctness.py +117 -0
- ase/evaluation/efficiency.py +145 -0
- ase/evaluation/engine.py +182 -0
- ase/evaluation/policy.py +134 -0
- ase/evaluation/scoring.py +64 -0
- ase/evaluation/trace_summary.py +36 -0
- ase/examples_matrix.py +118 -0
- ase/reporting/__init__.py +7 -0
- ase/reporting/json_report.py +45 -0
- ase/reporting/junit.py +38 -0
- ase/reporting/markdown.py +32 -0
- ase/reporting/terminal.py +66 -0
- ase/scenario/__init__.py +7 -0
- ase/scenario/model.py +294 -0
- ase/scenario/parser.py +40 -0
- ase/storage/__init__.py +7 -0
- ase/storage/trace_store.py +136 -0
- ase/trace/__init__.py +7 -0
- ase/trace/builder.py +175 -0
- ase/trace/model.py +264 -0
- ase/trace/otel_export.py +75 -0
- ase/trace/otel_import.py +96 -0
- ase/trace/redaction.py +10 -0
- ase/trace/serializer.py +50 -0
- ase_python-0.1.0.dist-info/METADATA +184 -0
- ase_python-0.1.0.dist-info/RECORD +69 -0
- ase_python-0.1.0.dist-info/WHEEL +4 -0
- ase_python-0.1.0.dist-info/entry_points.txt +2 -0
- ase_python-0.1.0.dist-info/licenses/LICENSE +105 -0
ase/trace/builder.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Helpers for incrementally constructing ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import ulid
|
|
11
|
+
|
|
12
|
+
from ase.errors import TraceError
|
|
13
|
+
from ase.trace.model import (
|
|
14
|
+
ApprovalEvent,
|
|
15
|
+
DeterminismMetadata,
|
|
16
|
+
LLMRequestEvent,
|
|
17
|
+
LLMResponseEvent,
|
|
18
|
+
RuntimeProvenance,
|
|
19
|
+
ToolCallEvent,
|
|
20
|
+
Trace,
|
|
21
|
+
TraceEvent,
|
|
22
|
+
TraceEventKind,
|
|
23
|
+
TraceMetrics,
|
|
24
|
+
TraceStatus,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TraceBuilder:
    """Accumulate runtime events into a single trace and finalize it once."""

    def __init__(
        self,
        scenario_id: str,
        scenario_name: str,
        tags: dict[str, str] | None = None,
    ) -> None:
        # Copy the tags so the trace never aliases the caller's dict.
        tag_copy = dict(tags or {})
        self._trace = Trace(
            trace_id=str(ulid.new()),
            scenario_id=scenario_id,
            scenario_name=scenario_name,
            tags=tag_copy,
        )

    @property
    def current_trace(self) -> Trace:
        """Return the live, still-mutable trace object held by this builder."""
        return self._trace

    def _record(self, kind: TraceEventKind, **payload: Any) -> TraceBuilder:
        """Wrap one payload in a fresh TraceEvent, append it, and return self."""
        event = TraceEvent(event_id=str(ulid.new()), kind=kind, **payload)
        self._trace.events.append(event)
        return self

    def add_tool_call(self, tool_call: ToolCallEvent) -> TraceBuilder:
        """Record a tool call as the next event; returns self for chaining."""
        return self._record(TraceEventKind.TOOL_CALL, tool_call=tool_call)

    def add_approval(self, approval: ApprovalEvent) -> TraceBuilder:
        """Record an approval as the next event; returns self for chaining."""
        return self._record(TraceEventKind.APPROVAL, approval=approval)

    def add_llm_request(self, llm_request: LLMRequestEvent) -> TraceBuilder:
        """Record an LLM request as the next event; returns self for chaining."""
        return self._record(TraceEventKind.LLM_REQUEST, llm_request=llm_request)

    def add_llm_response(self, llm_response: LLMResponseEvent) -> TraceBuilder:
        """Record an LLM response as the next event; returns self for chaining."""
        return self._record(TraceEventKind.LLM_RESPONSE, llm_response=llm_response)

    def add_raw_event(self, event: TraceEvent) -> TraceBuilder:
        """Append a caller-constructed event as-is; returns self for chaining."""
        self._trace.events.append(event)
        return self

    def set_runtime_provenance(
        self,
        mode: str,
        framework: str | None = None,
        framework_version: str | None = None,
        adapter_name: str | None = None,
        adapter_version: str | None = None,
        conformance_bundle_version: str | None = None,
        event_source: str | None = None,
        metadata: dict[str, object] | None = None,
    ) -> TraceBuilder:
        """Replace the trace's runtime provenance with the given values."""
        provenance = RuntimeProvenance(
            mode=mode,
            framework=framework,
            framework_version=framework_version,
            adapter_name=adapter_name,
            adapter_version=adapter_version,
            conformance_bundle_version=conformance_bundle_version,
            event_source=event_source,
            # Shallow copy, so the stored mapping is independent of the argument.
            metadata=dict(metadata or {}),
        )
        self._trace.runtime_provenance = provenance
        return self

    def set_determinism(
        self,
        fixture_hash: str | None = None,
        replay_key: str | None = None,
        baseline_trace_id: str | None = None,
    ) -> TraceBuilder:
        """Replace the trace's determinism metadata with the given values."""
        self._trace.determinism = DeterminismMetadata(
            fixture_hash=fixture_hash,
            replay_key=replay_key,
            baseline_trace_id=baseline_trace_id,
        )
        return self

    def finish(
        self,
        status: TraceStatus = TraceStatus.PASSED,
        error_message: str | None = None,
    ) -> Trace:
        """Stamp the end time and final status, recompute metrics, return the trace.

        Raises:
            TraceError: if the trace already has an end time and a status
                other than RUNNING.
        """
        trace = self._trace
        already_closed = (
            trace.ended_at_ms is not None and trace.status != TraceStatus.RUNNING
        )
        if already_closed:
            raise TraceError(f"trace already finished: {self._trace.trace_id}")
        trace.status = status
        trace.ended_at_ms = time.time() * 1000
        trace.error_message = error_message
        # Metrics are derived last so the duration sees the end time set above.
        trace.metrics = _compute_metrics(trace)
        return trace
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _compute_metrics(trace: Trace) -> TraceMetrics:
    """Derive duration, call counts, and token totals from a trace's events."""
    summary = TraceMetrics()

    began = trace.started_at_ms
    # With no end time the span collapses to zero; the result is never negative.
    ended = trace.ended_at_ms or began
    summary.total_duration_ms = max(ended - began, 0.0)

    for item in trace.events:
        kind = item.kind
        if kind == TraceEventKind.TOOL_CALL:
            call = item.tool_call
            if call is None:
                # A tool-call event without a payload is not counted.
                continue
            summary.total_tool_calls += 1
            label = call.kind.value
            counts = summary.tool_call_breakdown
            counts[label] = counts.get(label, 0) + 1
        elif kind == TraceEventKind.LLM_REQUEST:
            # Requests count as LLM calls even when the payload is absent.
            summary.total_llm_calls += 1
            request = item.llm_request
            if request and request.token_count_estimate:
                summary.total_tokens_used += request.token_count_estimate
        elif kind == TraceEventKind.LLM_RESPONSE:
            response = item.llm_response
            if response is not None:
                summary.total_tokens_used += response.output_tokens
    return summary
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def fixture_hash(payload: dict[str, Any]) -> str:
    """Return the SHA-256 hex digest of the payload's canonical JSON form.

    Keys are sorted and separators are compact, so two dicts with equal
    content hash identically regardless of insertion order.
    """
    canonical = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
|
ase/trace/model.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""Trace data model — the append-only record of a single agent run.
|
|
2
|
+
|
|
3
|
+
CRITICAL: This schema is append-only. Fields are NEVER removed or renamed.
|
|
4
|
+
New optional fields may be added with a default. Bump TRACE_SCHEMA_VERSION
|
|
5
|
+
when adding fields. Never change the meaning of an existing field.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
from enum import StrEnum
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
TRACE_SCHEMA_VERSION = 7
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ToolCallKind(StrEnum):
    """Classify what kind of backend a tool call targeted.

    The string values are what gets serialized; UNKNOWN is the catch-all
    member for targets that fit none of the named categories.
    """

    DATABASE = "database"
    HTTP_API = "http_api"
    EMAIL = "email"
    FILESYSTEM = "filesystem"
    QUEUE = "queue"
    UNKNOWN = "unknown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TraceEventKind(StrEnum):
    """Represent the type of event recorded in a trace.

    Each TraceEvent carries exactly one of these values in its `kind` field.
    """

    TOOL_CALL = "tool_call"
    TOOL_RESPONSE = "tool_response"
    LLM_REQUEST = "llm_request"
    LLM_RESPONSE = "llm_response"
    APPROVAL = "approval"
    SCENARIO_START = "scenario_start"
    SCENARIO_END = "scenario_end"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TraceStatus(StrEnum):
    """Describe the overall outcome of one trace.

    RUNNING is the default status of a newly created Trace; the other
    members are terminal outcomes.
    """

    RUNNING = "running"
    PASSED = "passed"
    FAILED = "failed"
    ERROR = "error"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ToolCallEvent(BaseModel):
    """Capture one outbound tool call made by the agent.

    `kind`, `method`, and `target` are required; the response fields and
    the duration are optional and default to None.
    """

    kind: ToolCallKind
    method: str = Field(description="HTTP method or SQL verb")
    target: str = Field(description="URL, table name, or address")
    # Request payload; a fresh empty dict per instance when omitted.
    payload: dict[str, Any] = Field(default_factory=dict)
    response_status: int | None = None
    response_body: dict[str, Any] | None = None
    duration_ms: float | None = None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class LLMRequestEvent(BaseModel):
    """Capture one model request without storing raw prompt text.

    Only a hash of the prompt is kept, alongside the model name and an
    optional token estimate.
    """

    model: str
    prompt_hash: str = Field(description="SHA-256 of the full prompt")
    # Optional; None when no estimate was supplied.
    token_count_estimate: int | None = None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class LLMResponseEvent(BaseModel):
    """Capture one model response summary.

    All three fields are required; no response text is stored here.
    """

    model: str
    output_tokens: int
    finish_reason: str
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ApprovalEvent(BaseModel):
    """Capture one approval signal attached to the scenario or runtime."""

    approval_id: str
    actor: str
    # Defaults to True, so a denial must be recorded explicitly.
    granted: bool = True
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class TraceEvent(BaseModel):
    """Represent one timestamped event in the trace timeline.

    The event carries a `kind` plus optional typed payload slots.
    NOTE(review): nothing in this class enforces that the populated payload
    slot matches `kind`; presumably producers keep them consistent.
    """

    event_id: str
    kind: TraceEventKind
    # Defaults to the construction time, in milliseconds since the epoch.
    timestamp_ms: float = Field(default_factory=lambda: time.time() * 1000)
    parent_event_id: str | None = None
    # Typed payload slots; each is None unless explicitly provided.
    tool_call: ToolCallEvent | None = None
    llm_request: LLMRequestEvent | None = None
    llm_response: LLMResponseEvent | None = None
    approval: ApprovalEvent | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class TraceMetrics(BaseModel):
    """Store aggregate metrics computed from all trace events.

    Every field has a zero/empty default, so a bare TraceMetrics() is a
    valid "nothing recorded yet" value.
    """

    total_tool_calls: int = 0
    total_llm_calls: int = 0
    total_tokens_used: int = 0
    total_duration_ms: float = 0.0
    # Mapping of string key to a count of tool calls.
    tool_call_breakdown: dict[str, int] = Field(default_factory=dict)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class PolicyResult(BaseModel):
    """Persist the outcome of one policy assertion on the trace."""

    policy_id: str
    evaluator: str
    passed: bool
    # Optional human-readable text; empty string when omitted.
    message: str = ""
    details: dict[str, Any] = Field(default_factory=dict)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class MutationSummary(BaseModel):
    """Summarize mutating tool calls across the run."""

    total_mutations: int = 0
    # Mutation counts keyed by a string; presumably the tool-call target — TODO confirm.
    by_target: dict[str, int] = Field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class DeterminismMetadata(BaseModel):
    """Record replay-related metadata used for deterministic comparison.

    All fields are optional and default to None.
    """

    fixture_hash: str | None = None
    replay_key: str | None = None
    baseline_trace_id: str | None = None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class TraceEvaluation(BaseModel):
    """Persist the final evaluation outcome for one trace.

    All counters and the score are required; only `failing_evaluators`
    has a default (an empty list).
    """

    passed: bool
    ase_score: float
    total: int
    passed_count: int
    failed_count: int
    failing_evaluators: list[str] = Field(default_factory=list)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class RuntimeProvenance(BaseModel):
    """Describe which ASE execution path produced the trace.

    Only `mode` is required; every other field is optional.
    """

    mode: str
    framework: str | None = None
    framework_version: str | None = None
    adapter_name: str | None = None
    adapter_version: str | None = None
    conformance_bundle_version: str | None = None
    event_source: str | None = None
    # Free-form extras; a fresh empty dict per instance when omitted.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class AdapterMetadata(BaseModel):
    """Describe the adapter or external runtime that produced the trace.

    `name` and `transport` are required; the remaining fields are optional.
    """

    name: str
    transport: str
    framework: str | None = None
    language: str | None = None
    version: str | None = None
    source: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class AgentGraphNode(BaseModel):
    """Represent one node in a multi-agent execution graph."""

    agent_id: str
    name: str | None = None
    role: str | None = None
    # Optional link to another node's agent_id; None when not provided.
    parent_agent_id: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class AgentGraph(BaseModel):
    """Store graph metadata for multi-agent runs."""

    # Graph nodes; defaults to an empty list.
    nodes: list[AgentGraphNode] = Field(default_factory=list)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class SessionTraceEvent(BaseModel):
    """Capture one session read or write checkpoint."""

    session_id: str
    # Free-form string; presumably "read" or "write" per the docstring — TODO confirm.
    operation: str
    # Defaults to the construction time, in milliseconds since the epoch.
    timestamp_ms: float = Field(default_factory=lambda: time.time() * 1000)
    agent_id: str | None = None
    key: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class HandoffEdge(BaseModel):
    """Capture one delegation edge between two agents."""

    from_agent_id: str
    to_agent_id: str
    # Defaults to the construction time, in milliseconds since the epoch.
    timestamp_ms: float = Field(default_factory=lambda: time.time() * 1000)
    label: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class ExternalTraceRef(BaseModel):
    """Reference an external trace in another system."""

    # Name of the external system that owns the referenced trace.
    system: str
    # Identifier of the trace within that external system.
    trace_id: str
    url: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class ProtocolEvent(BaseModel):
    """Preserve protocol-level events outside the tool timeline."""

    protocol: str
    event_type: str
    # Defaults to the construction time, in milliseconds since the epoch.
    timestamp_ms: float = Field(default_factory=lambda: time.time() * 1000)
    agent_id: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class TraceCertificationLevel(StrEnum):
    """Represent the capability tier granted to a certified trace.

    The string values are what gets serialized on the trace.
    """

    CORE = "core"
    STATEFUL = "stateful"
    MULTI_AGENT = "multi_agent"
    MCP = "mcp"
    REALTIME = "realtime"
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class Trace(BaseModel):
    """Complete record of a single agent scenario run.

    Only `trace_id`, `scenario_id`, and `scenario_name` are required. A
    new trace starts in RUNNING status with no end time and empty
    collections; the remaining fields are filled in as the run progresses.
    """

    # Stamped with the module's current schema version unless provided.
    schema_version: int = TRACE_SCHEMA_VERSION
    trace_id: str
    scenario_id: str
    scenario_name: str
    # Defaults to the construction time, in milliseconds since the epoch.
    started_at_ms: float = Field(default_factory=lambda: time.time() * 1000)
    # None until an end time is recorded.
    ended_at_ms: float | None = None
    status: TraceStatus = TraceStatus.RUNNING
    # Event timeline for the run.
    events: list[TraceEvent] = Field(default_factory=list)
    metrics: TraceMetrics = Field(default_factory=TraceMetrics)
    tags: dict[str, str] = Field(default_factory=dict)
    mutation_summary: MutationSummary = Field(default_factory=MutationSummary)
    policy_results: list[PolicyResult] = Field(default_factory=list)
    determinism: DeterminismMetadata = Field(default_factory=DeterminismMetadata)
    # Optional sections; None when not provided.
    evaluation: TraceEvaluation | None = None
    runtime_provenance: RuntimeProvenance | None = None
    adapter_metadata: AdapterMetadata | None = None
    # Multi-agent, session, and protocol records kept apart from `events`.
    agent_graph: AgentGraph = Field(default_factory=AgentGraph)
    session_events: list[SessionTraceEvent] = Field(default_factory=list)
    handoff_edges: list[HandoffEdge] = Field(default_factory=list)
    external_trace_refs: list[ExternalTraceRef] = Field(default_factory=list)
    protocol_events: list[ProtocolEvent] = Field(default_factory=list)
    certification_level: TraceCertificationLevel | None = None
    error_message: str | None = None
    stderr_output: str | None = None
|
ase/trace/otel_export.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""OTEL-like export helpers for ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ase.trace.model import Trace, TraceEventKind
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def to_otel_dict(trace: Trace) -> dict[str, Any]:
    """Render a trace as a single-resource, single-scope OTEL-like document.

    Trace-level facts become resource attributes; every trace event becomes
    one span under the "ase" scope.
    """
    provenance = trace.runtime_provenance
    if provenance:
        mode = provenance.mode
        framework = provenance.framework or ""
    else:
        # No provenance recorded: fall back to placeholder values.
        mode = "unknown"
        framework = ""

    resource_attrs = {
        "ase.trace_id": trace.trace_id,
        "ase.scenario_id": trace.scenario_id,
        "ase.status": trace.status.value,
        "ase.runtime_mode": mode,
        "ase.framework": framework,
    }
    evaluation = trace.evaluation
    if evaluation is not None:
        resource_attrs["ase.evaluation.passed"] = evaluation.passed
        resource_attrs["ase.evaluation.score"] = evaluation.ase_score

    span_list = []
    for event in trace.events:
        span_list.append(_span_from_event(trace.trace_id, event))

    scope_entry = {"scope": {"name": "ase"}, "spans": span_list}
    resource_entry = {
        "resource": {"attributes": _kv(resource_attrs)},
        "scopeSpans": [scope_entry],
    }
    return {"resourceSpans": [resource_entry]}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _span_from_event(trace_id: str, event: object) -> dict[str, Any]:
    """Build the OTEL-like span dict for a single trace event.

    The span is instantaneous (start equals end) and only tool-call
    details are exported as span attributes.
    """
    from ase.trace.model import TraceEvent

    # Narrows the loosely typed parameter for the attribute accesses below.
    assert isinstance(event, TraceEvent)

    span_attrs: dict[str, Any] = {"ase.event_id": event.event_id}
    call = event.tool_call
    if call is not None:
        span_attrs["ase.tool.kind"] = call.kind.value
        span_attrs["ase.tool.method"] = call.method
        span_attrs["ase.tool.target"] = call.target

    # Milliseconds to nanoseconds.
    instant_ns = int(event.timestamp_ms * 1_000_000)
    return {
        "traceId": _hash_hex(trace_id, 32),
        "spanId": _hash_hex(event.event_id, 16),
        "name": event.kind.value,
        "kind": _otel_kind(event.kind),
        "startTimeUnixNano": instant_ns,
        "endTimeUnixNano": instant_ns,
        "attributes": _kv(span_attrs),
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _otel_kind(kind: TraceEventKind) -> int:
    """Return the numeric OTEL span kind for an ASE event kind.

    Tool calls map to 3 and everything else to 1; in the OTLP SpanKind
    enum these are SPAN_KIND_CLIENT and SPAN_KIND_INTERNAL respectively.
    """
    return 3 if kind == TraceEventKind.TOOL_CALL else 1
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _hash_hex(value: str, length: int) -> str:
|
|
69
|
+
"""Generate a deterministic hex identifier of the requested length."""
|
|
70
|
+
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:length]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _kv(values: dict[str, Any]) -> list[dict[str, Any]]:
|
|
74
|
+
"""Convert a flat mapping into OTEL-like key/value attributes."""
|
|
75
|
+
return [{"key": key, "value": {"stringValue": str(value)}} for key, value in values.items()]
|
ase/trace/otel_import.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""OTEL-like import helpers for ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ase.errors import OTelImportError
|
|
10
|
+
from ase.trace.builder import TraceBuilder
|
|
11
|
+
from ase.trace.model import AdapterMetadata, RuntimeProvenance, ToolCallEvent, ToolCallKind, Trace
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def read_otel_trace(path: Path) -> Trace:
    """Read one OTEL-like JSON file and convert it into an ASE trace.

    Args:
        path: Location of a UTF-8 encoded OTEL-like JSON document.

    Returns:
        The converted trace, as produced by `trace_from_otel_dict`.

    Raises:
        OTelImportError: if the file cannot be read, is not valid UTF-8,
            or does not contain valid JSON.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    # UnicodeDecodeError is not an OSError, so it is listed explicitly;
    # otherwise a file with invalid UTF-8 bytes would escape this wrapper.
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise OTelImportError(f"failed to read OTEL trace {path}: {exc}") from exc
    return trace_from_otel_dict(payload)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def trace_from_otel_dict(data: dict[str, Any]) -> Trace:
    """Convert OTEL-like JSON into a native ASE trace.

    Only the first resource span and its first scope span are read. Spans
    carrying an ``ase.tool.kind`` attribute become tool-call events; all
    other spans are skipped.

    Args:
        data: Parsed OTEL-like document with a ``resourceSpans`` list.

    Returns:
        A finished trace with adapter metadata and runtime provenance set.

    Raises:
        OTelImportError: if the payload does not have the expected shape.
    """
    # All structural parsing happens inside one guard so that any malformed
    # level (missing keys, null or non-dict entries) surfaces as
    # OTelImportError instead of a raw AttributeError/TypeError.
    try:
        resource_span = data["resourceSpans"][0]
        scope_span = resource_span["scopeSpans"][0]
        spans = scope_span.get("spans", [])
        attrs = _attr_map(resource_span.get("resource", {}).get("attributes", []))
        span_attr_maps = [_attr_map(span.get("attributes", [])) for span in spans]
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        raise OTelImportError(f"invalid OTEL payload: {exc}") from exc

    scenario_id = attrs.get("ase.scenario_id", "otel-import")
    # An empty framework string is normalized to None.
    framework = attrs.get("ase.framework") or None

    # The scenario id doubles as the scenario name.
    builder = TraceBuilder(scenario_id=scenario_id, scenario_name=scenario_id)
    trace = builder.current_trace
    trace.adapter_metadata = AdapterMetadata(
        name="otel-import",
        transport="otel-json",
        framework=framework,
        source="otel-import",
    )
    # Provenance is assigned once, directly; the earlier builder call that
    # this assignment immediately overwrote has been dropped.
    trace.runtime_provenance = RuntimeProvenance(
        mode=attrs.get("ase.runtime_mode", "imported"),
        framework=framework,
        event_source="otel-json",
    )

    for span_attrs in span_attr_maps:
        if "ase.tool.kind" not in span_attrs:
            continue
        builder.add_tool_call(
            ToolCallEvent(
                kind=_tool_kind(span_attrs.get("ase.tool.kind")),
                method=span_attrs.get("ase.tool.method", "UNKNOWN"),
                target=span_attrs.get("ase.tool.target", "unknown"),
            )
        )
    # NOTE(review): the imported trace is finished with the builder's default
    # status; any "ase.status" resource attribute is not read back here.
    return builder.finish()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _attr_map(attributes: list[dict[str, Any]]) -> dict[str, str]:
    """Flatten OTEL-like key/value entries into a str-to-str mapping.

    Entries without a key are dropped; when a key repeats, the last
    entry wins.
    """
    return {
        str(entry.get("key")): _attribute_value(entry.get("value", {}))
        for entry in attributes
        if entry.get("key") is not None
    }
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _tool_kind(value: str | None) -> ToolCallKind:
    """Map an imported kind string to ToolCallKind, defaulting to UNKNOWN.

    Missing, empty, and unrecognized values all resolve to UNKNOWN rather
    than raising.
    """
    if not value:
        return ToolCallKind.UNKNOWN
    try:
        parsed = ToolCallKind(value)
    except ValueError:
        return ToolCallKind.UNKNOWN
    return parsed
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _attribute_value(value: dict[str, Any]) -> str:
|
|
87
|
+
"""Read OTEL-like scalar values without losing string attributes."""
|
|
88
|
+
if "stringValue" in value:
|
|
89
|
+
return str(value["stringValue"])
|
|
90
|
+
if "boolValue" in value:
|
|
91
|
+
return str(value["boolValue"])
|
|
92
|
+
if "intValue" in value:
|
|
93
|
+
return str(value["intValue"])
|
|
94
|
+
if "doubleValue" in value:
|
|
95
|
+
return str(value["doubleValue"])
|
|
96
|
+
return ""
|
ase/trace/redaction.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""No-op redaction helpers for persisted traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ase.trace.model import Trace
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def redact_trace(trace: Trace) -> Trace:
    """Pass the trace through untouched; no redaction is applied yet.

    The very same object is returned, not a copy.
    """
    passthrough = trace
    return passthrough
|
ase/trace/serializer.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""JSON serialization helpers for native ASE traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from ase.errors import TraceSchemaMigrationError, TraceSerializationError
|
|
9
|
+
from ase.trace.model import TRACE_SCHEMA_VERSION, Trace
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def serialize(trace: Trace) -> str:
    """Dump a trace to two-space-indented JSON text.

    Raises:
        TraceSerializationError: if dumping or JSON encoding fails for
            any reason.
    """
    try:
        document = trace.model_dump(mode="json")
        text = json.dumps(document, indent=2)
    except Exception as exc:  # noqa: BLE001
        raise TraceSerializationError(f"failed to serialize trace {trace.trace_id}: {exc}") from exc
    return text
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def deserialize(raw: str) -> Trace:
    """Parse one trace from native JSON.

    Args:
        raw: JSON text whose root is an object describing a trace.

    Returns:
        The validated trace.

    Raises:
        TraceSerializationError: if the text is not valid JSON, the root is
            not an object, `schema_version` cannot be compared against the
            supported version, or model validation fails.
        TraceSchemaMigrationError: if the payload declares a schema version
            newer than this build supports.
    """
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise TraceSerializationError(f"failed to parse trace JSON: {exc}") from exc
    # Valid JSON may still be a list, string, or number; without this check
    # the `.get` below would raise a bare AttributeError.
    if not isinstance(payload, dict):
        raise TraceSerializationError(
            f"failed to validate trace payload: expected a JSON object, got {type(payload).__name__}"
        )
    # A payload with no version is treated as the current schema.
    schema_version = payload.get("schema_version", TRACE_SCHEMA_VERSION)
    try:
        too_new = schema_version > TRACE_SCHEMA_VERSION
    except TypeError as exc:
        # For example a string or null version, which cannot be ordered
        # against the integer constant.
        raise TraceSerializationError(
            f"failed to validate trace payload: invalid schema_version {schema_version!r}"
        ) from exc
    if too_new:
        raise TraceSchemaMigrationError(
            f"trace schema {schema_version} is newer than supported {TRACE_SCHEMA_VERSION}"
        )
    try:
        return Trace.model_validate(payload)
    except Exception as exc:  # noqa: BLE001
        raise TraceSerializationError(f"failed to validate trace payload: {exc}") from exc
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def read_from_file(path: Path) -> Trace:
    """Read one native trace from disk.

    Args:
        path: Location of a UTF-8 encoded native trace JSON file.

    Returns:
        The trace parsed by `deserialize`.

    Raises:
        TraceSerializationError: if the file cannot be read or is not valid
            UTF-8. Errors raised by `deserialize` propagate unchanged.
    """
    # Only the disk read is guarded, so parse errors keep their own messages.
    try:
        raw = path.read_text(encoding="utf-8")
    # UnicodeDecodeError is not an OSError, so it is listed explicitly.
    except (OSError, UnicodeDecodeError) as exc:
        raise TraceSerializationError(f"failed to read trace file {path}: {exc}") from exc
    return deserialize(raw)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_to_file(trace: Trace, path: Path) -> None:
    """Serialize a trace and write it to `path` with a trailing newline.

    Raises:
        TraceSerializationError: if the file cannot be written; errors
            raised by `serialize` propagate unchanged.
    """
    document = serialize(trace) + "\n"
    try:
        path.write_text(document, encoding="utf-8")
    except OSError as exc:
        raise TraceSerializationError(f"failed to write trace file {path}: {exc}") from exc
|