fableforge-agent-telemetry 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_telemetry/__init__.py +7 -0
- agent_telemetry/cli.py +212 -0
- agent_telemetry/collector.py +412 -0
- agent_telemetry/dashboard.py +462 -0
- agent_telemetry/error_tracker.py +149 -0
- agent_telemetry/models.py +110 -0
- agent_telemetry/storage.py +501 -0
- agent_telemetry/token_tracker.py +195 -0
- fableforge_agent_telemetry-0.1.0.dist-info/METADATA +21 -0
- fableforge_agent_telemetry-0.1.0.dist-info/RECORD +14 -0
- fableforge_agent_telemetry-0.1.0.dist-info/WHEEL +5 -0
- fableforge_agent_telemetry-0.1.0.dist-info/entry_points.txt +2 -0
- fableforge_agent_telemetry-0.1.0.dist-info/licenses/LICENSE +21 -0
- fableforge_agent_telemetry-0.1.0.dist-info/top_level.txt +1 -0
agent_telemetry/cli.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""CLI for AgentTelemetry — analyze traces, view costs, start dashboard."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from agent_telemetry.collector import (
|
|
14
|
+
auto_detect_format,
|
|
15
|
+
calculate_metrics,
|
|
16
|
+
ingest_trace,
|
|
17
|
+
)
|
|
18
|
+
from agent_telemetry.error_tracker import classify_error, generate_error_report
|
|
19
|
+
from agent_telemetry.models import Span
|
|
20
|
+
from agent_telemetry.storage import TelemetryStorage
|
|
21
|
+
from agent_telemetry.token_tracker import estimate_cost, format_cost_table
|
|
22
|
+
|
|
23
|
+
# Shared Rich console; every command in this module prints through it.
console = Console()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.group()
@click.version_option(version="0.1.0")
def cli() -> None:
    """AgentTelemetry — Datadog for AI agents."""
    # Root command group. The subcommands in this module attach themselves
    # with ``@cli.command()``; the docstring above is what click shows as the
    # group's help text, so no further body is needed.
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--format", "fmt", type=click.Choice(["glint", "armand0e", "vfable", "auto"]), default="auto", help="Trace format")
@click.option("--store/--no-store", default=True, help="Store results in database")
def analyze(trace_file: str, fmt: str, store: bool) -> None:
    """Analyze a trace file and display metrics."""
    # The docstring above doubles as the command's --help text, so the
    # details are documented here instead.
    #
    # Steps: resolve the trace format, ingest the spans, print a session
    # summary plus a per-tool table, and (unless --no-store) persist the
    # spans and session metrics through TelemetryStorage.
    #
    # Exits with status 1 when the format cannot be handled or when the file
    # yields no spans.
    if fmt == "auto":
        fmt = auto_detect_format(trace_file)
        console.print(f"[dim]Detected format: {fmt}[/dim]")

    # ingest_trace raises ValueError for a format it has no parser for, which
    # is what happens when auto-detection returns "unknown". Report it as a
    # normal CLI error instead of letting a traceback reach the user.
    try:
        spans = ingest_trace(trace_file, fmt=fmt)
    except ValueError as exc:
        # markup=False: the message embeds a Python list repr with brackets.
        console.print(str(exc), style="red", markup=False)
        sys.exit(1)

    if not spans:
        console.print("[red]No spans found in trace file.[/red]")
        sys.exit(1)

    console.print(f"[green]Loaded {len(spans)} spans[/green]")

    metrics_result = calculate_metrics(spans)
    session = metrics_result["session"]
    tools = metrics_result["tools"]

    console.print(f"\n[bold]Session:[/bold] {session.session_id}")
    console.print(f"[bold]Model:[/bold] {session.model}")
    console.print(f"[bold]Duration:[/bold] {session.duration_seconds:.1f}s")

    metrics_table = Table(title="Session Metrics", show_header=True)
    metrics_table.add_column("Metric", style="cyan")
    metrics_table.add_column("Value", justify="right")

    session_rows = (
        ("Total Tokens", f"{session.total_tokens:,}"),
        ("Total Cost", f"${session.total_cost:.6f}"),
        ("Tool Calls", str(session.tool_calls)),
        ("Errors", str(session.error_count)),
        ("Avg Duration", f"{session.avg_tool_duration_ms:.0f}ms"),
        ("P50 Duration", f"{session.p50_duration_ms:.0f}ms"),
        ("P95 Duration", f"{session.p95_duration_ms:.0f}ms"),
        ("P99 Duration", f"{session.p99_duration_ms:.0f}ms"),
        ("Cache Hit Rate", f"{session.cache_hit_rate:.1%}"),
    )
    for label, value in session_rows:
        metrics_table.add_row(label, value)
    console.print(metrics_table)

    tool_table = Table(title="Tool Metrics", show_header=True)
    tool_table.add_column("Tool", style="cyan")
    tool_table.add_column("Calls", justify="right")
    tool_table.add_column("Avg ms", justify="right")
    tool_table.add_column("P95 ms", justify="right")
    tool_table.add_column("Error Rate", justify="right")
    tool_table.add_column("Cost", justify="right", style="green")

    # One row per tool, ordered by tool name.
    for name, tm in sorted(tools.items()):
        tool_table.add_row(
            name,
            str(tm.call_count),
            f"{tm.avg_duration_ms:.0f}",
            f"{tm.p95_duration_ms:.0f}",
            f"{tm.error_rate:.1%}",
            f"${tm.total_cost_usd:.6f}",
        )
    console.print(tool_table)

    if store:
        storage = TelemetryStorage()
        storage.store_spans(spans)
        storage.store_session_metrics(session)
        console.print(f"\n[dim]Stored {len(spans)} spans in database[/dim]")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@cli.command()
@click.option("--host", default="127.0.0.1", help="Host to bind to")
@click.option("--port", default=8088, type=int, help="Port to bind to")
def dashboard(host: str, port: int) -> None:
    """Start the interactive dashboard server."""
    # Function-scope imports: uvicorn and the dashboard app are only loaded
    # when this command actually runs.
    import uvicorn
    from agent_telemetry.dashboard import app

    url = f"http://{host}:{port}"
    console.print(f"[green]Starting AgentTelemetry dashboard on {url}[/green]")
    console.print("[dim]Press Ctrl+C to stop[/dim]")

    # Hands control to uvicorn, which serves the app on the requested address.
    uvicorn.run(app, host=host, port=port)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--format", "fmt", type=click.Choice(["glint", "armand0e", "vfable", "auto"]), default="auto", help="Trace format")
def cost(trace_file: str, fmt: str) -> None:
    """Show cost breakdown for a trace file."""
    # The docstring above doubles as the command's --help text, so the
    # details are documented here instead.
    #
    # Spans are grouped by model, token counts are summed per group, and each
    # group is priced with estimate_cost. The per-model table is followed by
    # a grand total.
    #
    # Exits with status 1 when the format cannot be handled or when the file
    # yields no spans.
    if fmt == "auto":
        fmt = auto_detect_format(trace_file)
        console.print(f"[dim]Detected format: {fmt}[/dim]")

    # ingest_trace raises ValueError for a format it has no parser for, which
    # is what happens when auto-detection returns "unknown". Report it as a
    # normal CLI error instead of letting a traceback reach the user.
    try:
        spans = ingest_trace(trace_file, fmt=fmt)
    except ValueError as exc:
        # markup=False: the message embeds a Python list repr with brackets.
        console.print(str(exc), style="red", markup=False)
        sys.exit(1)

    if not spans:
        console.print("[red]No spans found in trace file.[/red]")
        sys.exit(1)

    spans_by_model: dict[str, list[Span]] = {}
    for span in spans:
        spans_by_model.setdefault(span.model, []).append(span)

    # One cost breakdown per model, in model-name order.
    breakdowns = [
        estimate_cost(
            sum(s.input_tokens for s in group),
            sum(s.output_tokens for s in group),
            model,
            sum(s.cache_read for s in group),
            sum(s.cache_creation for s in group),
        )
        for model, group in sorted(spans_by_model.items())
    ]

    console.print(format_cost_table(breakdowns))

    total = sum(b.total_cost for b in breakdowns)
    console.print(f"\n[bold green]Grand Total: ${total:.6f}[/bold green]")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--format", "fmt", type=click.Choice(["glint", "armand0e", "vfable", "auto"]), default="auto", help="Trace format")
def errors(trace_file: str, fmt: str) -> None:
    """Show error report for a trace file."""
    # The docstring above doubles as the command's --help text, so the
    # details are documented here instead.
    #
    # The report is generated for the session id of the first span in the
    # file. It prints totals, a count per error type (most frequent first)
    # and a detail table limited to the first 50 errors.
    #
    # Exits with status 1 when the format cannot be handled or when the file
    # yields no spans.
    if fmt == "auto":
        fmt = auto_detect_format(trace_file)
        console.print(f"[dim]Detected format: {fmt}[/dim]")

    # ingest_trace raises ValueError for a format it has no parser for, which
    # is what happens when auto-detection returns "unknown". Report it as a
    # normal CLI error instead of letting a traceback reach the user.
    try:
        spans = ingest_trace(trace_file, fmt=fmt)
    except ValueError as exc:
        # markup=False: the message embeds a Python list repr with brackets.
        console.print(str(exc), style="red", markup=False)
        sys.exit(1)

    if not spans:
        console.print("[red]No spans found in trace file.[/red]")
        sys.exit(1)

    session_id = spans[0].session_id
    report = generate_error_report(session_id, spans=spans)

    console.print(f"\n[bold]Error Report: {session_id}[/bold]")
    console.print(f"Total Errors: {report.total_errors}")
    console.print(f"Recovered: {report.recovered_errors}")
    console.print(f"Recovery Rate: {report.recovery_rate:.0%}")

    if report.errors_by_type:
        type_table = Table(title="Errors by Type", show_header=True)
        type_table.add_column("Error Type", style="red")
        type_table.add_column("Count", justify="right")

        for etype, count in sorted(report.errors_by_type.items(), key=lambda x: -x[1]):
            type_table.add_row(etype, str(count))
        console.print(type_table)

    if report.errors:
        error_table = Table(title="Error Details", show_header=True)
        error_table.add_column("Span ID", style="dim")
        error_table.add_column("Type", style="red")
        error_table.add_column("Tool", style="cyan")
        error_table.add_column("Message", max_width=60)
        error_table.add_column("Recovered")

        for e in report.errors[:50]:
            recovered = "[green]✓[/green]" if e.recovered else "[red]✗[/red]"
            # Only mark the id as truncated when characters were really cut;
            # a short id followed by "..." would look like a partial value.
            if len(e.span_id) > 12:
                span_label = e.span_id[:12] + "..."
            else:
                span_label = e.span_id
            error_table.add_row(
                span_label,
                e.error_type,
                e.tool_name,
                e.error_message[:60],
                recovered,
            )
        console.print(error_table)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@cli.command()
@click.argument("text")
@click.option("--model", default="gpt-4", help="Model name for token counting")
def tokens(text: str, model: str) -> None:
    """Count tokens in a text string."""
    # Function-scope import: count_tokens is only loaded when this command runs.
    from agent_telemetry.token_tracker import count_tokens

    token_count = count_tokens(text, model)
    console.print(f"[bold]{token_count:,}[/bold] tokens ({model})")

    # Price the text as input only: zero output tokens are passed.
    input_only = estimate_cost(token_count, 0, model)
    console.print(f"Input cost (no output): ${input_only.input_cost:.6f}")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# Running this module directly as a script invokes the click command group.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
"""Trace data ingestion: parse multiple agent trace formats and extract spans."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import statistics
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from agent_telemetry.models import (
|
|
12
|
+
CostReport,
|
|
13
|
+
SessionMetrics,
|
|
14
|
+
Span,
|
|
15
|
+
SpanStatus,
|
|
16
|
+
ToolMetrics,
|
|
17
|
+
)
|
|
18
|
+
from agent_telemetry.token_tracker import estimate_cost
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_glint_trace(jsonl_path: str | Path) -> list[dict[str, Any]]:
    """Parse Glint-Research format trace files.

    Expected format per line:
    {
        "type": "tool_call" | "tool_result" | "message" | "error",
        "timestamp": "2025-01-15T10:30:00Z",
        "session_id": "...",
        "span_id": "...",
        "tool": "Bash" | "Read" | "Edit" | "Write" | ...,
        "input": { ... },
        "output": { ... },
        "usage": {
            "input_tokens": 1234,
            "output_tokens": 567,
            "cache_read_input_tokens": 100,
            "cache_creation_input_tokens": 50
        },
        "duration_ms": 1234.5,
        "model": "claude-3.5-sonnet",
        "error": null | "error message"
    }

    Only ``tool_call`` and ``message`` entries produce spans. Blank lines,
    lines that are not valid JSON, and JSON values that are not objects are
    skipped. A missing or null ``usage`` is treated as empty, so all token
    counts default to 0.

    Args:
        jsonl_path: Path to the JSON Lines trace file (read as UTF-8).

    Returns:
        One raw span dict per accepted entry, in file order, with
        ``cost_usd`` filled in from ``estimate_cost``.
    """
    spans: list[dict[str, Any]] = []

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with Path(jsonl_path).open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A line such as `42` or `[1, 2]` parses fine but has no fields;
            # calling .get() on it would raise.
            if not isinstance(entry, dict):
                continue
            if entry.get("type") not in ("tool_call", "message"):
                continue

            # "usage": null would otherwise bypass the {} default.
            usage = entry.get("usage")
            if not isinstance(usage, dict):
                usage = {}

            span = {
                "span_id": entry.get("span_id", ""),
                "session_id": entry.get("session_id", "unknown"),
                # Entries without a tool fall back to their type as the name.
                "tool_name": entry.get("tool", entry.get("type", "unknown")),
                "input_tokens": usage.get("input_tokens", 0),
                "output_tokens": usage.get("output_tokens", 0),
                "cache_read": usage.get("cache_read_input_tokens", 0),
                "cache_creation": usage.get("cache_creation_input_tokens", 0),
                "duration_ms": entry.get("duration_ms", 0.0),
                "status": SpanStatus.ERROR if entry.get("error") else SpanStatus.OK,
                "error": entry.get("error"),
                "model": entry.get("model", "unknown"),
                "timestamp": entry.get("timestamp"),
                "metadata": {},
            }
            cost = estimate_cost(
                span["input_tokens"],
                span["output_tokens"],
                span["model"],
                span["cache_read"],
                span["cache_creation"],
            )
            span["cost_usd"] = cost.total_cost
            spans.append(span)

    return spans
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_armand0e_trace(jsonl_path: str | Path) -> list[dict[str, Any]]:
    """Parse armand0e format trace files.

    Expected format per line:
    {
        "event": "invocation" | "response" | "error",
        "id": "span-id",
        "session": "session-id",
        "timestamp": "2025-01-15T10:30:00Z",
        "tool": {"name": "Bash", "input": {...}},
        "result": {...},
        "tokens": {"in": 1234, "out": 567, "cached": 100},
        "latency_ms": 1234.5,
        "model": "gpt-4o",
        "error": null | "..."
    }

    An ``invocation`` opens a pending span keyed by its id; the matching
    ``response`` or ``error`` completes it with token counts, latency,
    status and cost. A ``response``/``error`` with no pending invocation
    still yields a span, with tool name "unknown". Invocations that are
    never completed are appended at the end with zeroed counters.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. A missing, null or non-object ``tool`` / ``tokens``
    field is treated as empty.

    Args:
        jsonl_path: Path to the JSON Lines trace file (read as UTF-8).

    Returns:
        Raw span dicts: completed spans in completion order, followed by
        any invocations left pending.
    """
    spans: list[dict[str, Any]] = []
    pending: dict[str, dict[str, Any]] = {}

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with Path(jsonl_path).open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A line such as `42` or `[1, 2]` parses fine but has no fields;
            # calling .get() on it would raise.
            if not isinstance(entry, dict):
                continue

            event = entry.get("event", "")
            span_id = entry.get("id", "")

            if event == "invocation":
                # "tool": null (or a bare string) would otherwise bypass the
                # {} default and break the nested lookups.
                tool = entry.get("tool")
                if not isinstance(tool, dict):
                    tool = {}
                pending[span_id] = {
                    "span_id": span_id,
                    "session_id": entry.get("session", "unknown"),
                    "tool_name": tool.get("name", "unknown"),
                    "timestamp": entry.get("timestamp"),
                    "model": entry.get("model", "unknown"),
                    "metadata": {"input": tool.get("input", {})},
                }
            elif event in ("response", "error"):
                tokens = entry.get("tokens")
                if not isinstance(tokens, dict):
                    tokens = {}

                # Complete the pending invocation, or synthesize a span when
                # the invocation line was never seen.
                span_data = pending.pop(span_id, {
                    "span_id": span_id,
                    "session_id": entry.get("session", "unknown"),
                    "tool_name": "unknown",
                    "model": entry.get("model", "unknown"),
                    "timestamp": entry.get("timestamp"),
                    "metadata": {},
                })

                span_data["input_tokens"] = tokens.get("in", 0)
                span_data["output_tokens"] = tokens.get("out", 0)
                span_data["cache_read"] = tokens.get("cached", 0)
                span_data["cache_creation"] = tokens.get("cache_write", 0)
                span_data["duration_ms"] = entry.get("latency_ms", 0.0)
                span_data["status"] = SpanStatus.ERROR if event == "error" else SpanStatus.OK
                span_data["error"] = entry.get("error")

                cost = estimate_cost(
                    span_data["input_tokens"],
                    span_data["output_tokens"],
                    span_data["model"],
                    span_data["cache_read"],
                    span_data["cache_creation"],
                )
                span_data["cost_usd"] = cost.total_cost
                spans.append(span_data)

    # Invocations with no response/error line: keep them, with zeroed
    # counters and an OK status.
    for span_data in pending.values():
        span_data.setdefault("input_tokens", 0)
        span_data.setdefault("output_tokens", 0)
        span_data.setdefault("cache_read", 0)
        span_data.setdefault("cache_creation", 0)
        span_data.setdefault("duration_ms", 0.0)
        span_data.setdefault("status", SpanStatus.OK)
        span_data.setdefault("error", None)
        span_data.setdefault("cost_usd", 0.0)
        spans.append(span_data)

    return spans
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def parse_vfable_trace(jsonl_path: str | Path) -> list[dict[str, Any]]:
    """Parse v-Fable format trace files.

    Expected format per line:
    {
        "kind": "tool_use" | "tool_result" | "message_start" | "message_end",
        "timestamp": "2025-01-15T10:30:00Z",
        "session_id": "...",
        "span_id": "...",
        "parent_span_id": null | "...",
        "tool_name": "Bash",
        "tokens": {"prompt": 1234, "completion": 567, "cache_read": 100, "cache_write": 50},
        "duration_ms": 1234.5,
        "cost_usd": 0.0234,
        "model": "claude-3.5-sonnet",
        "status": "success" | "error" | "timeout",
        "error_message": null | "..."
    }

    ``tool_use``, ``tool_result`` and ``message_end`` entries each produce a
    span; ``message_start`` and anything else is ignored. The cost is taken
    from the entry's own ``cost_usd`` field rather than re-estimated.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. A missing, null or non-object ``tokens`` field is
    treated as empty.

    Args:
        jsonl_path: Path to the JSON Lines trace file (read as UTF-8).

    Returns:
        One raw span dict per accepted entry, in file order.
    """
    spans: list[dict[str, Any]] = []

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with Path(jsonl_path).open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A line such as `42` or `[1, 2]` parses fine but has no fields;
            # calling .get() on it would raise.
            if not isinstance(entry, dict):
                continue
            if entry.get("kind") not in ("tool_use", "tool_result", "message_end"):
                continue

            # "tokens": null would otherwise bypass the {} default.
            tokens = entry.get("tokens")
            if not isinstance(tokens, dict):
                tokens = {}

            # Any status other than "error"/"timeout" (including a missing
            # one) is treated as OK.
            status_str = entry.get("status", "success")
            if status_str == "error":
                status = SpanStatus.ERROR
            elif status_str == "timeout":
                status = SpanStatus.TIMEOUT
            else:
                status = SpanStatus.OK

            spans.append({
                "span_id": entry.get("span_id", ""),
                "session_id": entry.get("session_id", "unknown"),
                "tool_name": entry.get("tool_name", "unknown"),
                "input_tokens": tokens.get("prompt", 0),
                "output_tokens": tokens.get("completion", 0),
                "cache_read": tokens.get("cache_read", 0),
                "cache_creation": tokens.get("cache_write", 0),
                "duration_ms": entry.get("duration_ms", 0.0),
                "cost_usd": entry.get("cost_usd", 0.0),
                "status": status,
                "error": entry.get("error_message"),
                "model": entry.get("model", "unknown"),
                "timestamp": entry.get("timestamp"),
                "metadata": {"parent_span_id": entry.get("parent_span_id")},
            })

    return spans
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def extract_spans(trace_data: list[dict[str, Any]]) -> list[Span]:
    """Build one ``Span`` per raw trace dict, preserving input order.

    Missing keys fall back to defaults ("" for the span id, "unknown" for
    session, tool and model, zero for counters, ``{}`` for metadata). The
    timestamp and status are normalized first; see the helpers below.
    """
    return [
        Span(
            span_id=raw.get("span_id", ""),
            session_id=raw.get("session_id", "unknown"),
            tool_name=raw.get("tool_name", "unknown"),
            input_tokens=raw.get("input_tokens", 0),
            output_tokens=raw.get("output_tokens", 0),
            cache_read=raw.get("cache_read", 0),
            cache_creation=raw.get("cache_creation", 0),
            duration_ms=raw.get("duration_ms", 0.0),
            cost_usd=raw.get("cost_usd", 0.0),
            status=_coerce_status(raw.get("status", "ok")),
            error=raw.get("error"),
            model=raw.get("model", "unknown"),
            timestamp=_coerce_timestamp(raw.get("timestamp")),
            metadata=raw.get("metadata", {}),
        )
        for raw in trace_data
    ]


def _coerce_timestamp(value: Any) -> datetime | None:
    """Return *value* as a datetime, or None when it cannot be used as one."""
    if isinstance(value, datetime):
        return value
    if not isinstance(value, str):
        return None
    try:
        # A "Z" suffix is rewritten as an explicit UTC offset before parsing.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        return None


def _coerce_status(value: Any) -> Any:
    """Turn a status string into a SpanStatus; pass non-strings through."""
    if not isinstance(value, str):
        return value
    try:
        return SpanStatus(value)
    except ValueError:
        # Strings that are not a SpanStatus value are treated as OK.
        return SpanStatus.OK
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _percentile(data: list[float], p: float) -> float:
|
|
282
|
+
if not data:
|
|
283
|
+
return 0.0
|
|
284
|
+
sorted_data = sorted(data)
|
|
285
|
+
k = (len(sorted_data) - 1) * p
|
|
286
|
+
f = int(k)
|
|
287
|
+
c = f + 1
|
|
288
|
+
if c >= len(sorted_data):
|
|
289
|
+
return sorted_data[-1]
|
|
290
|
+
return sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f])
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def calculate_metrics(spans: list[Span]) -> dict[str, SessionMetrics | dict[str, ToolMetrics]]:
    """Aggregate spans into session-wide and per-tool metrics.

    Returns a dict with two keys: ``"session"`` (a SessionMetrics) and
    ``"tools"`` (tool name -> ToolMetrics). With no spans, the session is a
    default SessionMetrics with id "unknown" and the tools mapping is empty.
    """
    if not spans:
        return {"session": SessionMetrics(session_id="unknown"), "tools": {}}

    # The session is labelled with its model only when every span agrees.
    distinct_models = {span.model for span in spans}
    if len(distinct_models) == 1:
        (primary_model,) = distinct_models
    else:
        primary_model = "mixed"

    input_total = sum(span.input_tokens for span in spans)
    output_total = sum(span.output_tokens for span in spans)
    cache_read_total = sum(span.cache_read for span in spans)

    # Spans without a positive duration are left out of the latency figures.
    positive_durations = [span.duration_ms for span in spans if span.duration_ms > 0]

    # Wall-clock duration is the gap between earliest and latest timestamp.
    stamps = [span.timestamp for span in spans if span.timestamp]
    started_at = ended_at = None
    duration_seconds = 0.0
    if stamps:
        started_at, ended_at = min(stamps), max(stamps)
        duration_seconds = (ended_at - started_at).total_seconds()

    session_metrics = SessionMetrics(
        # The id is taken from the first span.
        session_id=spans[0].session_id,
        total_tokens=input_total + output_total,
        total_cost=sum(span.cost_usd for span in spans),
        tool_calls=len(spans),
        error_count=sum(1 for span in spans if span.status == SpanStatus.ERROR),
        recovery_count=0,
        duration_seconds=duration_seconds,
        avg_tool_duration_ms=statistics.mean(positive_durations) if positive_durations else 0.0,
        p50_duration_ms=_percentile(positive_durations, 0.50),
        p95_duration_ms=_percentile(positive_durations, 0.95),
        p99_duration_ms=_percentile(positive_durations, 0.99),
        cache_hit_rate=cache_read_total / input_total if input_total > 0 else 0.0,
        model=primary_model,
        started_at=started_at,
        ended_at=ended_at,
    )

    # Group spans by tool, keeping first-seen order of the tool names.
    grouped: dict[str, list[Span]] = {}
    for span in spans:
        grouped.setdefault(span.tool_name, []).append(span)

    tool_metrics = {
        tool_name: _summarize_tool(tool_name, tool_spans)
        for tool_name, tool_spans in grouped.items()
    }

    return {"session": session_metrics, "tools": tool_metrics}


def _summarize_tool(tool_name: str, tool_spans: list[Span]) -> ToolMetrics:
    """Build the ToolMetrics for all spans of a single tool."""
    positive_durations = [span.duration_ms for span in tool_spans if span.duration_ms > 0]
    failures = sum(1 for span in tool_spans if span.status == SpanStatus.ERROR)

    # Each token sum is reported both as a total_* field and inside
    # token_distribution, so compute it once.
    input_sum = sum(span.input_tokens for span in tool_spans)
    output_sum = sum(span.output_tokens for span in tool_spans)
    cache_read_sum = sum(span.cache_read for span in tool_spans)
    cache_creation_sum = sum(span.cache_creation for span in tool_spans)

    return ToolMetrics(
        tool_name=tool_name,
        call_count=len(tool_spans),
        avg_duration_ms=statistics.mean(positive_durations) if positive_durations else 0.0,
        p50_duration_ms=_percentile(positive_durations, 0.50),
        p95_duration_ms=_percentile(positive_durations, 0.95),
        error_rate=failures / len(tool_spans) if tool_spans else 0.0,
        total_input_tokens=input_sum,
        total_output_tokens=output_sum,
        total_cache_read=cache_read_sum,
        total_cache_creation=cache_creation_sum,
        total_cost_usd=sum(span.cost_usd for span in tool_spans),
        token_distribution={
            "input": input_sum,
            "output": output_sum,
            "cache_read": cache_read_sum,
            "cache_creation": cache_creation_sum,
        },
    )
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def auto_detect_format(jsonl_path: str | Path) -> str:
    """Auto-detect the trace format from the first few lines.

    Each of the first JSON values in the file is checked for a format's
    discriminator field: ``type`` (glint), ``event`` (armand0e) or ``kind``
    (vfable), holding one of that format's known values. The first match
    wins. Blank lines and lines that are not valid JSON are skipped and do
    not count towards the probe limit; JSON values that are not objects
    count but can never match.

    Args:
        jsonl_path: Path to the JSON Lines trace file (read as UTF-8).

    Returns:
        "glint", "armand0e" or "vfable", or "unknown" when none of the
        probed lines is recognized.
    """
    # Upper bound on parsed lines to inspect, so that a large unrecognized
    # file is not read in full.
    max_probe_lines = 20
    signatures = (
        ("glint", "type", ("tool_call", "message", "tool_result")),
        ("armand0e", "event", ("invocation", "response", "error")),
        ("vfable", "kind", ("tool_use", "tool_result", "message_start", "message_end")),
    )

    probed = 0
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with Path(jsonl_path).open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Scalars and arrays are valid JSON but carry no discriminator
            # field; a membership test on e.g. an int would raise TypeError.
            if isinstance(entry, dict):
                for fmt, key, values in signatures:
                    if entry.get(key) in values:
                        return fmt

            # Keep looking: a header/metadata line at the top of the file
            # should not make the whole file undetectable.
            probed += 1
            if probed >= max_probe_lines:
                break

    return "unknown"
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def ingest_trace(jsonl_path: str | Path, fmt: str | None = None) -> list[Span]:
    """Read a trace file and return its spans as ``Span`` objects.

    When *fmt* is None the format is detected from the file contents.

    Raises:
        ValueError: If the (given or detected) format has no parser.
    """
    chosen = auto_detect_format(jsonl_path) if fmt is None else fmt

    parsers = {
        "glint": parse_glint_trace,
        "armand0e": parse_armand0e_trace,
        "vfable": parse_vfable_trace,
    }
    if chosen not in parsers:
        raise ValueError(f"Unknown trace format: {chosen}. Supported: {list(parsers.keys())}")

    # Parse to raw dicts first, then convert them to Span objects.
    return extract_spans(parsers[chosen](jsonl_path))
|