@townco/debugger 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -8
- package/src/App.tsx +13 -0
- package/src/comparison-db.test.ts +113 -0
- package/src/comparison-db.ts +332 -0
- package/src/components/DebuggerHeader.tsx +62 -2
- package/src/components/SessionTimelineView.tsx +173 -0
- package/src/components/SpanTimeline.tsx +6 -4
- package/src/components/UnifiedTimeline.tsx +691 -0
- package/src/db.ts +71 -0
- package/src/index.ts +2 -0
- package/src/lib/metrics.test.ts +51 -0
- package/src/lib/metrics.ts +136 -0
- package/src/lib/pricing.ts +23 -0
- package/src/lib/turnExtractor.ts +64 -23
- package/src/pages/ComparisonView.tsx +685 -0
- package/src/pages/SessionList.tsx +77 -56
- package/src/pages/SessionView.tsx +3 -64
- package/src/pages/TownHall.tsx +406 -0
- package/src/schemas.ts +15 -0
- package/src/server.ts +345 -12
- package/src/types.ts +87 -0
- package/tsconfig.json +14 -0
package/src/db.ts
CHANGED
|
@@ -80,4 +80,75 @@ export class DebuggerDb {
|
|
|
80
80
|
)
|
|
81
81
|
.all(limit, offset);
|
|
82
82
|
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Get all spans that have a specific session ID in their attributes.
|
|
86
|
+
* This is more reliable than querying by trace.session_id because
|
|
87
|
+
* concurrent sessions can cause race conditions in trace association.
|
|
88
|
+
*/
|
|
89
|
+
getSpansBySessionAttribute(sessionId: string): Span[] {
|
|
90
|
+
// Use JSON extract to find spans where agent.session_id matches
|
|
91
|
+
return this.db
|
|
92
|
+
.query<Span, [string]>(
|
|
93
|
+
`
|
|
94
|
+
SELECT *
|
|
95
|
+
FROM spans
|
|
96
|
+
WHERE json_extract(attributes, '$."agent.session_id"') = ?
|
|
97
|
+
ORDER BY start_time_unix_nano ASC
|
|
98
|
+
`,
|
|
99
|
+
)
|
|
100
|
+
.all(sessionId);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Get the first user message for a session by querying span attributes.
|
|
105
|
+
* The user message is stored in the "user.message" attribute of adapter.prompt spans.
|
|
106
|
+
*/
|
|
107
|
+
getFirstUserMessageBySession(sessionId: string): string | null {
|
|
108
|
+
// Use LIKE for pattern matching since json_extract doesn't work reliably
|
|
109
|
+
// with keys containing dots
|
|
110
|
+
const span = this.db
|
|
111
|
+
.query<Span, [string]>(
|
|
112
|
+
`
|
|
113
|
+
SELECT *
|
|
114
|
+
FROM spans
|
|
115
|
+
WHERE name = 'adapter.prompt'
|
|
116
|
+
AND attributes LIKE '%"agent.session_id":"' || ? || '"%'
|
|
117
|
+
ORDER BY start_time_unix_nano ASC
|
|
118
|
+
LIMIT 1
|
|
119
|
+
`,
|
|
120
|
+
)
|
|
121
|
+
.get(sessionId);
|
|
122
|
+
|
|
123
|
+
if (!span || !span.attributes) return null;
|
|
124
|
+
|
|
125
|
+
try {
|
|
126
|
+
const attrs = JSON.parse(span.attributes);
|
|
127
|
+
return attrs["user.message"] || null;
|
|
128
|
+
} catch {
|
|
129
|
+
return null;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Get trace IDs for a session by querying span attributes.
|
|
135
|
+
* This is more reliable than querying by trace.session_id because
|
|
136
|
+
* concurrent sessions can cause race conditions in trace association.
|
|
137
|
+
* Returns trace IDs with their earliest span timestamp.
|
|
138
|
+
*/
|
|
139
|
+
getTraceIdsBySessionAttribute(
|
|
140
|
+
sessionId: string,
|
|
141
|
+
): Array<{ trace_id: string; start_time_unix_nano: number }> {
|
|
142
|
+
return this.db
|
|
143
|
+
.query<{ trace_id: string; start_time_unix_nano: number }, [string]>(
|
|
144
|
+
`
|
|
145
|
+
SELECT DISTINCT trace_id, MIN(start_time_unix_nano) as start_time_unix_nano
|
|
146
|
+
FROM spans
|
|
147
|
+
WHERE json_extract(attributes, '$."agent.session_id"') = ?
|
|
148
|
+
GROUP BY trace_id
|
|
149
|
+
ORDER BY start_time_unix_nano ASC
|
|
150
|
+
`,
|
|
151
|
+
)
|
|
152
|
+
.all(sessionId);
|
|
153
|
+
}
|
|
83
154
|
}
|
package/src/index.ts
CHANGED
|
@@ -14,12 +14,14 @@ const otlpPort = Number.parseInt(
|
|
|
14
14
|
);
|
|
15
15
|
const dbPath = process.env.DB_PATH ?? "./traces.db";
|
|
16
16
|
const agentName = process.env.AGENT_NAME ?? "Agent";
|
|
17
|
+
const agentServerUrl = process.env.AGENT_SERVER_URL ?? "http://localhost:3100";
|
|
17
18
|
|
|
18
19
|
const { server, otlpServer } = startDebuggerServer({
|
|
19
20
|
port,
|
|
20
21
|
otlpPort,
|
|
21
22
|
dbPath,
|
|
22
23
|
agentName,
|
|
24
|
+
agentServerUrl,
|
|
23
25
|
});
|
|
24
26
|
|
|
25
27
|
console.log(`OTLP server running at ${otlpServer.url}`);
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import type { Span } from "../types";
|
|
3
|
+
import { calculateCost, extractMetricsFromSpans } from "./metrics";
|
|
4
|
+
|
|
5
|
+
describe("metrics", () => {
  describe("calculateCost", () => {
    test("calculates cost correctly for known model", () => {
      // claude-sonnet-4-5: $3/1M input + $15/1M output,
      // so 1M input + 1M output tokens = $18 total.
      const cost = calculateCost(
        "claude-sonnet-4-5-20250929",
        1_000_000,
        1_000_000,
      );
      expect(cost).toBeCloseTo(18.0);
    });

    test("returns 0 for unknown model", () => {
      // Models without a pricing entry are treated as free rather than erroring.
      const cost = calculateCost("unknown-model", 1000, 1000);
      expect(cost).toBe(0);
    });
  });

  describe("extractMetricsFromSpans", () => {
    test("extracts tokens and tool calls", () => {
      // Minimal fixtures: only `name` and `attributes` are read by the
      // extractor, so the remaining Span fields are omitted via `as any`.
      const spans: Span[] = [
        {
          name: "gen_ai.generate",
          attributes: JSON.stringify({
            "gen_ai.usage.input_tokens": 100,
            "gen_ai.usage.output_tokens": 50,
          }),
        } as any,
        {
          // Tool calls are counted by span-name pattern, not attributes.
          name: "tool_call:search",
          attributes: "{}",
        } as any,
      ];

      const metrics = extractMetricsFromSpans(
        spans,
        "claude-sonnet-4-5-20250929",
      );

      expect(metrics.inputTokens).toBe(100);
      expect(metrics.outputTokens).toBe(50);
      expect(metrics.totalTokens).toBe(150);
      expect(metrics.toolCallCount).toBe(1);
      // Known model + nonzero tokens must yield a positive estimate.
      expect(metrics.estimatedCost).toBeGreaterThan(0);
    });
  });
});
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import type { SessionMetrics, Span, Trace } from "../types";
|
|
2
|
+
import { getModelPricing } from "./pricing";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Calculate estimated cost based on model and token counts
|
|
6
|
+
*/
|
|
7
|
+
export function calculateCost(
|
|
8
|
+
model: string,
|
|
9
|
+
inputTokens: number,
|
|
10
|
+
outputTokens: number,
|
|
11
|
+
): number {
|
|
12
|
+
const pricing = getModelPricing(model);
|
|
13
|
+
if (!pricing) {
|
|
14
|
+
// Unknown model, return 0
|
|
15
|
+
return 0;
|
|
16
|
+
}
|
|
17
|
+
return (
|
|
18
|
+
(inputTokens * pricing.input + outputTokens * pricing.output) / 1_000_000
|
|
19
|
+
);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Extract metrics from trace spans
|
|
24
|
+
*/
|
|
25
|
+
export function extractMetricsFromSpans(
|
|
26
|
+
spans: Span[],
|
|
27
|
+
model: string,
|
|
28
|
+
): Omit<SessionMetrics, "durationMs"> {
|
|
29
|
+
let inputTokens = 0;
|
|
30
|
+
let outputTokens = 0;
|
|
31
|
+
let toolCallCount = 0;
|
|
32
|
+
|
|
33
|
+
for (const span of spans) {
|
|
34
|
+
const attrs = span.attributes ? JSON.parse(span.attributes) : {};
|
|
35
|
+
|
|
36
|
+
// Extract token usage from gen_ai attributes
|
|
37
|
+
if (attrs["gen_ai.usage.input_tokens"]) {
|
|
38
|
+
inputTokens += attrs["gen_ai.usage.input_tokens"];
|
|
39
|
+
}
|
|
40
|
+
if (attrs["gen_ai.usage.output_tokens"]) {
|
|
41
|
+
outputTokens += attrs["gen_ai.usage.output_tokens"];
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Count tool calls
|
|
45
|
+
if (span.name.includes("tool_call") || span.name.startsWith("tool:")) {
|
|
46
|
+
toolCallCount++;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const totalTokens = inputTokens + outputTokens;
|
|
51
|
+
const estimatedCost = calculateCost(model, inputTokens, outputTokens);
|
|
52
|
+
|
|
53
|
+
return {
|
|
54
|
+
inputTokens,
|
|
55
|
+
outputTokens,
|
|
56
|
+
totalTokens,
|
|
57
|
+
estimatedCost,
|
|
58
|
+
toolCallCount,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Extract full session metrics from traces
|
|
64
|
+
*/
|
|
65
|
+
export function extractSessionMetrics(
|
|
66
|
+
traces: Trace[],
|
|
67
|
+
spans: Span[],
|
|
68
|
+
model: string,
|
|
69
|
+
): SessionMetrics {
|
|
70
|
+
// Calculate total duration from traces
|
|
71
|
+
let minStartTime = Number.MAX_SAFE_INTEGER;
|
|
72
|
+
let maxEndTime = 0;
|
|
73
|
+
|
|
74
|
+
for (const trace of traces) {
|
|
75
|
+
if (trace.start_time_unix_nano < minStartTime) {
|
|
76
|
+
minStartTime = trace.start_time_unix_nano;
|
|
77
|
+
}
|
|
78
|
+
if (trace.end_time_unix_nano > maxEndTime) {
|
|
79
|
+
maxEndTime = trace.end_time_unix_nano;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const durationMs =
|
|
84
|
+
minStartTime < Number.MAX_SAFE_INTEGER
|
|
85
|
+
? (maxEndTime - minStartTime) / 1_000_000
|
|
86
|
+
: 0;
|
|
87
|
+
|
|
88
|
+
// Extract token metrics from spans
|
|
89
|
+
const tokenMetrics = extractMetricsFromSpans(spans, model);
|
|
90
|
+
|
|
91
|
+
return {
|
|
92
|
+
durationMs,
|
|
93
|
+
...tokenMetrics,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Format duration in a human-readable way
|
|
99
|
+
*/
|
|
100
|
+
export function formatDuration(ms: number | undefined): string {
|
|
101
|
+
if (ms === undefined || ms === null) {
|
|
102
|
+
return "0ms";
|
|
103
|
+
}
|
|
104
|
+
if (ms < 1000) {
|
|
105
|
+
return `${Math.round(ms)}ms`;
|
|
106
|
+
}
|
|
107
|
+
if (ms < 60000) {
|
|
108
|
+
return `${(ms / 1000).toFixed(1)}s`;
|
|
109
|
+
}
|
|
110
|
+
const minutes = Math.floor(ms / 60000);
|
|
111
|
+
const seconds = ((ms % 60000) / 1000).toFixed(0);
|
|
112
|
+
return `${minutes}m ${seconds}s`;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Format cost in USD
|
|
117
|
+
*/
|
|
118
|
+
export function formatCost(cost: number | undefined): string {
|
|
119
|
+
if (cost === undefined || cost === null) {
|
|
120
|
+
return "$0.00";
|
|
121
|
+
}
|
|
122
|
+
if (cost < 0.01) {
|
|
123
|
+
return `$${cost.toFixed(4)}`;
|
|
124
|
+
}
|
|
125
|
+
return `$${cost.toFixed(2)}`;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Format token count with commas
|
|
130
|
+
*/
|
|
131
|
+
export function formatTokens(tokens: number | undefined): string {
|
|
132
|
+
if (tokens === undefined || tokens === null) {
|
|
133
|
+
return "0";
|
|
134
|
+
}
|
|
135
|
+
return tokens.toLocaleString();
|
|
136
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export interface ModelPricing {
|
|
2
|
+
input: number; // Cost per 1M tokens
|
|
3
|
+
output: number; // Cost per 1M tokens
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
// Model pricing per 1M tokens (USD)
|
|
7
|
+
export const DEFAULT_MODEL_PRICING: Record<string, ModelPricing> = {
|
|
8
|
+
// Anthropic models
|
|
9
|
+
"claude-sonnet-4-5-20250929": { input: 3.0, output: 15.0 },
|
|
10
|
+
"claude-3-5-haiku-20241022": { input: 0.25, output: 1.25 },
|
|
11
|
+
"claude-opus-4-5-20251101": { input: 15.0, output: 75.0 },
|
|
12
|
+
// Google Gemini models
|
|
13
|
+
"gemini-2.0-flash": { input: 0.075, output: 0.3 },
|
|
14
|
+
"gemini-1.5-pro": { input: 1.25, output: 5.0 },
|
|
15
|
+
"gemini-1.5-flash": { input: 0.075, output: 0.3 },
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Get pricing for a specific model
|
|
20
|
+
*/
|
|
21
|
+
export function getModelPricing(model: string): ModelPricing | undefined {
|
|
22
|
+
return DEFAULT_MODEL_PRICING[model];
|
|
23
|
+
}
|
package/src/lib/turnExtractor.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import type { Log, Span } from "../types";
|
|
1
|
+
import type { AgentMessage, Log, Span } from "../types";
|
|
2
2
|
|
|
3
3
|
export interface TurnMessages {
  /** User input extracted from the turn's logs, or null when none found. */
  userInput: string | null;
  /** Content of the last chat-type agent message; kept for backwards compatibility. */
  llmOutput: string | null;
  /** All agent messages (chat output and tool calls) in chronological order. */
  agentMessages: AgentMessage[];
}
|
7
8
|
|
|
8
9
|
export interface TokenUsage {
|
|
@@ -42,6 +43,7 @@ export function extractTurnMessages(spans: Span[], logs?: Log[]): TurnMessages {
|
|
|
42
43
|
const result: TurnMessages = {
|
|
43
44
|
userInput: null,
|
|
44
45
|
llmOutput: null,
|
|
46
|
+
agentMessages: [],
|
|
45
47
|
};
|
|
46
48
|
|
|
47
49
|
// Extract user input from logs
|
|
@@ -69,31 +71,70 @@ export function extractTurnMessages(spans: Span[], logs?: Log[]): TurnMessages {
|
|
|
69
71
|
})
|
|
70
72
|
.sort((a, b) => a.start_time_unix_nano - b.start_time_unix_nano);
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
// Extract LLM output from last chat span
|
|
74
|
+
// Extract LLM output from ALL chat spans and tool calls
|
|
75
75
|
try {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
.
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
76
|
+
// Find all tool call spans
|
|
77
|
+
const toolCallSpans = spans
|
|
78
|
+
.filter((span) => span.name === "agent.tool_call")
|
|
79
|
+
.sort((a, b) => a.start_time_unix_nano - b.start_time_unix_nano);
|
|
80
|
+
|
|
81
|
+
// Combine chat spans and tool calls, sorted by time
|
|
82
|
+
const allEvents = [
|
|
83
|
+
...chatSpans.map((s) => ({ span: s, type: "chat" as const })),
|
|
84
|
+
...toolCallSpans.map((s) => ({ span: s, type: "tool_call" as const })),
|
|
85
|
+
].sort((a, b) => a.span.end_time_unix_nano - b.span.end_time_unix_nano);
|
|
86
|
+
|
|
87
|
+
for (const event of allEvents) {
|
|
88
|
+
const attrs = parseAttributes(event.span.attributes);
|
|
89
|
+
|
|
90
|
+
if (event.type === "chat") {
|
|
91
|
+
const outputMessages = attrs["gen_ai.output.messages"];
|
|
92
|
+
|
|
93
|
+
if (outputMessages) {
|
|
94
|
+
const messages: Message[] =
|
|
95
|
+
typeof outputMessages === "string"
|
|
96
|
+
? JSON.parse(outputMessages)
|
|
97
|
+
: outputMessages;
|
|
98
|
+
|
|
99
|
+
// Look for assistant message - handle both "assistant" (OpenAI) and "ai" (LangChain) roles
|
|
100
|
+
const assistantMessage = messages
|
|
101
|
+
.filter((msg) => msg.role === "assistant" || msg.role === "ai")
|
|
102
|
+
.pop();
|
|
103
|
+
|
|
104
|
+
if (assistantMessage) {
|
|
105
|
+
const content = extractMessageContent(assistantMessage);
|
|
106
|
+
// Only include messages with actual text content (not empty or whitespace-only)
|
|
107
|
+
if (content && content.trim().length > 0) {
|
|
108
|
+
result.agentMessages.push({
|
|
109
|
+
content,
|
|
110
|
+
spanId: event.span.span_id,
|
|
111
|
+
timestamp: event.span.end_time_unix_nano,
|
|
112
|
+
type: "chat",
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
} else if (event.type === "tool_call") {
|
|
118
|
+
const toolName = attrs["tool.name"] as string;
|
|
119
|
+
if (toolName) {
|
|
120
|
+
result.agentMessages.push({
|
|
121
|
+
content: toolName,
|
|
122
|
+
spanId: event.span.span_id,
|
|
123
|
+
timestamp: event.span.end_time_unix_nano,
|
|
124
|
+
type: "tool_call",
|
|
125
|
+
toolName,
|
|
126
|
+
});
|
|
127
|
+
}
|
|
95
128
|
}
|
|
96
129
|
}
|
|
130
|
+
|
|
131
|
+
// Set llmOutput to the last chat message for backwards compatibility
|
|
132
|
+
const lastChatMessage = result.agentMessages
|
|
133
|
+
.filter((m) => m.type === "chat")
|
|
134
|
+
.pop();
|
|
135
|
+
if (lastChatMessage) {
|
|
136
|
+
result.llmOutput = lastChatMessage.content;
|
|
137
|
+
}
|
|
97
138
|
} catch (error) {
|
|
98
139
|
console.error("Failed to extract LLM output:", error);
|
|
99
140
|
}
|