@townco/debugger 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/analysis/analyzer.ts +38 -1
- package/src/analysis/schema.ts +24 -0
- package/src/analysis/types.ts +25 -0
- package/src/components/SessionAnalysisDialog.tsx +106 -21
- package/src/lib/metrics.ts +31 -7
- package/src/pages/ComparisonView.tsx +420 -102
- package/src/pages/FindSessions.tsx +28 -8
- package/src/server.ts +88 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@townco/debugger",
|
|
3
|
-
"version": "0.1.29",
|
|
3
|
+
"version": "0.1.31",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"engines": {
|
|
6
6
|
"bun": ">=1.3.0"
|
|
@@ -22,8 +22,8 @@
|
|
|
22
22
|
"@radix-ui/react-select": "^2.2.6",
|
|
23
23
|
"@radix-ui/react-slot": "^1.2.3",
|
|
24
24
|
"@radix-ui/react-tabs": "^1.1.0",
|
|
25
|
-
"@townco/otlp-server": "0.1.
|
|
26
|
-
"@townco/ui": "0.1.
|
|
25
|
+
"@townco/otlp-server": "0.1.31",
|
|
26
|
+
"@townco/ui": "0.1.76",
|
|
27
27
|
"bun-plugin-tailwind": "^0.1.2",
|
|
28
28
|
"class-variance-authority": "^0.7.1",
|
|
29
29
|
"clsx": "^2.1.1",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"zod": "^4.1.13"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
|
-
"@townco/tsconfig": "0.1.
|
|
38
|
+
"@townco/tsconfig": "0.1.73",
|
|
39
39
|
"@types/bun": "latest",
|
|
40
40
|
"@types/react": "^19",
|
|
41
41
|
"@types/react-dom": "^19",
|
package/src/analysis/analyzer.ts
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
import Anthropic from "@anthropic-ai/sdk";
|
|
6
6
|
import { LLMAnalysisOutputSchema, SessionAnalysisSchema } from "./schema";
|
|
7
7
|
import type {
|
|
8
|
+
AnalysisMetrics,
|
|
9
|
+
DetailedToolCall,
|
|
8
10
|
LLMAnalysisOutput,
|
|
9
11
|
PreComputedFields,
|
|
10
12
|
SessionAnalysis,
|
|
@@ -168,12 +170,23 @@ function extractJSON(text: string): string {
|
|
|
168
170
|
return text.trim();
|
|
169
171
|
}
|
|
170
172
|
|
|
173
|
+
/**
|
|
174
|
+
* Options for session analysis
|
|
175
|
+
*/
|
|
176
|
+
export interface AnalyzeSessionOptions {
|
|
177
|
+
session: StoredSession;
|
|
178
|
+
metrics?: AnalysisMetrics;
|
|
179
|
+
toolCalls?: DetailedToolCall[];
|
|
180
|
+
}
|
|
181
|
+
|
|
171
182
|
/**
|
|
172
183
|
* Analyze a session using Claude
|
|
173
184
|
*/
|
|
174
185
|
export async function analyzeSession(
|
|
175
|
-
|
|
186
|
+
options: AnalyzeSessionOptions,
|
|
176
187
|
): Promise<SessionAnalysis> {
|
|
188
|
+
const { session, metrics, toolCalls } = options;
|
|
189
|
+
|
|
177
190
|
// 1. Pre-compute extractable fields
|
|
178
191
|
const preComputed = extractPreComputedFields(session);
|
|
179
192
|
|
|
@@ -206,6 +219,19 @@ export async function analyzeSession(
|
|
|
206
219
|
const parsed = JSON.parse(jsonText);
|
|
207
220
|
const llmOutput = LLMAnalysisOutputSchema.parse(parsed);
|
|
208
221
|
|
|
222
|
+
// Use provided metrics or create defaults
|
|
223
|
+
const sessionDurationMs = metrics?.durationMs ?? calculateDurationMs(session);
|
|
224
|
+
const analysisMetrics: AnalysisMetrics = metrics ?? {
|
|
225
|
+
inputTokens: 0,
|
|
226
|
+
outputTokens: 0,
|
|
227
|
+
totalTokens: 0,
|
|
228
|
+
estimatedCost: 0,
|
|
229
|
+
durationMs: sessionDurationMs,
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
// Use provided tool calls or empty array
|
|
233
|
+
const detailedToolCalls: DetailedToolCall[] = toolCalls ?? [];
|
|
234
|
+
|
|
209
235
|
// 6. Combine pre-computed and LLM data
|
|
210
236
|
const analysis: SessionAnalysis = {
|
|
211
237
|
session_id: session.sessionId,
|
|
@@ -222,14 +248,25 @@ export async function analyzeSession(
|
|
|
222
248
|
tools_used: preComputed.toolsUsed,
|
|
223
249
|
num_steps: preComputed.numSteps,
|
|
224
250
|
num_tool_calls: preComputed.numToolCalls,
|
|
251
|
+
tool_calls: detailedToolCalls,
|
|
225
252
|
},
|
|
226
253
|
outcome: {
|
|
227
254
|
status: llmOutput.status,
|
|
228
255
|
answer_type: llmOutput.answer_type,
|
|
229
256
|
assessment: llmOutput.assessment,
|
|
230
257
|
},
|
|
258
|
+
metrics: analysisMetrics,
|
|
231
259
|
};
|
|
232
260
|
|
|
233
261
|
// 7. Validate final schema
|
|
234
262
|
return SessionAnalysisSchema.parse(analysis);
|
|
235
263
|
}
|
|
264
|
+
|
|
265
|
+
/**
|
|
266
|
+
* Calculate duration from session timestamps
|
|
267
|
+
*/
|
|
268
|
+
function calculateDurationMs(session: StoredSession): number {
|
|
269
|
+
const startTime = new Date(session.metadata.createdAt).getTime();
|
|
270
|
+
const endTime = new Date(session.metadata.updatedAt).getTime();
|
|
271
|
+
return endTime - startTime;
|
|
272
|
+
}
|
package/src/analysis/schema.ts
CHANGED
|
@@ -40,6 +40,28 @@ export const LLMAnalysisOutputSchema = z.object({
|
|
|
40
40
|
.describe("Explanation of why the status and answer_type were chosen"),
|
|
41
41
|
});
|
|
42
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Detailed tool call schema
|
|
45
|
+
*/
|
|
46
|
+
export const DetailedToolCallSchema = z.object({
|
|
47
|
+
name: z.string(),
|
|
48
|
+
input: z.unknown(),
|
|
49
|
+
output: z.unknown(),
|
|
50
|
+
startTimeUnixNano: z.number().optional(),
|
|
51
|
+
endTimeUnixNano: z.number().optional(),
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Metrics schema
|
|
56
|
+
*/
|
|
57
|
+
export const AnalysisMetricsSchema = z.object({
|
|
58
|
+
inputTokens: z.number(),
|
|
59
|
+
outputTokens: z.number(),
|
|
60
|
+
totalTokens: z.number(),
|
|
61
|
+
estimatedCost: z.number(),
|
|
62
|
+
durationMs: z.number(),
|
|
63
|
+
});
|
|
64
|
+
|
|
43
65
|
/**
|
|
44
66
|
* Complete session analysis schema
|
|
45
67
|
*/
|
|
@@ -58,10 +80,12 @@ export const SessionAnalysisSchema = z.object({
|
|
|
58
80
|
tools_used: z.array(z.string()),
|
|
59
81
|
num_steps: z.number(),
|
|
60
82
|
num_tool_calls: z.number(),
|
|
83
|
+
tool_calls: z.array(DetailedToolCallSchema),
|
|
61
84
|
}),
|
|
62
85
|
outcome: z.object({
|
|
63
86
|
status: OutcomeStatusSchema,
|
|
64
87
|
answer_type: AnswerTypeSchema,
|
|
65
88
|
assessment: z.string(),
|
|
66
89
|
}),
|
|
90
|
+
metrics: AnalysisMetricsSchema,
|
|
67
91
|
});
|
package/src/analysis/types.ts
CHANGED
|
@@ -80,6 +80,28 @@ export enum OutcomeStatus {
|
|
|
80
80
|
PARTIAL_SUCCESS = "PARTIAL_SUCCESS", // Some goals achieved
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
/**
|
|
84
|
+
* Detailed tool call with input/output
|
|
85
|
+
*/
|
|
86
|
+
export interface DetailedToolCall {
|
|
87
|
+
name: string;
|
|
88
|
+
input: unknown;
|
|
89
|
+
output: unknown;
|
|
90
|
+
startTimeUnixNano?: number | undefined;
|
|
91
|
+
endTimeUnixNano?: number | undefined;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Session metrics (tokens, cost, etc.)
|
|
96
|
+
*/
|
|
97
|
+
export interface AnalysisMetrics {
|
|
98
|
+
inputTokens: number;
|
|
99
|
+
outputTokens: number;
|
|
100
|
+
totalTokens: number;
|
|
101
|
+
estimatedCost: number;
|
|
102
|
+
durationMs: number;
|
|
103
|
+
}
|
|
104
|
+
|
|
83
105
|
/**
|
|
84
106
|
* Complete session analysis result
|
|
85
107
|
*/
|
|
@@ -100,6 +122,7 @@ export interface SessionAnalysis {
|
|
|
100
122
|
tools_used: string[]; // Pre-computed from tool calls
|
|
101
123
|
num_steps: number; // Pre-computed (count assistant messages)
|
|
102
124
|
num_tool_calls: number; // Pre-computed
|
|
125
|
+
tool_calls: DetailedToolCall[]; // Detailed tool call info with args/results
|
|
103
126
|
};
|
|
104
127
|
|
|
105
128
|
outcome: {
|
|
@@ -107,6 +130,8 @@ export interface SessionAnalysis {
|
|
|
107
130
|
answer_type: AnswerType; // LLM-selected from enum
|
|
108
131
|
assessment: string; // LLM-generated explanation of status and answer_type
|
|
109
132
|
};
|
|
133
|
+
|
|
134
|
+
metrics: AnalysisMetrics; // Token counts, cost, duration
|
|
110
135
|
}
|
|
111
136
|
|
|
112
137
|
/**
|
|
package/src/components/SessionAnalysisDialog.tsx
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import type { SessionAnalysis } from "../analysis/types";
|
|
1
|
+
import type { DetailedToolCall, SessionAnalysis } from "../analysis/types";
|
|
2
|
+
import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
|
|
2
3
|
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "./ui/dialog";
|
|
3
4
|
|
|
4
5
|
interface Props {
|
|
@@ -11,22 +12,9 @@ function formatDate(isoString: string): string {
|
|
|
11
12
|
return new Date(isoString).toLocaleString();
|
|
12
13
|
}
|
|
13
14
|
|
|
14
|
-
function
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const durationMs = endTime - startTime;
|
|
18
|
-
|
|
19
|
-
const seconds = Math.floor(durationMs / 1000);
|
|
20
|
-
const minutes = Math.floor(seconds / 60);
|
|
21
|
-
const hours = Math.floor(minutes / 60);
|
|
22
|
-
|
|
23
|
-
if (hours > 0) {
|
|
24
|
-
return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
|
|
25
|
-
}
|
|
26
|
-
if (minutes > 0) {
|
|
27
|
-
return `${minutes}m ${seconds % 60}s`;
|
|
28
|
-
}
|
|
29
|
-
return `${seconds}s`;
|
|
15
|
+
function formatToolTime(ns?: number): string {
|
|
16
|
+
if (!ns) return "";
|
|
17
|
+
return new Date(ns / 1_000_000).toLocaleTimeString();
|
|
30
18
|
}
|
|
31
19
|
|
|
32
20
|
function Section({
|
|
@@ -75,6 +63,49 @@ function Metric({ label, value }: { label: string; value: number }) {
|
|
|
75
63
|
);
|
|
76
64
|
}
|
|
77
65
|
|
|
66
|
+
function ToolCallDetails({ toolCalls }: { toolCalls: DetailedToolCall[] }) {
|
|
67
|
+
if (!toolCalls || toolCalls.length === 0) {
|
|
68
|
+
return <div className="text-xs text-muted-foreground">No tool calls</div>;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return (
|
|
72
|
+
<div className="space-y-2">
|
|
73
|
+
{toolCalls.map((call, idx) => (
|
|
74
|
+
<details
|
|
75
|
+
key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
|
|
76
|
+
className="rounded-md border px-3 py-2 bg-muted/50"
|
|
77
|
+
>
|
|
78
|
+
<summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
|
|
79
|
+
<span>
|
|
80
|
+
{call.name}{" "}
|
|
81
|
+
{call.startTimeUnixNano ? (
|
|
82
|
+
<span className="text-muted-foreground">
|
|
83
|
+
@ {formatToolTime(call.startTimeUnixNano)}
|
|
84
|
+
</span>
|
|
85
|
+
) : null}
|
|
86
|
+
</span>
|
|
87
|
+
<span className="text-muted-foreground text-[11px]">view</span>
|
|
88
|
+
</summary>
|
|
89
|
+
<div className="mt-2 text-[11px] space-y-1 break-words">
|
|
90
|
+
<div>
|
|
91
|
+
<span className="font-semibold">Args:</span>{" "}
|
|
92
|
+
<pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
|
|
93
|
+
{JSON.stringify(call.input, null, 2)}
|
|
94
|
+
</pre>
|
|
95
|
+
</div>
|
|
96
|
+
<div>
|
|
97
|
+
<span className="font-semibold">Result:</span>{" "}
|
|
98
|
+
<pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
|
|
99
|
+
{JSON.stringify(call.output, null, 2)}
|
|
100
|
+
</pre>
|
|
101
|
+
</div>
|
|
102
|
+
</div>
|
|
103
|
+
</details>
|
|
104
|
+
))}
|
|
105
|
+
</div>
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
|
|
78
109
|
export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
|
|
79
110
|
return (
|
|
80
111
|
<Dialog open={open} onOpenChange={onClose}>
|
|
@@ -121,6 +152,16 @@ export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
|
|
|
121
152
|
</div>
|
|
122
153
|
</div>
|
|
123
154
|
)}
|
|
155
|
+
{/* Detailed Tool Calls */}
|
|
156
|
+
{analysis.trajectory.tool_calls &&
|
|
157
|
+
analysis.trajectory.tool_calls.length > 0 && (
|
|
158
|
+
<div className="space-y-2 pt-2">
|
|
159
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
160
|
+
Tool Call Details
|
|
161
|
+
</div>
|
|
162
|
+
<ToolCallDetails toolCalls={analysis.trajectory.tool_calls} />
|
|
163
|
+
</div>
|
|
164
|
+
)}
|
|
124
165
|
</Section>
|
|
125
166
|
|
|
126
167
|
{/* Outcome Section */}
|
|
@@ -132,16 +173,60 @@ export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
|
|
|
132
173
|
<Field label="Assessment" value={analysis.outcome.assessment} />
|
|
133
174
|
</Section>
|
|
134
175
|
|
|
176
|
+
{/* Metrics Section */}
|
|
177
|
+
{analysis.metrics && (
|
|
178
|
+
<Section title="Metrics">
|
|
179
|
+
<div className="grid grid-cols-5 gap-4">
|
|
180
|
+
<div className="space-y-1">
|
|
181
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
182
|
+
Duration
|
|
183
|
+
</div>
|
|
184
|
+
<div className="text-lg font-semibold">
|
|
185
|
+
{formatDuration(analysis.metrics.durationMs)}
|
|
186
|
+
</div>
|
|
187
|
+
</div>
|
|
188
|
+
<div className="space-y-1">
|
|
189
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
190
|
+
Input Tokens
|
|
191
|
+
</div>
|
|
192
|
+
<div className="text-lg font-semibold">
|
|
193
|
+
{formatTokens(analysis.metrics.inputTokens)}
|
|
194
|
+
</div>
|
|
195
|
+
</div>
|
|
196
|
+
<div className="space-y-1">
|
|
197
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
198
|
+
Output Tokens
|
|
199
|
+
</div>
|
|
200
|
+
<div className="text-lg font-semibold">
|
|
201
|
+
{formatTokens(analysis.metrics.outputTokens)}
|
|
202
|
+
</div>
|
|
203
|
+
</div>
|
|
204
|
+
<div className="space-y-1">
|
|
205
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
206
|
+
Total Tokens
|
|
207
|
+
</div>
|
|
208
|
+
<div className="text-lg font-semibold">
|
|
209
|
+
{formatTokens(analysis.metrics.totalTokens)}
|
|
210
|
+
</div>
|
|
211
|
+
</div>
|
|
212
|
+
<div className="space-y-1">
|
|
213
|
+
<div className="text-xs font-medium text-muted-foreground">
|
|
214
|
+
Estimated Cost
|
|
215
|
+
</div>
|
|
216
|
+
<div className="text-lg font-semibold text-green-600 dark:text-green-400">
|
|
217
|
+
{formatCost(analysis.metrics.estimatedCost)}
|
|
218
|
+
</div>
|
|
219
|
+
</div>
|
|
220
|
+
</div>
|
|
221
|
+
</Section>
|
|
222
|
+
)}
|
|
223
|
+
|
|
135
224
|
{/* Metadata Section */}
|
|
136
225
|
<Section title="Metadata">
|
|
137
226
|
<div className="grid grid-cols-2 gap-4">
|
|
138
227
|
<Field label="Started" value={formatDate(analysis.started_at)} />
|
|
139
228
|
<Field label="Ended" value={formatDate(analysis.ended_at)} />
|
|
140
229
|
</div>
|
|
141
|
-
<Field
|
|
142
|
-
label="Duration"
|
|
143
|
-
value={calculateDuration(analysis.started_at, analysis.ended_at)}
|
|
144
|
-
/>
|
|
145
230
|
<Field label="Agent" value={analysis.agent_name} />
|
|
146
231
|
<Field
|
|
147
232
|
label="Session ID"
|
package/src/lib/metrics.ts
CHANGED
|
@@ -136,10 +136,15 @@ export function extractMetricsFromSpans(
|
|
|
136
136
|
const totalTokens = inputTokens + outputTokens;
|
|
137
137
|
const estimatedCost = calculateCost(model, inputTokens, outputTokens);
|
|
138
138
|
// Dedupe tool calls using name + start time to avoid double counting when captured in multiple places
|
|
139
|
+
// Prefer entries with actual output over entries with null output
|
|
139
140
|
const deduped = new Map<string, ToolCall>();
|
|
140
141
|
for (const call of toolCalls) {
|
|
141
142
|
const key = `${call.name}-${call.startTimeUnixNano ?? ""}`;
|
|
142
|
-
|
|
143
|
+
const existing = deduped.get(key);
|
|
144
|
+
if (!existing) {
|
|
145
|
+
deduped.set(key, call);
|
|
146
|
+
} else if (existing.output == null && call.output != null) {
|
|
147
|
+
// Replace null-output entry with one that has actual output
|
|
143
148
|
deduped.set(key, call);
|
|
144
149
|
}
|
|
145
150
|
}
|
|
@@ -163,8 +168,10 @@ export function extractSessionMetrics(
|
|
|
163
168
|
spans: Span[],
|
|
164
169
|
model: string,
|
|
165
170
|
): SessionMetrics {
|
|
166
|
-
// Calculate total duration from traces
|
|
167
|
-
|
|
171
|
+
// Calculate total duration from traces first
|
|
172
|
+
// Note: Using Infinity instead of Number.MAX_SAFE_INTEGER because nanosecond
|
|
173
|
+
// timestamps exceed MAX_SAFE_INTEGER and JS number comparison doesn't work correctly
|
|
174
|
+
let minStartTime = Infinity;
|
|
168
175
|
let maxEndTime = 0;
|
|
169
176
|
|
|
170
177
|
for (const trace of traces) {
|
|
@@ -176,10 +183,27 @@ export function extractSessionMetrics(
|
|
|
176
183
|
}
|
|
177
184
|
}
|
|
178
185
|
|
|
179
|
-
|
|
180
|
-
minStartTime <
|
|
181
|
-
|
|
182
|
-
|
|
186
|
+
let durationMs =
|
|
187
|
+
minStartTime < Infinity ? (maxEndTime - minStartTime) / 1_000_000 : 0;
|
|
188
|
+
|
|
189
|
+
// If traces didn't give us duration, calculate from spans as fallback
|
|
190
|
+
if (durationMs === 0 && spans.length > 0) {
|
|
191
|
+
let spanMinStart = Infinity;
|
|
192
|
+
let spanMaxEnd = 0;
|
|
193
|
+
|
|
194
|
+
for (const span of spans) {
|
|
195
|
+
if (span.start_time_unix_nano < spanMinStart) {
|
|
196
|
+
spanMinStart = span.start_time_unix_nano;
|
|
197
|
+
}
|
|
198
|
+
if (span.end_time_unix_nano > spanMaxEnd) {
|
|
199
|
+
spanMaxEnd = span.end_time_unix_nano;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if (spanMinStart < Infinity) {
|
|
204
|
+
durationMs = (spanMaxEnd - spanMinStart) / 1_000_000;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
183
207
|
|
|
184
208
|
// Extract token metrics from spans
|
|
185
209
|
const tokenMetrics = extractMetricsFromSpans(spans, model);
|
|
package/src/pages/ComparisonView.tsx
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ChevronDown, ChevronUp, Loader2 } from "lucide-react";
|
|
1
2
|
import { useCallback, useEffect, useState } from "react";
|
|
2
3
|
import { Button } from "@/components/ui/button";
|
|
3
4
|
import {
|
|
@@ -7,6 +8,7 @@ import {
|
|
|
7
8
|
CardHeader,
|
|
8
9
|
CardTitle,
|
|
9
10
|
} from "@/components/ui/card";
|
|
11
|
+
import type { SessionAnalysis } from "../analysis/types";
|
|
10
12
|
import { DebuggerLayout } from "../components/DebuggerLayout";
|
|
11
13
|
import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
|
|
12
14
|
import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
|
|
@@ -33,6 +35,259 @@ const AGENT_SERVER_URL =
|
|
|
33
35
|
? window.location.origin.replace(":4000", ":3100")
|
|
34
36
|
: "http://localhost:3100";
|
|
35
37
|
|
|
38
|
+
// Expandable Session Analysis Panel
|
|
39
|
+
function SessionAnalysisPanel({
|
|
40
|
+
analysis,
|
|
41
|
+
isLoading,
|
|
42
|
+
isExpanded,
|
|
43
|
+
onToggle,
|
|
44
|
+
accentColor,
|
|
45
|
+
}: {
|
|
46
|
+
analysis: SessionAnalysis | null;
|
|
47
|
+
isLoading: boolean;
|
|
48
|
+
isExpanded: boolean;
|
|
49
|
+
onToggle: () => void;
|
|
50
|
+
accentColor: "blue" | "orange";
|
|
51
|
+
}) {
|
|
52
|
+
const colorClasses =
|
|
53
|
+
accentColor === "blue"
|
|
54
|
+
? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
|
|
55
|
+
: "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
|
|
56
|
+
|
|
57
|
+
const headerColorClasses =
|
|
58
|
+
accentColor === "blue"
|
|
59
|
+
? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
|
|
60
|
+
: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
|
|
61
|
+
|
|
62
|
+
if (isLoading) {
|
|
63
|
+
return (
|
|
64
|
+
<div className={`border rounded-md p-3 ${colorClasses}`}>
|
|
65
|
+
<div className="flex items-center gap-2 text-xs text-muted-foreground">
|
|
66
|
+
<Loader2 className="w-3 h-3 animate-spin" />
|
|
67
|
+
Loading analysis...
|
|
68
|
+
</div>
|
|
69
|
+
</div>
|
|
70
|
+
);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (!analysis) {
|
|
74
|
+
return null;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return (
|
|
78
|
+
<div className={`border rounded-md overflow-hidden ${colorClasses}`}>
|
|
79
|
+
<button
|
|
80
|
+
type="button"
|
|
81
|
+
onClick={onToggle}
|
|
82
|
+
className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${headerColorClasses}`}
|
|
83
|
+
>
|
|
84
|
+
<div className="flex items-center gap-2">
|
|
85
|
+
<span className="text-xs font-semibold">Session Analysis</span>
|
|
86
|
+
<span
|
|
87
|
+
className={`text-[10px] px-1.5 py-0.5 rounded ${
|
|
88
|
+
analysis.outcome.status === "SUCCESS"
|
|
89
|
+
? "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300"
|
|
90
|
+
: analysis.outcome.status === "FAILURE"
|
|
91
|
+
? "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300"
|
|
92
|
+
: "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/50 dark:text-yellow-300"
|
|
93
|
+
}`}
|
|
94
|
+
>
|
|
95
|
+
{analysis.outcome.status}
|
|
96
|
+
</span>
|
|
97
|
+
</div>
|
|
98
|
+
{isExpanded ? (
|
|
99
|
+
<ChevronUp className="w-4 h-4 text-muted-foreground" />
|
|
100
|
+
) : (
|
|
101
|
+
<ChevronDown className="w-4 h-4 text-muted-foreground" />
|
|
102
|
+
)}
|
|
103
|
+
</button>
|
|
104
|
+
|
|
105
|
+
{isExpanded && (
|
|
106
|
+
<div className="px-3 pb-3 space-y-3 text-xs">
|
|
107
|
+
{/* Task */}
|
|
108
|
+
<div>
|
|
109
|
+
<div className="font-semibold text-muted-foreground mb-1">
|
|
110
|
+
Task Summary
|
|
111
|
+
</div>
|
|
112
|
+
<div className="text-foreground">{analysis.task.task_summary}</div>
|
|
113
|
+
</div>
|
|
114
|
+
|
|
115
|
+
{/* Intent */}
|
|
116
|
+
<div className="flex items-center gap-2">
|
|
117
|
+
<span className="font-semibold text-muted-foreground">Intent:</span>
|
|
118
|
+
<span className="px-2 py-0.5 bg-primary/10 text-primary rounded text-[11px] font-medium">
|
|
119
|
+
{analysis.task.intent_type}
|
|
120
|
+
</span>
|
|
121
|
+
</div>
|
|
122
|
+
|
|
123
|
+
{/* Trajectory */}
|
|
124
|
+
<div>
|
|
125
|
+
<div className="font-semibold text-muted-foreground mb-1">
|
|
126
|
+
High Level Plan
|
|
127
|
+
</div>
|
|
128
|
+
<div className="text-foreground text-[11px] leading-relaxed">
|
|
129
|
+
{analysis.trajectory.high_level_plan}
|
|
130
|
+
</div>
|
|
131
|
+
</div>
|
|
132
|
+
|
|
133
|
+
{/* Outcome */}
|
|
134
|
+
<div>
|
|
135
|
+
<div className="font-semibold text-muted-foreground mb-1">
|
|
136
|
+
Assessment
|
|
137
|
+
</div>
|
|
138
|
+
<div className="text-foreground text-[11px] leading-relaxed">
|
|
139
|
+
{analysis.outcome.assessment}
|
|
140
|
+
</div>
|
|
141
|
+
</div>
|
|
142
|
+
|
|
143
|
+
{/* Answer Type */}
|
|
144
|
+
<div className="flex items-center gap-2">
|
|
145
|
+
<span className="font-semibold text-muted-foreground">
|
|
146
|
+
Answer Type:
|
|
147
|
+
</span>
|
|
148
|
+
<span className="px-2 py-0.5 bg-secondary text-secondary-foreground rounded text-[11px] font-medium">
|
|
149
|
+
{analysis.outcome.answer_type}
|
|
150
|
+
</span>
|
|
151
|
+
</div>
|
|
152
|
+
|
|
153
|
+
{/* Metrics Summary */}
|
|
154
|
+
{analysis.metrics && (
|
|
155
|
+
<div className="grid grid-cols-5 gap-2 pt-2 border-t border-border/50">
|
|
156
|
+
<div>
|
|
157
|
+
<div className="text-[10px] text-muted-foreground">
|
|
158
|
+
Duration
|
|
159
|
+
</div>
|
|
160
|
+
<div className="font-medium">
|
|
161
|
+
{formatDuration(analysis.metrics.durationMs)}
|
|
162
|
+
</div>
|
|
163
|
+
</div>
|
|
164
|
+
<div>
|
|
165
|
+
<div className="text-[10px] text-muted-foreground">Input</div>
|
|
166
|
+
<div className="font-medium">
|
|
167
|
+
{formatTokens(analysis.metrics.inputTokens)}
|
|
168
|
+
</div>
|
|
169
|
+
</div>
|
|
170
|
+
<div>
|
|
171
|
+
<div className="text-[10px] text-muted-foreground">Output</div>
|
|
172
|
+
<div className="font-medium">
|
|
173
|
+
{formatTokens(analysis.metrics.outputTokens)}
|
|
174
|
+
</div>
|
|
175
|
+
</div>
|
|
176
|
+
<div>
|
|
177
|
+
<div className="text-[10px] text-muted-foreground">Total</div>
|
|
178
|
+
<div className="font-medium">
|
|
179
|
+
{formatTokens(analysis.metrics.totalTokens)}
|
|
180
|
+
</div>
|
|
181
|
+
</div>
|
|
182
|
+
<div>
|
|
183
|
+
<div className="text-[10px] text-muted-foreground">Cost</div>
|
|
184
|
+
<div className="font-medium text-green-600 dark:text-green-400">
|
|
185
|
+
{formatCost(analysis.metrics.estimatedCost)}
|
|
186
|
+
</div>
|
|
187
|
+
</div>
|
|
188
|
+
</div>
|
|
189
|
+
)}
|
|
190
|
+
</div>
|
|
191
|
+
)}
|
|
192
|
+
</div>
|
|
193
|
+
);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Collapsible Tool Calls Panel
|
|
197
|
+
function ToolCallsPanel({
|
|
198
|
+
toolCalls,
|
|
199
|
+
isExpanded,
|
|
200
|
+
onToggle,
|
|
201
|
+
accentColor,
|
|
202
|
+
}: {
|
|
203
|
+
toolCalls: SessionMetrics["toolCalls"];
|
|
204
|
+
isExpanded: boolean;
|
|
205
|
+
onToggle: () => void;
|
|
206
|
+
accentColor: "blue" | "orange";
|
|
207
|
+
}) {
|
|
208
|
+
const colorClasses =
|
|
209
|
+
accentColor === "blue"
|
|
210
|
+
? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
|
|
211
|
+
: "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
|
|
212
|
+
|
|
213
|
+
const headerColorClasses =
|
|
214
|
+
accentColor === "blue"
|
|
215
|
+
? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
|
|
216
|
+
: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
|
|
217
|
+
|
|
218
|
+
const toolCallCount = toolCalls?.length ?? 0;
|
|
219
|
+
|
|
220
|
+
return (
|
|
221
|
+
<div className={`border rounded-md overflow-hidden ${colorClasses}`}>
|
|
222
|
+
<button
|
|
223
|
+
type="button"
|
|
224
|
+
onClick={onToggle}
|
|
225
|
+
className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${headerColorClasses}`}
|
|
226
|
+
>
|
|
227
|
+
<div className="flex items-center gap-2">
|
|
228
|
+
<span className="text-xs font-semibold">Tool Calls</span>
|
|
229
|
+
<span className="text-[10px] px-1.5 py-0.5 rounded bg-secondary text-secondary-foreground">
|
|
230
|
+
{toolCallCount}
|
|
231
|
+
</span>
|
|
232
|
+
</div>
|
|
233
|
+
{isExpanded ? (
|
|
234
|
+
<ChevronUp className="w-4 h-4 text-muted-foreground" />
|
|
235
|
+
) : (
|
|
236
|
+
<ChevronDown className="w-4 h-4 text-muted-foreground" />
|
|
237
|
+
)}
|
|
238
|
+
</button>
|
|
239
|
+
|
|
240
|
+
{isExpanded && (
|
|
241
|
+
<div className="px-3 pb-3">
|
|
242
|
+
{!toolCalls || toolCalls.length === 0 ? (
|
|
243
|
+
<div className="text-xs text-muted-foreground">No tool calls</div>
|
|
244
|
+
) : (
|
|
245
|
+
<div className="space-y-2">
|
|
246
|
+
{toolCalls.map((call, idx) => (
|
|
247
|
+
<details
|
|
248
|
+
key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
|
|
249
|
+
className="rounded-md border px-3 py-2 bg-background/50"
|
|
250
|
+
>
|
|
251
|
+
<summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
|
|
252
|
+
<span>
|
|
253
|
+
{call.name}{" "}
|
|
254
|
+
{call.startTimeUnixNano ? (
|
|
255
|
+
<span className="text-muted-foreground">
|
|
256
|
+
@{" "}
|
|
257
|
+
{new Date(
|
|
258
|
+
call.startTimeUnixNano / 1_000_000,
|
|
259
|
+
).toLocaleTimeString()}
|
|
260
|
+
</span>
|
|
261
|
+
) : null}
|
|
262
|
+
</span>
|
|
263
|
+
<span className="text-muted-foreground text-[11px]">
|
|
264
|
+
view
|
|
265
|
+
</span>
|
|
266
|
+
</summary>
|
|
267
|
+
<div className="mt-2 text-[11px] space-y-1 break-words">
|
|
268
|
+
<div>
|
|
269
|
+
<span className="font-semibold">Args:</span>{" "}
|
|
270
|
+
<pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
|
|
271
|
+
{JSON.stringify(call.input, null, 2)}
|
|
272
|
+
</pre>
|
|
273
|
+
</div>
|
|
274
|
+
<div>
|
|
275
|
+
<span className="font-semibold">Result:</span>{" "}
|
|
276
|
+
<pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
|
|
277
|
+
{JSON.stringify(call.output, null, 2)}
|
|
278
|
+
</pre>
|
|
279
|
+
</div>
|
|
280
|
+
</div>
|
|
281
|
+
</details>
|
|
282
|
+
))}
|
|
283
|
+
</div>
|
|
284
|
+
)}
|
|
285
|
+
</div>
|
|
286
|
+
)}
|
|
287
|
+
</div>
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
|
|
36
291
|
export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
37
292
|
const [run, setRun] = useState<ComparisonRun | null>(null);
|
|
38
293
|
const [config, setConfig] = useState<ComparisonConfig | null>(null);
|
|
@@ -58,6 +313,28 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
58
313
|
const [isRunning, setIsRunning] = useState(false);
|
|
59
314
|
const [hasRun, setHasRun] = useState(false);
|
|
60
315
|
|
|
316
|
+
// Session analysis state
|
|
317
|
+
const [controlAnalysis, setControlAnalysis] =
|
|
318
|
+
useState<SessionAnalysis | null>(null);
|
|
319
|
+
const [variantAnalysis, setVariantAnalysis] =
|
|
320
|
+
useState<SessionAnalysis | null>(null);
|
|
321
|
+
const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
|
|
322
|
+
const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
|
|
323
|
+
const [analysisExpanded, setAnalysisExpanded] = useState<{
|
|
324
|
+
control: boolean;
|
|
325
|
+
variant: boolean;
|
|
326
|
+
}>({
|
|
327
|
+
control: false,
|
|
328
|
+
variant: false,
|
|
329
|
+
});
|
|
330
|
+
const [toolCallsExpanded, setToolCallsExpanded] = useState<{
|
|
331
|
+
control: boolean;
|
|
332
|
+
variant: boolean;
|
|
333
|
+
}>({
|
|
334
|
+
control: false,
|
|
335
|
+
variant: false,
|
|
336
|
+
});
|
|
337
|
+
|
|
61
338
|
// Fetch comparison run details and restore saved messages
|
|
62
339
|
useEffect(() => {
|
|
63
340
|
Promise.all([
|
|
@@ -99,8 +376,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
99
376
|
}
|
|
100
377
|
}
|
|
101
378
|
|
|
102
|
-
// Fetch the config
|
|
103
|
-
return fetch(`/api/comparison-config`).then((res) =>
|
|
379
|
+
// Fetch the config by the run's configId (not the latest config!)
|
|
380
|
+
return fetch(`/api/comparison-config/${runData.configId}`).then((res) =>
|
|
381
|
+
res.json(),
|
|
382
|
+
);
|
|
104
383
|
})
|
|
105
384
|
.then((configData) => {
|
|
106
385
|
setConfig(configData);
|
|
@@ -489,6 +768,91 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
489
768
|
}
|
|
490
769
|
}, [run, config, runId]);
|
|
491
770
|
|
|
771
|
+
// Function to fetch existing or trigger new session analysis
|
|
772
|
+
const triggerAnalysis = useCallback(
|
|
773
|
+
async (sessionId: string, type: "control" | "variant") => {
|
|
774
|
+
const setLoading =
|
|
775
|
+
type === "control"
|
|
776
|
+
? setControlAnalysisLoading
|
|
777
|
+
: setVariantAnalysisLoading;
|
|
778
|
+
const setAnalysis =
|
|
779
|
+
type === "control" ? setControlAnalysis : setVariantAnalysis;
|
|
780
|
+
|
|
781
|
+
setLoading(true);
|
|
782
|
+
try {
|
|
783
|
+
// First try to fetch existing analysis from cache
|
|
784
|
+
const existingRes = await fetch(
|
|
785
|
+
`/api/session-analyses?sessionId=${sessionId}`,
|
|
786
|
+
);
|
|
787
|
+
if (existingRes.ok) {
|
|
788
|
+
const existingAnalysis = await existingRes.json();
|
|
789
|
+
if (existingAnalysis && !existingAnalysis.error) {
|
|
790
|
+
setAnalysis(existingAnalysis);
|
|
791
|
+
setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
|
|
792
|
+
return;
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
// No existing analysis, trigger new one
|
|
797
|
+
const res = await fetch(`/api/analyze-session/${sessionId}`, {
|
|
798
|
+
method: "POST",
|
|
799
|
+
});
|
|
800
|
+
if (res.ok) {
|
|
801
|
+
const analysis = await res.json();
|
|
802
|
+
setAnalysis(analysis);
|
|
803
|
+
// Auto-expand when analysis completes
|
|
804
|
+
setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
|
|
805
|
+
}
|
|
806
|
+
} catch (err) {
|
|
807
|
+
console.error(`Failed to analyze ${type} session:`, err);
|
|
808
|
+
} finally {
|
|
809
|
+
setLoading(false);
|
|
810
|
+
}
|
|
811
|
+
},
|
|
812
|
+
[],
|
|
813
|
+
);
|
|
814
|
+
|
|
815
|
+
// Auto-trigger analysis when sessions complete
|
|
816
|
+
useEffect(() => {
|
|
817
|
+
// Control session completed
|
|
818
|
+
if (
|
|
819
|
+
controlState.sessionId &&
|
|
820
|
+
!controlState.isStreaming &&
|
|
821
|
+
controlState.metrics &&
|
|
822
|
+
!controlAnalysis &&
|
|
823
|
+
!controlAnalysisLoading
|
|
824
|
+
) {
|
|
825
|
+
triggerAnalysis(controlState.sessionId, "control");
|
|
826
|
+
}
|
|
827
|
+
}, [
|
|
828
|
+
controlState.sessionId,
|
|
829
|
+
controlState.isStreaming,
|
|
830
|
+
controlState.metrics,
|
|
831
|
+
controlAnalysis,
|
|
832
|
+
controlAnalysisLoading,
|
|
833
|
+
triggerAnalysis,
|
|
834
|
+
]);
|
|
835
|
+
|
|
836
|
+
useEffect(() => {
|
|
837
|
+
// Variant session completed
|
|
838
|
+
if (
|
|
839
|
+
variantState.sessionId &&
|
|
840
|
+
!variantState.isStreaming &&
|
|
841
|
+
variantState.metrics &&
|
|
842
|
+
!variantAnalysis &&
|
|
843
|
+
!variantAnalysisLoading
|
|
844
|
+
) {
|
|
845
|
+
triggerAnalysis(variantState.sessionId, "variant");
|
|
846
|
+
}
|
|
847
|
+
}, [
|
|
848
|
+
variantState.sessionId,
|
|
849
|
+
variantState.isStreaming,
|
|
850
|
+
variantState.metrics,
|
|
851
|
+
variantAnalysis,
|
|
852
|
+
variantAnalysisLoading,
|
|
853
|
+
triggerAnalysis,
|
|
854
|
+
]);
|
|
855
|
+
|
|
492
856
|
if (loading) {
|
|
493
857
|
return (
|
|
494
858
|
<DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
|
|
@@ -555,54 +919,6 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
555
919
|
return config.dimensions.map((d) => d.replace("_", " ")).join(", ");
|
|
556
920
|
};
|
|
557
921
|
|
|
558
|
-
const formatToolTime = (ns?: number) => {
|
|
559
|
-
if (!ns) return "";
|
|
560
|
-
return new Date(ns / 1_000_000).toLocaleTimeString();
|
|
561
|
-
};
|
|
562
|
-
|
|
563
|
-
const renderToolCalls = (toolCalls?: SessionMetrics["toolCalls"]) => {
|
|
564
|
-
if (!toolCalls || toolCalls.length === 0) {
|
|
565
|
-
return <div className="text-xs text-muted-foreground">No tool calls</div>;
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
return (
|
|
569
|
-
<div className="space-y-2">
|
|
570
|
-
{toolCalls.map((call, idx) => (
|
|
571
|
-
<details
|
|
572
|
-
key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
|
|
573
|
-
className="rounded-md border px-3 py-2 bg-muted/50"
|
|
574
|
-
>
|
|
575
|
-
<summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
|
|
576
|
-
<span>
|
|
577
|
-
{call.name}{" "}
|
|
578
|
-
{call.startTimeUnixNano ? (
|
|
579
|
-
<span className="text-muted-foreground">
|
|
580
|
-
@ {formatToolTime(call.startTimeUnixNano)}
|
|
581
|
-
</span>
|
|
582
|
-
) : null}
|
|
583
|
-
</span>
|
|
584
|
-
<span className="text-muted-foreground text-[11px]">view</span>
|
|
585
|
-
</summary>
|
|
586
|
-
<div className="mt-2 text-[11px] space-y-1 break-words">
|
|
587
|
-
<div>
|
|
588
|
-
<span className="font-semibold">Args:</span>{" "}
|
|
589
|
-
<code className="break-words">
|
|
590
|
-
{JSON.stringify(call.input, null, 2)}
|
|
591
|
-
</code>
|
|
592
|
-
</div>
|
|
593
|
-
<div>
|
|
594
|
-
<span className="font-semibold">Result:</span>{" "}
|
|
595
|
-
<code className="break-words">
|
|
596
|
-
{JSON.stringify(call.output, null, 2)}
|
|
597
|
-
</code>
|
|
598
|
-
</div>
|
|
599
|
-
</div>
|
|
600
|
-
</details>
|
|
601
|
-
))}
|
|
602
|
-
</div>
|
|
603
|
-
);
|
|
604
|
-
};
|
|
605
|
-
|
|
606
922
|
return (
|
|
607
923
|
<DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
|
|
608
924
|
<div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
|
|
@@ -703,33 +1019,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
703
1019
|
</div>
|
|
704
1020
|
)}
|
|
705
1021
|
</CardContent>
|
|
706
|
-
{/*
|
|
1022
|
+
{/* Session Analysis & Tool Calls */}
|
|
707
1023
|
{controlState.metrics && (
|
|
708
|
-
<div className="border-t p-3 shrink-0 bg-muted/50">
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
1024
|
+
<div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
|
|
1025
|
+
{/* Session Analysis */}
|
|
1026
|
+
<SessionAnalysisPanel
|
|
1027
|
+
analysis={controlAnalysis}
|
|
1028
|
+
isLoading={controlAnalysisLoading}
|
|
1029
|
+
isExpanded={analysisExpanded.control}
|
|
1030
|
+
onToggle={() =>
|
|
1031
|
+
setAnalysisExpanded((prev) => ({
|
|
1032
|
+
...prev,
|
|
1033
|
+
control: !prev.control,
|
|
1034
|
+
}))
|
|
1035
|
+
}
|
|
1036
|
+
accentColor="blue"
|
|
1037
|
+
/>
|
|
1038
|
+
{/* Tool Calls */}
|
|
1039
|
+
<ToolCallsPanel
|
|
1040
|
+
toolCalls={controlState.metrics.toolCalls}
|
|
1041
|
+
isExpanded={toolCallsExpanded.control}
|
|
1042
|
+
onToggle={() =>
|
|
1043
|
+
setToolCallsExpanded((prev) => ({
|
|
1044
|
+
...prev,
|
|
1045
|
+
control: !prev.control,
|
|
1046
|
+
}))
|
|
1047
|
+
}
|
|
1048
|
+
accentColor="blue"
|
|
1049
|
+
/>
|
|
733
1050
|
</div>
|
|
734
1051
|
)}
|
|
735
1052
|
</Card>
|
|
@@ -770,33 +1087,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
770
1087
|
</div>
|
|
771
1088
|
)}
|
|
772
1089
|
</CardContent>
|
|
773
|
-
{/*
|
|
1090
|
+
{/* Session Analysis & Tool Calls */}
|
|
774
1091
|
{variantState.metrics && (
|
|
775
|
-
<div className="border-t p-3 shrink-0 bg-muted/50">
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
1092
|
+
<div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
|
|
1093
|
+
{/* Session Analysis */}
|
|
1094
|
+
<SessionAnalysisPanel
|
|
1095
|
+
analysis={variantAnalysis}
|
|
1096
|
+
isLoading={variantAnalysisLoading}
|
|
1097
|
+
isExpanded={analysisExpanded.variant}
|
|
1098
|
+
onToggle={() =>
|
|
1099
|
+
setAnalysisExpanded((prev) => ({
|
|
1100
|
+
...prev,
|
|
1101
|
+
variant: !prev.variant,
|
|
1102
|
+
}))
|
|
1103
|
+
}
|
|
1104
|
+
accentColor="orange"
|
|
1105
|
+
/>
|
|
1106
|
+
{/* Tool Calls */}
|
|
1107
|
+
<ToolCallsPanel
|
|
1108
|
+
toolCalls={variantState.metrics.toolCalls}
|
|
1109
|
+
isExpanded={toolCallsExpanded.variant}
|
|
1110
|
+
onToggle={() =>
|
|
1111
|
+
setToolCallsExpanded((prev) => ({
|
|
1112
|
+
...prev,
|
|
1113
|
+
variant: !prev.variant,
|
|
1114
|
+
}))
|
|
1115
|
+
}
|
|
1116
|
+
accentColor="orange"
|
|
1117
|
+
/>
|
|
800
1118
|
</div>
|
|
801
1119
|
)}
|
|
802
1120
|
</Card>
|
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
interface SimilarSession {
|
|
22
22
|
session_id: string;
|
|
23
23
|
distance: number;
|
|
24
|
+
analysis?: SessionAnalysis;
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
export function FindSessions() {
|
|
@@ -65,7 +66,29 @@ export function FindSessions() {
|
|
|
65
66
|
}
|
|
66
67
|
|
|
67
68
|
const data = await response.json();
|
|
68
|
-
|
|
69
|
+
|
|
70
|
+
// Fetch full analysis data for each similar session
|
|
71
|
+
const similarWithAnalysis = await Promise.all(
|
|
72
|
+
data.similar.map(async (similar: SimilarSession) => {
|
|
73
|
+
try {
|
|
74
|
+
const analysisRes = await fetch(
|
|
75
|
+
`/api/session-analyses?sessionId=${similar.session_id}`,
|
|
76
|
+
);
|
|
77
|
+
if (analysisRes.ok) {
|
|
78
|
+
const analysis = await analysisRes.json();
|
|
79
|
+
return { ...similar, analysis };
|
|
80
|
+
}
|
|
81
|
+
} catch (err) {
|
|
82
|
+
console.error(
|
|
83
|
+
`Failed to fetch analysis for ${similar.session_id}:`,
|
|
84
|
+
err,
|
|
85
|
+
);
|
|
86
|
+
}
|
|
87
|
+
return similar;
|
|
88
|
+
}),
|
|
89
|
+
);
|
|
90
|
+
|
|
91
|
+
setSimilarSessions(similarWithAnalysis);
|
|
69
92
|
} catch (err) {
|
|
70
93
|
setError(err instanceof Error ? err.message : "Unknown error");
|
|
71
94
|
} finally {
|
|
@@ -179,10 +202,7 @@ export function FindSessions() {
|
|
|
179
202
|
<CardContent>
|
|
180
203
|
<div className="space-y-3">
|
|
181
204
|
{similarSessions.map((similar) => {
|
|
182
|
-
|
|
183
|
-
(s) => s.session_id === similar.session_id,
|
|
184
|
-
);
|
|
185
|
-
if (!session) return null;
|
|
205
|
+
if (!similar.analysis) return null;
|
|
186
206
|
|
|
187
207
|
return (
|
|
188
208
|
<a
|
|
@@ -197,14 +217,14 @@ export function FindSessions() {
|
|
|
197
217
|
{similar.session_id}
|
|
198
218
|
</span>
|
|
199
219
|
<span className="text-xs px-2 py-0.5 bg-primary/10 text-primary rounded">
|
|
200
|
-
{
|
|
220
|
+
{similar.analysis.task.intent_type}
|
|
201
221
|
</span>
|
|
202
222
|
</div>
|
|
203
223
|
<p className="text-sm line-clamp-2">
|
|
204
|
-
{
|
|
224
|
+
{similar.analysis.task.user_query}
|
|
205
225
|
</p>
|
|
206
226
|
<p className="text-xs text-muted-foreground line-clamp-1">
|
|
207
|
-
{
|
|
227
|
+
{similar.analysis.task.task_summary}
|
|
208
228
|
</p>
|
|
209
229
|
</div>
|
|
210
230
|
<div className="flex flex-col items-end gap-1">
|
package/src/server.ts
CHANGED
|
@@ -285,6 +285,20 @@ export function startDebuggerServer(
|
|
|
285
285
|
},
|
|
286
286
|
},
|
|
287
287
|
|
|
288
|
+
"/api/comparison-config/:configId": {
|
|
289
|
+
GET(req) {
|
|
290
|
+
const configId = req.params.configId;
|
|
291
|
+
const config = comparisonDb.getConfig(configId);
|
|
292
|
+
if (!config) {
|
|
293
|
+
return Response.json(
|
|
294
|
+
{ error: "Comparison config not found" },
|
|
295
|
+
{ status: 404 },
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
return Response.json(config);
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
|
|
288
302
|
"/api/comparison-session-ids": {
|
|
289
303
|
GET() {
|
|
290
304
|
const sessionIds = comparisonDb.getComparisonSessionIds();
|
|
@@ -530,8 +544,43 @@ export function startDebuggerServer(
|
|
|
530
544
|
|
|
531
545
|
const sessionData = await sessionResponse.json();
|
|
532
546
|
|
|
547
|
+
// Fetch agent config to get model for cost calculation
|
|
548
|
+
const agentConfig = await fetchAgentConfig();
|
|
549
|
+
const model = agentConfig?.model || "unknown";
|
|
550
|
+
|
|
551
|
+
// Fetch metrics from OTLP spans
|
|
552
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
553
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
554
|
+
const sessionMetrics = extractSessionMetrics(
|
|
555
|
+
traces,
|
|
556
|
+
allSpans,
|
|
557
|
+
model,
|
|
558
|
+
);
|
|
559
|
+
|
|
560
|
+
// Convert to AnalysisMetrics format
|
|
561
|
+
const metrics = {
|
|
562
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
563
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
564
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
565
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
566
|
+
durationMs: sessionMetrics.durationMs,
|
|
567
|
+
};
|
|
568
|
+
|
|
569
|
+
// Convert tool calls to DetailedToolCall format
|
|
570
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
|
|
571
|
+
name: tc.name,
|
|
572
|
+
input: tc.input,
|
|
573
|
+
output: tc.output,
|
|
574
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
575
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
576
|
+
}));
|
|
577
|
+
|
|
533
578
|
// Analyze with LLM
|
|
534
|
-
const analysis = await analyzeSession(
|
|
579
|
+
const analysis = await analyzeSession({
|
|
580
|
+
session: sessionData,
|
|
581
|
+
metrics,
|
|
582
|
+
toolCalls,
|
|
583
|
+
});
|
|
535
584
|
|
|
536
585
|
// Persist to database
|
|
537
586
|
analysisDb.saveAnalysis(analysis);
|
|
@@ -581,6 +630,10 @@ export function startDebuggerServer(
|
|
|
581
630
|
// Import analyzer dynamically
|
|
582
631
|
const { analyzeSession } = await import("./analysis/analyzer.js");
|
|
583
632
|
|
|
633
|
+
// Fetch agent config once for all sessions
|
|
634
|
+
const agentConfig = await fetchAgentConfig();
|
|
635
|
+
const model = agentConfig?.model || "unknown";
|
|
636
|
+
|
|
584
637
|
// Process in batches of 25
|
|
585
638
|
const BATCH_SIZE = 25;
|
|
586
639
|
const results: Array<{
|
|
@@ -616,8 +669,41 @@ export function startDebuggerServer(
|
|
|
616
669
|
|
|
617
670
|
const sessionData = await sessionResponse.json();
|
|
618
671
|
|
|
672
|
+
// Fetch metrics from OTLP spans
|
|
673
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
674
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
675
|
+
const sessionMetrics = extractSessionMetrics(
|
|
676
|
+
traces,
|
|
677
|
+
allSpans,
|
|
678
|
+
model,
|
|
679
|
+
);
|
|
680
|
+
|
|
681
|
+
// Convert to AnalysisMetrics format
|
|
682
|
+
const metrics = {
|
|
683
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
684
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
685
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
686
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
687
|
+
durationMs: sessionMetrics.durationMs,
|
|
688
|
+
};
|
|
689
|
+
|
|
690
|
+
// Convert tool calls to DetailedToolCall format
|
|
691
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map(
|
|
692
|
+
(tc) => ({
|
|
693
|
+
name: tc.name,
|
|
694
|
+
input: tc.input,
|
|
695
|
+
output: tc.output,
|
|
696
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
697
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
698
|
+
}),
|
|
699
|
+
);
|
|
700
|
+
|
|
619
701
|
// Analyze
|
|
620
|
-
const analysis = await analyzeSession(
|
|
702
|
+
const analysis = await analyzeSession({
|
|
703
|
+
session: sessionData,
|
|
704
|
+
metrics,
|
|
705
|
+
toolCalls,
|
|
706
|
+
});
|
|
621
707
|
|
|
622
708
|
// Persist
|
|
623
709
|
analysisDb.saveAnalysis(analysis);
|