@townco/debugger 0.1.23 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -8
- package/src/App.tsx +13 -0
- package/src/comparison-db.test.ts +113 -0
- package/src/comparison-db.ts +332 -0
- package/src/components/DebuggerHeader.tsx +62 -2
- package/src/components/SessionTimelineView.tsx +173 -0
- package/src/components/SpanTimeline.tsx +6 -4
- package/src/components/UnifiedTimeline.tsx +691 -0
- package/src/db.ts +71 -0
- package/src/index.ts +2 -0
- package/src/lib/metrics.test.ts +51 -0
- package/src/lib/metrics.ts +136 -0
- package/src/lib/pricing.ts +23 -0
- package/src/lib/turnExtractor.ts +64 -23
- package/src/pages/ComparisonView.tsx +685 -0
- package/src/pages/SessionList.tsx +77 -56
- package/src/pages/SessionView.tsx +3 -64
- package/src/pages/TownHall.tsx +406 -0
- package/src/schemas.ts +15 -0
- package/src/server.ts +345 -12
- package/src/types.ts +87 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
import { useCallback, useEffect, useState } from "react";
|
|
2
|
+
import { Button } from "@/components/ui/button";
|
|
3
|
+
import {
|
|
4
|
+
Card,
|
|
5
|
+
CardContent,
|
|
6
|
+
CardDescription,
|
|
7
|
+
CardHeader,
|
|
8
|
+
CardTitle,
|
|
9
|
+
} from "@/components/ui/card";
|
|
10
|
+
import { DebuggerLayout } from "../components/DebuggerLayout";
|
|
11
|
+
import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
|
|
12
|
+
import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
|
|
13
|
+
|
|
14
|
+
/** Props accepted by the `ComparisonView` page component. */
interface ComparisonViewProps {
  /** Identifier of the comparison run to display; interpolated into the `/api/comparison-run/...` URLs. */
  runId: string;
}
|
|
17
|
+
|
|
18
|
+
/** A single transcript entry shown in one of the comparison panels. */
interface ChatMessage {
  /** Who produced the message. */
  role: "user" | "assistant";
  /** Message text as rendered in the panel. */
  content: string;
}
|
|
22
|
+
|
|
23
|
+
/** UI state tracked for one side (control or variant) of the comparison. */
interface SessionState {
  /** Agent-server session id, or `null` before a session has been created. */
  sessionId: string | null;
  /** Transcript rendered in the panel. */
  messages: ChatMessage[];
  /** While true, a cursor is rendered after the last assistant message. */
  isStreaming: boolean;
  /** Metrics for the session, or `null` while none are available. */
  metrics: SessionMetrics | null;
  /** Error text shown in the panel, or `null` when there is none. */
  error: string | null;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Base URL of the agent server used for the `/rpc` and `/events` requests.
 *
 * In a browser this is the page origin with the first ":4000" replaced by
 * ":3100"; when `window` is not defined it is "http://localhost:3100".
 */
const AGENT_SERVER_URL = (() => {
  if (typeof window === "undefined") {
    return "http://localhost:3100";
  }
  return window.location.origin.replace(":4000", ":3100");
})();
|
|
35
|
+
|
|
36
|
+
export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
37
|
+
// Data loaded from the debugger API, plus the page-level load status.
const [run, setRun] = useState<ComparisonRun | null>(null);
const [config, setConfig] = useState<ComparisonConfig | null>(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);

// Both panels start from the same idle state; the factory is handed to
// useState as a lazy initializer so each side gets its own object.
const makeIdleSession = (): SessionState => ({
  sessionId: null,
  messages: [],
  isStreaming: false,
  metrics: null,
  error: null,
});
const [controlState, setControlState] = useState<SessionState>(makeIdleSession);
const [variantState, setVariantState] = useState<SessionState>(makeIdleSession);

// Run lifecycle flags: a run is currently executing / a run has been started or restored.
const [isRunning, setIsRunning] = useState(false);
const [hasRun, setHasRun] = useState(false);
|
|
60
|
+
|
|
61
|
+
// Fetch comparison run details and restore saved messages
|
|
62
|
+
// Load the comparison run for `runId`, restore any saved transcript, then load
// the comparison config. Re-runs whenever `runId` changes.
useEffect(() => {
  // Lets the cleanup cancel in-flight requests and suppress state updates
  // from a load that is no longer current (unmount or `runId` change).
  const controller = new AbortController();
  let active = true;

  // GET a JSON resource, treating any non-2xx response as a failure so an
  // error payload is never stored as the run or the config.
  const fetchJson = async (url: string) => {
    const res = await fetch(url, { signal: controller.signal });
    if (!res.ok) {
      throw new Error(`Request to ${url} failed with status ${res.status}`);
    }
    return res.json();
  };

  const load = async (): Promise<void> => {
    try {
      const runData = await fetchJson(`/api/comparison-run/${runId}`);
      if (!active) return;
      setRun(runData);

      // A run that is already running or completed is shown in the
      // side-by-side view; saved responses (if any) are restored into it.
      if (runData.status === "completed" || runData.status === "running") {
        setHasRun(true);

        if (runData.controlResponse) {
          setControlState({
            sessionId: runData.controlSessionId,
            messages: [
              { role: "user", content: runData.firstUserMessage },
              { role: "assistant", content: runData.controlResponse },
            ],
            isStreaming: false,
            metrics: runData.controlMetrics,
            error: null,
          });
        }

        if (runData.variantResponse) {
          setVariantState({
            sessionId: runData.variantSessionId,
            messages: [
              { role: "user", content: runData.firstUserMessage },
              { role: "assistant", content: runData.variantResponse },
            ],
            isStreaming: false,
            metrics: runData.variantMetrics,
            error: null,
          });
        }
      }

      // The config is fetched only after the run has loaded successfully.
      const configData = await fetchJson(`/api/comparison-config`);
      if (!active) return;
      setConfig(configData);
      setLoading(false);
    } catch (err) {
      // Includes the AbortError raised by the cleanup below; nothing to report then.
      if (!active) return;
      setError(err instanceof Error ? err.message : String(err));
      setLoading(false);
    }
  };

  void load();

  return () => {
    active = false;
    controller.abort();
  };
}, [runId]);
|
|
114
|
+
|
|
115
|
+
// Create a new session with the agent server
|
|
116
|
+
/**
 * Create a new session on the agent server.
 *
 * Posts a JSON-RPC `initialize` request followed by `session/new` to
 * `${AGENT_SERVER_URL}/rpc`.
 *
 * @param configOverrides - When provided, sent as `_meta.configOverrides` on
 *   the `session/new` request; when omitted, `_meta` is left undefined.
 * @returns The `result.sessionId` from the `session/new` response.
 * @throws Error if `session/new` fails at the HTTP level, returns a JSON-RPC
 *   `error`, or does not include a session id.
 */
const createSession = async (
  configOverrides?: Record<string, unknown>,
): Promise<string> => {
  // POST one JSON-RPC request to the agent server.
  const postRpc = (
    idPrefix: string,
    method: string,
    params: Record<string, unknown>,
  ): Promise<Response> =>
    fetch(`${AGENT_SERVER_URL}/rpc`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        jsonrpc: "2.0",
        id: `${idPrefix}-${Date.now()}`,
        method,
        params,
      }),
    });

  // The handshake result is not used; the body is only drained, as before.
  const initRes = await postRpc("init", "initialize", {
    protocolVersion: 1,
    clientCapabilities: {},
  });
  await initRes.json();

  const sessionRes = await postRpc("session", "session/new", {
    cwd: "/",
    mcpServers: [],
    _meta: configOverrides ? { configOverrides } : undefined,
  });
  if (!sessionRes.ok) {
    throw new Error(`session/new failed with HTTP ${sessionRes.status}`);
  }

  const sessionData: {
    result?: { sessionId?: unknown };
    error?: { message?: unknown };
  } | null = await sessionRes.json();

  // Report a JSON-RPC error as such instead of crashing on `result.sessionId`.
  if (sessionData?.error) {
    const detail =
      typeof sessionData.error.message === "string"
        ? sessionData.error.message
        : JSON.stringify(sessionData.error);
    throw new Error(`session/new failed: ${detail}`);
  }

  const sessionId = sessionData?.result?.sessionId;
  if (typeof sessionId !== "string" || sessionId.length === 0) {
    throw new Error("session/new response did not include a sessionId");
  }
  return sessionId;
};
|
|
151
|
+
|
|
152
|
+
// Send a message and collect the response via SSE
|
|
153
|
+
/**
 * Send a prompt to an agent session and stream the assistant's reply.
 *
 * Opens `${AGENT_SERVER_URL}/events` for the session, then posts a JSON-RPC
 * `session/prompt` request. Every `agent_message_chunk` text update received
 * on the event stream is appended to the running reply, and `onUpdate` is
 * called with the full text accumulated so far.
 *
 * @param sessionId - Session to prompt; also sent as the `X-Session-ID` header.
 * @param message - Prompt text.
 * @param onUpdate - Called with the accumulated reply after each text chunk.
 * @throws Error if the prompt request fails at the HTTP level or the response
 *   carries a JSON-RPC `error`. The event stream is aborted in every case.
 */
const sendMessageAndCollect = async (
  sessionId: string,
  message: string,
  onUpdate: (content: string) => void,
): Promise<void> => {
  type SessionUpdateEvent = {
    method?: string;
    params?: {
      update?: {
        sessionUpdate?: string;
        content?: { type?: string; text?: string };
      };
    };
  };

  let accumulatedContent = "";
  const abortController = new AbortController();

  // Handle one line of the event stream; only `data: ` lines carrying an
  // agent text chunk contribute to the reply.
  const handleLine = (line: string): void => {
    if (!line.startsWith("data: ")) return;

    let event: SessionUpdateEvent | null;
    try {
      event = JSON.parse(line.slice(6));
    } catch {
      // Lines that are not valid JSON are skipped.
      return;
    }

    const update = event?.params?.update;
    if (
      event?.method !== "session/update" ||
      update?.sessionUpdate !== "agent_message_chunk"
    ) {
      return;
    }
    if (update.content?.type === "text" && update.content.text) {
      accumulatedContent += update.content.text;
      onUpdate(accumulatedContent);
    }
  };

  // Read the event stream until it ends or is aborted. Failures are logged,
  // not thrown: this runs in the background and is never awaited.
  const consumeEvents = async (): Promise<void> => {
    try {
      const response = await fetch(`${AGENT_SERVER_URL}/events`, {
        headers: { "X-Session-ID": sessionId },
        signal: abortController.signal,
      });
      const reader = response.body?.getReader();
      if (!reader) return;

      const decoder = new TextDecoder();
      let buffer = "";
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? ""; // keep the trailing partial line for the next chunk
        for (const line of lines) {
          handleLine(line);
        }
      }
    } catch (err) {
      // AbortError is expected when the stream is cancelled below.
      if (err instanceof Error && err.name !== "AbortError") {
        console.error("SSE error:", err);
      }
    }
  };
  void consumeEvents();

  try {
    // Small delay to give the event stream a chance to connect first.
    await new Promise<void>((resolve) => setTimeout(resolve, 100));

    const promptRes = await fetch(`${AGENT_SERVER_URL}/rpc`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        jsonrpc: "2.0",
        id: `prompt-${Date.now()}`,
        method: "session/prompt",
        params: {
          sessionId,
          prompt: [{ type: "text", text: message }],
        },
      }),
    });
    if (!promptRes.ok) {
      throw new Error(`session/prompt failed with HTTP ${promptRes.status}`);
    }

    // Reading the body waits for the complete response and lets a JSON-RPC
    // error be reported instead of looking like an empty reply.
    let promptData: { error?: { message?: unknown } } | null = null;
    try {
      promptData = await promptRes.json();
    } catch {
      // A body that is not JSON carries no error to report.
    }
    if (promptData?.error) {
      const detail =
        typeof promptData.error.message === "string"
          ? promptData.error.message
          : JSON.stringify(promptData.error);
      throw new Error(`session/prompt failed: ${detail}`);
    }

    // Give any final chunks a moment to arrive on the event stream.
    await new Promise<void>((resolve) => setTimeout(resolve, 200));
  } finally {
    // Always close the event stream, including when the prompt failed.
    abortController.abort();
  }
};
|
|
247
|
+
|
|
248
|
+
// Run the comparison
|
|
249
|
+
/**
 * Run the A/B comparison: create a control and a variant session, send the
 * run's first user message to both in parallel while streaming the replies
 * into the two panels, then persist the responses and metrics on the run.
 *
 * Does nothing until both the run and the config have loaded. A failure on
 * one side is shown in that side's panel; a failure in session creation or
 * in the run-update requests is reported through the page-level error state.
 */
const runComparison = useCallback(async () => {
  if (!run || !config) return;

  setIsRunning(true);
  setHasRun(true);

  const firstMessage = run.firstUserMessage;
  // Model name sent to the metrics endpoint when the config names none.
  const fallbackModel = "claude-sonnet-4-5-20250929";

  // Both panels restart from the same "prompt sent, awaiting reply" state.
  const awaitingReply = (): SessionState => ({
    sessionId: null,
    messages: [{ role: "user", content: firstMessage }],
    isStreaming: true,
    metrics: null,
    error: null,
  });
  setControlState(awaitingReply());
  setVariantState(awaitingReply());

  // Drive one side of the comparison: stream the reply into its panel, then
  // fetch its metrics. Never rejects; a streaming failure is recorded on the
  // panel and yields `metrics: null`.
  const runSide = async (
    sessionId: string,
    pricingModel: string,
    startTime: number,
    applyState: (updater: (prev: SessionState) => SessionState) => void,
  ): Promise<{ response: string; metrics: SessionMetrics | null }> => {
    let response = "";
    try {
      await sendMessageAndCollect(sessionId, firstMessage, (content) => {
        response = content;
        applyState((prev) => ({
          ...prev,
          messages: [
            { role: "user", content: firstMessage },
            { role: "assistant", content },
          ],
        }));
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      applyState((prev) => ({ ...prev, isStreaming: false, error: reason }));
      return { response, metrics: null };
    }

    const durationMs = Date.now() - startTime;
    // Wait for telemetry data to be written to the database.
    await new Promise<void>((resolve) => setTimeout(resolve, 2000));

    let metrics: SessionMetrics;
    try {
      const metricsRes = await fetch(
        `/api/session-metrics/${sessionId}?model=${encodeURIComponent(pricingModel)}`,
      );
      if (!metricsRes.ok) {
        // An error payload must not be stored as metrics.
        throw new Error(`metrics request failed with HTTP ${metricsRes.status}`);
      }
      const fetched = await metricsRes.json();
      fetched.durationMs = durationMs; // use the duration measured by the client
      metrics = fetched;
    } catch {
      // Metrics unavailable: keep the measured duration, zero everything else.
      metrics = {
        durationMs,
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        estimatedCost: 0,
        toolCallCount: 0,
      };
    }

    applyState((prev) => ({ ...prev, isStreaming: false, metrics }));
    return { response, metrics };
  };

  try {
    // Only the dimension under test is overridden for the variant session.
    const variantOverrides: Record<string, unknown> = {};
    if (config.dimension === "model" && config.variantModel) {
      variantOverrides.model = config.variantModel;
    }
    if (config.dimension === "system_prompt" && config.variantSystemPrompt) {
      variantOverrides.systemPrompt = config.variantSystemPrompt;
    }
    if (config.dimension === "tools" && config.variantTools) {
      variantOverrides.tools = config.variantTools;
    }

    const [controlSessionId, variantSessionId] = await Promise.all([
      createSession(), // control: no overrides
      createSession(variantOverrides), // variant: with overrides
    ]);

    setControlState((prev) => ({ ...prev, sessionId: controlSessionId }));
    setVariantState((prev) => ({ ...prev, sessionId: variantSessionId }));

    // Record the session ids on the run before prompting.
    await fetch(`/api/comparison-run/${runId}/update`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        status: "running",
        controlSessionId,
        variantSessionId,
      }),
    });

    const startTime = Date.now();
    const [control, variant] = await Promise.all([
      runSide(
        controlSessionId,
        config.controlModel || fallbackModel,
        startTime,
        setControlState,
      ),
      runSide(
        variantSessionId,
        config.variantModel || config.controlModel || fallbackModel,
        startTime,
        setVariantState,
      ),
    ]);

    // Persist the final responses and metrics on the run.
    await fetch(`/api/comparison-run/${runId}/update`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        status: "completed",
        controlMetrics: control.metrics,
        variantMetrics: variant.metrics,
        controlResponse: control.response,
        variantResponse: variant.response,
      }),
    });
  } catch (err) {
    setError(err instanceof Error ? err.message : "Failed to run comparison");
  } finally {
    setIsRunning(false);
  }
}, [run, config, runId]);
|
|
445
|
+
|
|
446
|
+
// Until the run is available, render a notice inside the page layout:
// the loading text while loading, otherwise the error (or "Run not found").
if (loading || error || !run) {
  const notice = loading ? (
    <div className="text-muted-foreground">Loading comparison...</div>
  ) : (
    <div className="text-red-500">Error: {error || "Run not found"}</div>
  );

  return (
    <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
      <div className="container mx-auto p-8">{notice}</div>
    </DebuggerLayout>
  );
}
|
|
465
|
+
|
|
466
|
+
/**
 * Label describing the control side for the dimension being compared.
 * Returns "" when the config has not loaded or the dimension is not one of
 * "model", "system_prompt" or "tools".
 */
const getControlDimensionLabel = () => {
  const dimension = config?.dimension;

  if (dimension === "model") {
    return `Model: ${config?.controlModel || "unknown"}`;
  }
  if (dimension === "system_prompt") {
    return "System Prompt (original)";
  }
  if (dimension === "tools") {
    return "Tools (original)";
  }
  return "";
};
|
|
479
|
+
|
|
480
|
+
/**
 * Label describing the variant side for the dimension being compared.
 *
 * Returns "" when the config has not loaded or the dimension is not one of
 * "model", "system_prompt" or "tools". A missing variant model is shown as
 * "unknown" (matching the control label) and a missing or empty tool list as
 * "none", so the UI never renders the text "undefined".
 */
const getDimensionLabel = () => {
  if (!config) return "";

  switch (config.dimension) {
    case "model":
      return `Model: ${config.variantModel || "unknown"}`;
    case "system_prompt":
      return "System Prompt (modified)";
    case "tools": {
      const tools = config.variantTools ?? [];
      return `Tools: ${tools.length > 0 ? tools.join(", ") : "none"}`;
    }
    default:
      return "";
  }
};
|
|
493
|
+
|
|
494
|
+
// One labelled value in a panel's metrics footer.
const renderMetric = (label: string, value: string | number) => (
  <div>
    <span className="text-muted-foreground">{label}</span>{" "}
    {value}
  </div>
);

// One entry of the pre-run legend: coloured dot, side name and its label.
const renderLegendEntry = (dotClass: string, name: string, label: string) => (
  <div className="space-y-1">
    <div className="flex items-center gap-2">
      <span className={`w-2 h-2 rounded-full ${dotClass}`} />
      <span className="font-medium">{name}</span>
    </div>
    <div className="text-muted-foreground text-xs">{label}</div>
  </div>
);

// One side of the comparison: header, transcript (with a cursor after the
// last assistant message while streaming), error text and metrics footer.
// The control and variant panels differ only in the values passed here.
const renderSessionPanel = (panel: {
  title: string;
  description: string;
  dotClass: string;
  userTextClass: string;
  state: SessionState;
}) => {
  const { state } = panel;
  const lastIndex = state.messages.length - 1;

  return (
    <Card className="flex flex-col h-full min-h-0 overflow-hidden">
      <CardHeader className="py-3 border-b shrink-0">
        <CardTitle className="text-sm flex items-center gap-2">
          <span className={`w-2 h-2 rounded-full ${panel.dotClass}`} />
          {panel.title}
        </CardTitle>
        <CardDescription className="text-xs">
          {panel.description}
        </CardDescription>
      </CardHeader>
      <CardContent className="flex-1 overflow-auto py-4">
        {state.messages.map((msg, i) => (
          <div
            key={i}
            className={`mb-4 ${msg.role === "user" ? panel.userTextClass : ""}`}
          >
            <div className="text-xs font-medium uppercase mb-1">{msg.role}</div>
            <div className="text-sm whitespace-pre-wrap">
              {msg.content}
              {state.isStreaming &&
                msg.role === "assistant" &&
                i === lastIndex && <span className="animate-pulse">▊</span>}
            </div>
          </div>
        ))}
        {state.error && (
          <div className="text-red-500 text-sm">Error: {state.error}</div>
        )}
      </CardContent>
      {state.metrics && (
        <div className="border-t p-3 shrink-0 bg-muted/50">
          <div className="grid grid-cols-4 gap-2 text-xs">
            {renderMetric("Duration:", formatDuration(state.metrics.durationMs))}
            {renderMetric("Tokens:", formatTokens(state.metrics.totalTokens))}
            {renderMetric("Cost:", formatCost(state.metrics.estimatedCost))}
            {renderMetric("Tools:", state.metrics.toolCallCount)}
          </div>
        </div>
      )}
    </Card>
  );
};

return (
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
    <div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
      {/* Header */}
      <div className="flex items-center justify-between mb-4">
        <div>
          <h2 className="text-lg font-semibold">A/B Comparison</h2>
          <p className="text-sm text-muted-foreground">
            Comparing: {config?.dimension?.replace("_", " ")} -{" "}
            {getDimensionLabel()}
          </p>
        </div>
        {!hasRun && (
          <Button onClick={runComparison} disabled={isRunning}>
            {isRunning ? "Running..." : "Run Comparison"}
          </Button>
        )}
      </div>

      {/* Pre-run state: describe what will be compared */}
      {!hasRun && (
        <div className="flex-1 flex items-center justify-center">
          <Card className="max-w-md w-full">
            <CardHeader className="text-center">
              <CardTitle>Ready to Compare</CardTitle>
              <CardDescription>
                This comparison will send the same prompt to both
                configurations and display the results side by side.
              </CardDescription>
            </CardHeader>
            <CardContent className="space-y-4">
              <div className="bg-muted rounded-lg p-4">
                <div className="text-xs font-medium uppercase text-muted-foreground mb-2">
                  First message
                </div>
                <div className="text-sm">{run?.firstUserMessage}</div>
              </div>
              <div className="grid grid-cols-2 gap-4 text-sm">
                {renderLegendEntry(
                  "bg-blue-500",
                  "Control",
                  getControlDimensionLabel(),
                )}
                {renderLegendEntry(
                  "bg-orange-500",
                  "Variant",
                  getDimensionLabel(),
                )}
              </div>
            </CardContent>
          </Card>
        </div>
      )}

      {/* Side-by-side comparison */}
      {hasRun && (
        <div className="grid grid-cols-2 gap-4 flex-1 min-h-0">
          {renderSessionPanel({
            title: "Control (Original)",
            description: getControlDimensionLabel(),
            dotClass: "bg-blue-500",
            userTextClass: "text-blue-600 dark:text-blue-400",
            state: controlState,
          })}
          {renderSessionPanel({
            title: "Variant",
            description: getDimensionLabel(),
            dotClass: "bg-orange-500",
            userTextClass: "text-orange-600 dark:text-orange-400",
            state: variantState,
          })}
        </div>
      )}
    </div>
  </DebuggerLayout>
);
|
|
685
|
+
}
|