@townco/debugger 0.1.28 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -4
- package/src/App.tsx +6 -0
- package/src/analysis/analyzer.ts +235 -0
- package/src/analysis/embeddings.ts +97 -0
- package/src/analysis/schema.ts +67 -0
- package/src/analysis/types.ts +132 -0
- package/src/analysis-db.ts +238 -0
- package/src/comparison-db.test.ts +28 -5
- package/src/comparison-db.ts +57 -9
- package/src/components/AnalyzeAllButton.tsx +81 -0
- package/src/components/DebuggerHeader.tsx +12 -0
- package/src/components/SessionAnalysisButton.tsx +109 -0
- package/src/components/SessionAnalysisDialog.tsx +155 -0
- package/src/components/UnifiedTimeline.tsx +3 -3
- package/src/components/ui/dialog.tsx +120 -0
- package/src/db.ts +3 -2
- package/src/lib/metrics.ts +101 -5
- package/src/pages/ComparisonView.tsx +258 -135
- package/src/pages/FindSessions.tsx +230 -0
- package/src/pages/SessionList.tsx +76 -10
- package/src/pages/SessionView.tsx +33 -1
- package/src/pages/TownHall.tsx +345 -187
- package/src/schemas.ts +27 -8
- package/src/server.ts +337 -3
- package/src/types.ts +11 -2
|
@@ -112,6 +112,14 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
112
112
|
});
|
|
113
113
|
}, [runId]);
|
|
114
114
|
|
|
115
|
+
const generateRequestId = (prefix: string, sessionId?: string) => {
|
|
116
|
+
const randomPart =
|
|
117
|
+
typeof crypto !== "undefined" && "randomUUID" in crypto
|
|
118
|
+
? crypto.randomUUID()
|
|
119
|
+
: `${Math.random().toString(16).slice(2)}-${Date.now().toString(16)}`;
|
|
120
|
+
return `${prefix}-${sessionId ? `${sessionId}-` : ""}${randomPart}`;
|
|
121
|
+
};
|
|
122
|
+
|
|
115
123
|
// Create a new session with the agent server
|
|
116
124
|
const createSession = async (
|
|
117
125
|
configOverrides?: Record<string, unknown>,
|
|
@@ -121,7 +129,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
121
129
|
headers: { "Content-Type": "application/json" },
|
|
122
130
|
body: JSON.stringify({
|
|
123
131
|
jsonrpc: "2.0",
|
|
124
|
-
id:
|
|
132
|
+
id: generateRequestId("init"),
|
|
125
133
|
method: "initialize",
|
|
126
134
|
params: {
|
|
127
135
|
protocolVersion: 1,
|
|
@@ -136,7 +144,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
136
144
|
headers: { "Content-Type": "application/json" },
|
|
137
145
|
body: JSON.stringify({
|
|
138
146
|
jsonrpc: "2.0",
|
|
139
|
-
id:
|
|
147
|
+
id: generateRequestId("session"),
|
|
140
148
|
method: "session/new",
|
|
141
149
|
params: {
|
|
142
150
|
cwd: "/",
|
|
@@ -154,7 +162,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
154
162
|
sessionId: string,
|
|
155
163
|
message: string,
|
|
156
164
|
onUpdate: (content: string) => void,
|
|
157
|
-
): Promise<
|
|
165
|
+
): Promise<string> => {
|
|
158
166
|
let accumulatedContent = "";
|
|
159
167
|
let abortController: AbortController | null = new AbortController();
|
|
160
168
|
|
|
@@ -228,7 +236,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
228
236
|
headers: { "Content-Type": "application/json" },
|
|
229
237
|
body: JSON.stringify({
|
|
230
238
|
jsonrpc: "2.0",
|
|
231
|
-
id:
|
|
239
|
+
id: generateRequestId("prompt", sessionId),
|
|
232
240
|
method: "session/prompt",
|
|
233
241
|
params: {
|
|
234
242
|
sessionId,
|
|
@@ -243,6 +251,9 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
243
251
|
// Abort the SSE connection since we're done
|
|
244
252
|
abortController.abort();
|
|
245
253
|
abortController = null;
|
|
254
|
+
|
|
255
|
+
// Return the accumulated content
|
|
256
|
+
return accumulatedContent;
|
|
246
257
|
};
|
|
247
258
|
|
|
248
259
|
// Run the comparison
|
|
@@ -271,15 +282,16 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
271
282
|
});
|
|
272
283
|
|
|
273
284
|
try {
|
|
274
|
-
// Build config overrides based on
|
|
285
|
+
// Build config overrides based on all selected dimensions
|
|
275
286
|
const variantOverrides: Record<string, unknown> = {};
|
|
276
|
-
|
|
287
|
+
const dimensions = config.dimensions || [];
|
|
288
|
+
if (dimensions.includes("model") && config.variantModel) {
|
|
277
289
|
variantOverrides.model = config.variantModel;
|
|
278
290
|
}
|
|
279
|
-
if (
|
|
291
|
+
if (dimensions.includes("system_prompt") && config.variantSystemPrompt) {
|
|
280
292
|
variantOverrides.systemPrompt = config.variantSystemPrompt;
|
|
281
293
|
}
|
|
282
|
-
if (
|
|
294
|
+
if (dimensions.includes("tools") && config.variantTools) {
|
|
283
295
|
variantOverrides.tools = config.variantTools;
|
|
284
296
|
}
|
|
285
297
|
|
|
@@ -307,123 +319,157 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
307
319
|
const startTime = Date.now();
|
|
308
320
|
|
|
309
321
|
// Track final responses and metrics
|
|
310
|
-
let
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
322
|
+
let finalControlMetrics: SessionMetrics = {
|
|
323
|
+
durationMs: 0,
|
|
324
|
+
inputTokens: 0,
|
|
325
|
+
outputTokens: 0,
|
|
326
|
+
totalTokens: 0,
|
|
327
|
+
estimatedCost: 0,
|
|
328
|
+
toolCallCount: 0,
|
|
329
|
+
};
|
|
330
|
+
let finalVariantMetrics: SessionMetrics = {
|
|
331
|
+
durationMs: 0,
|
|
332
|
+
inputTokens: 0,
|
|
333
|
+
outputTokens: 0,
|
|
334
|
+
totalTokens: 0,
|
|
335
|
+
estimatedCost: 0,
|
|
336
|
+
toolCallCount: 0,
|
|
337
|
+
};
|
|
338
|
+
|
|
339
|
+
// Helper to run a session and fetch metrics
|
|
340
|
+
const runSession = async (
|
|
341
|
+
sessionId: string,
|
|
342
|
+
model: string,
|
|
343
|
+
setState: typeof setControlState,
|
|
344
|
+
onContentUpdate: (content: string) => void,
|
|
345
|
+
): Promise<{ response: string; metrics: SessionMetrics }> => {
|
|
346
|
+
try {
|
|
347
|
+
const response = await sendMessageAndCollect(
|
|
348
|
+
sessionId,
|
|
349
|
+
firstMessage,
|
|
350
|
+
onContentUpdate,
|
|
351
|
+
);
|
|
352
|
+
|
|
353
|
+
const duration = Date.now() - startTime;
|
|
354
|
+
|
|
355
|
+
// Poll metrics until they stabilize or we hit a max wait window.
|
|
356
|
+
const fetchMetricsWithRetry = async (): Promise<SessionMetrics> => {
|
|
357
|
+
const maxWaitMs = 60_000;
|
|
358
|
+
const pollIntervalMs = 2_000;
|
|
359
|
+
let elapsed = 0;
|
|
360
|
+
let previousTokens = -1;
|
|
361
|
+
let previousTools = -1;
|
|
362
|
+
let lastMetrics: SessionMetrics | null = null;
|
|
363
|
+
|
|
364
|
+
while (elapsed <= maxWaitMs) {
|
|
365
|
+
try {
|
|
366
|
+
const metricsRes = await fetch(
|
|
367
|
+
`/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
|
|
368
|
+
);
|
|
369
|
+
const metrics = await metricsRes.json();
|
|
370
|
+
lastMetrics = { ...metrics, durationMs: duration };
|
|
371
|
+
|
|
372
|
+
// If tokens/tool calls stopped changing and we have data, treat as final.
|
|
373
|
+
if (
|
|
374
|
+
metrics.totalTokens > 0 &&
|
|
375
|
+
metrics.totalTokens === previousTokens &&
|
|
376
|
+
metrics.toolCallCount === previousTools
|
|
377
|
+
) {
|
|
378
|
+
return lastMetrics!;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
previousTokens = metrics.totalTokens ?? 0;
|
|
382
|
+
previousTools = metrics.toolCallCount ?? 0;
|
|
383
|
+
} catch {
|
|
384
|
+
// swallow and retry
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
await new Promise((r) => setTimeout(r, pollIntervalMs));
|
|
388
|
+
elapsed += pollIntervalMs;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// Return whatever we last saw (or zeros if nothing ever arrived)
|
|
392
|
+
return (
|
|
393
|
+
lastMetrics ?? {
|
|
348
394
|
durationMs: duration,
|
|
349
395
|
inputTokens: 0,
|
|
350
396
|
outputTokens: 0,
|
|
351
397
|
totalTokens: 0,
|
|
352
398
|
estimatedCost: 0,
|
|
353
399
|
toolCallCount: 0,
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
}));
|
|
360
|
-
}
|
|
361
|
-
})
|
|
362
|
-
.catch((err) => {
|
|
363
|
-
setControlState((prev) => ({
|
|
364
|
-
...prev,
|
|
365
|
-
isStreaming: false,
|
|
366
|
-
error: err.message,
|
|
367
|
-
}));
|
|
368
|
-
}),
|
|
400
|
+
}
|
|
401
|
+
);
|
|
402
|
+
};
|
|
403
|
+
|
|
404
|
+
const metrics = await fetchMetricsWithRetry();
|
|
369
405
|
|
|
370
|
-
|
|
371
|
-
sendMessageAndCollect(variantSessionId, firstMessage, (content) => {
|
|
372
|
-
finalVariantResponse = content;
|
|
373
|
-
setVariantState((prev) => ({
|
|
406
|
+
setState((prev) => ({
|
|
374
407
|
...prev,
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
{ role: "assistant", content },
|
|
378
|
-
],
|
|
408
|
+
isStreaming: false,
|
|
409
|
+
metrics,
|
|
379
410
|
}));
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
411
|
+
|
|
412
|
+
return { response, metrics };
|
|
413
|
+
} catch (err) {
|
|
414
|
+
setState((prev) => ({
|
|
415
|
+
...prev,
|
|
416
|
+
isStreaming: false,
|
|
417
|
+
error: err instanceof Error ? err.message : "Unknown error",
|
|
418
|
+
}));
|
|
419
|
+
return {
|
|
420
|
+
response: "",
|
|
421
|
+
metrics: {
|
|
422
|
+
durationMs: 0,
|
|
423
|
+
inputTokens: 0,
|
|
424
|
+
outputTokens: 0,
|
|
425
|
+
totalTokens: 0,
|
|
426
|
+
estimatedCost: 0,
|
|
427
|
+
toolCallCount: 0,
|
|
428
|
+
},
|
|
429
|
+
};
|
|
430
|
+
}
|
|
431
|
+
};
|
|
432
|
+
|
|
433
|
+
const controlModel = config.controlModel || "claude-sonnet-4-5-20250929";
|
|
434
|
+
const variantModel =
|
|
435
|
+
config.variantModel ||
|
|
436
|
+
config.controlModel ||
|
|
437
|
+
"claude-sonnet-4-5-20250929";
|
|
438
|
+
|
|
439
|
+
const [controlResult, variantResult] = await Promise.all([
|
|
440
|
+
runSession(
|
|
441
|
+
controlSessionId,
|
|
442
|
+
controlModel,
|
|
443
|
+
setControlState,
|
|
444
|
+
(content) => {
|
|
445
|
+
setControlState((prev) => ({
|
|
446
|
+
...prev,
|
|
447
|
+
messages: [
|
|
448
|
+
{ role: "user", content: firstMessage },
|
|
449
|
+
{ role: "assistant", content },
|
|
450
|
+
],
|
|
451
|
+
}));
|
|
452
|
+
},
|
|
453
|
+
),
|
|
454
|
+
runSession(
|
|
455
|
+
variantSessionId,
|
|
456
|
+
variantModel,
|
|
457
|
+
setVariantState,
|
|
458
|
+
(content) => {
|
|
419
459
|
setVariantState((prev) => ({
|
|
420
460
|
...prev,
|
|
421
|
-
|
|
422
|
-
|
|
461
|
+
messages: [
|
|
462
|
+
{ role: "user", content: firstMessage },
|
|
463
|
+
{ role: "assistant", content },
|
|
464
|
+
],
|
|
423
465
|
}));
|
|
424
|
-
}
|
|
466
|
+
},
|
|
467
|
+
),
|
|
425
468
|
]);
|
|
426
469
|
|
|
470
|
+
finalControlMetrics = controlResult.metrics;
|
|
471
|
+
finalVariantMetrics = variantResult.metrics;
|
|
472
|
+
|
|
427
473
|
// Update run status with responses and metrics
|
|
428
474
|
await fetch(`/api/comparison-run/${runId}/update`, {
|
|
429
475
|
method: "POST",
|
|
@@ -432,8 +478,8 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
432
478
|
status: "completed",
|
|
433
479
|
controlMetrics: finalControlMetrics,
|
|
434
480
|
variantMetrics: finalVariantMetrics,
|
|
435
|
-
controlResponse:
|
|
436
|
-
variantResponse:
|
|
481
|
+
controlResponse: controlResult.response,
|
|
482
|
+
variantResponse: variantResult.response,
|
|
437
483
|
}),
|
|
438
484
|
});
|
|
439
485
|
} catch (err) {
|
|
@@ -464,31 +510,97 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
464
510
|
}
|
|
465
511
|
|
|
466
512
|
const getControlDimensionLabel = () => {
|
|
467
|
-
if (!config
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
513
|
+
if (!config || !config.dimensions || config.dimensions.length === 0)
|
|
514
|
+
return "";
|
|
515
|
+
const labels: string[] = [];
|
|
516
|
+
for (const dim of config.dimensions) {
|
|
517
|
+
switch (dim) {
|
|
518
|
+
case "model":
|
|
519
|
+
labels.push(`Model: ${config.controlModel || "original"}`);
|
|
520
|
+
break;
|
|
521
|
+
case "system_prompt":
|
|
522
|
+
labels.push("System Prompt: original");
|
|
523
|
+
break;
|
|
524
|
+
case "tools":
|
|
525
|
+
labels.push("Tools: original");
|
|
526
|
+
break;
|
|
527
|
+
}
|
|
477
528
|
}
|
|
529
|
+
return labels.join(" | ");
|
|
478
530
|
};
|
|
479
531
|
|
|
480
532
|
const getDimensionLabel = () => {
|
|
481
|
-
if (!config
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
533
|
+
if (!config || !config.dimensions || config.dimensions.length === 0)
|
|
534
|
+
return "";
|
|
535
|
+
const labels: string[] = [];
|
|
536
|
+
for (const dim of config.dimensions) {
|
|
537
|
+
switch (dim) {
|
|
538
|
+
case "model":
|
|
539
|
+
labels.push(`Model: ${config.variantModel}`);
|
|
540
|
+
break;
|
|
541
|
+
case "system_prompt":
|
|
542
|
+
labels.push("System Prompt: modified");
|
|
543
|
+
break;
|
|
544
|
+
case "tools":
|
|
545
|
+
labels.push(`Tools: ${config.variantTools?.join(", ")}`);
|
|
546
|
+
break;
|
|
547
|
+
}
|
|
491
548
|
}
|
|
549
|
+
return labels.join(" | ");
|
|
550
|
+
};
|
|
551
|
+
|
|
552
|
+
const getDimensionsSummary = () => {
|
|
553
|
+
if (!config || !config.dimensions || config.dimensions.length === 0)
|
|
554
|
+
return "";
|
|
555
|
+
return config.dimensions.map((d) => d.replace("_", " ")).join(", ");
|
|
556
|
+
};
|
|
557
|
+
|
|
558
|
+
const formatToolTime = (ns?: number) => {
|
|
559
|
+
if (!ns) return "";
|
|
560
|
+
return new Date(ns / 1_000_000).toLocaleTimeString();
|
|
561
|
+
};
|
|
562
|
+
|
|
563
|
+
const renderToolCalls = (toolCalls?: SessionMetrics["toolCalls"]) => {
|
|
564
|
+
if (!toolCalls || toolCalls.length === 0) {
|
|
565
|
+
return <div className="text-xs text-muted-foreground">No tool calls</div>;
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
return (
|
|
569
|
+
<div className="space-y-2">
|
|
570
|
+
{toolCalls.map((call, idx) => (
|
|
571
|
+
<details
|
|
572
|
+
key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
|
|
573
|
+
className="rounded-md border px-3 py-2 bg-muted/50"
|
|
574
|
+
>
|
|
575
|
+
<summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
|
|
576
|
+
<span>
|
|
577
|
+
{call.name}{" "}
|
|
578
|
+
{call.startTimeUnixNano ? (
|
|
579
|
+
<span className="text-muted-foreground">
|
|
580
|
+
@ {formatToolTime(call.startTimeUnixNano)}
|
|
581
|
+
</span>
|
|
582
|
+
) : null}
|
|
583
|
+
</span>
|
|
584
|
+
<span className="text-muted-foreground text-[11px]">view</span>
|
|
585
|
+
</summary>
|
|
586
|
+
<div className="mt-2 text-[11px] space-y-1 break-words">
|
|
587
|
+
<div>
|
|
588
|
+
<span className="font-semibold">Args:</span>{" "}
|
|
589
|
+
<code className="break-words">
|
|
590
|
+
{JSON.stringify(call.input, null, 2)}
|
|
591
|
+
</code>
|
|
592
|
+
</div>
|
|
593
|
+
<div>
|
|
594
|
+
<span className="font-semibold">Result:</span>{" "}
|
|
595
|
+
<code className="break-words">
|
|
596
|
+
{JSON.stringify(call.output, null, 2)}
|
|
597
|
+
</code>
|
|
598
|
+
</div>
|
|
599
|
+
</div>
|
|
600
|
+
</details>
|
|
601
|
+
))}
|
|
602
|
+
</div>
|
|
603
|
+
);
|
|
492
604
|
};
|
|
493
605
|
|
|
494
606
|
return (
|
|
@@ -499,8 +611,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
499
611
|
<div>
|
|
500
612
|
<h2 className="text-lg font-semibold">A/B Comparison</h2>
|
|
501
613
|
<p className="text-sm text-muted-foreground">
|
|
502
|
-
Comparing: {
|
|
503
|
-
{getDimensionLabel()}
|
|
614
|
+
Comparing: {getDimensionsSummary()}
|
|
504
615
|
</p>
|
|
505
616
|
</div>
|
|
506
617
|
{!hasRun && (
|
|
@@ -613,6 +724,12 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
613
724
|
{controlState.metrics.toolCallCount}
|
|
614
725
|
</div>
|
|
615
726
|
</div>
|
|
727
|
+
<div className="mt-3">
|
|
728
|
+
<div className="text-[11px] font-semibold mb-1">
|
|
729
|
+
Tool calls
|
|
730
|
+
</div>
|
|
731
|
+
{renderToolCalls(controlState.metrics.toolCalls)}
|
|
732
|
+
</div>
|
|
616
733
|
</div>
|
|
617
734
|
)}
|
|
618
735
|
</Card>
|
|
@@ -674,6 +791,12 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
674
791
|
{variantState.metrics.toolCallCount}
|
|
675
792
|
</div>
|
|
676
793
|
</div>
|
|
794
|
+
<div className="mt-3">
|
|
795
|
+
<div className="text-[11px] font-semibold mb-1">
|
|
796
|
+
Tool calls
|
|
797
|
+
</div>
|
|
798
|
+
{renderToolCalls(variantState.metrics.toolCalls)}
|
|
799
|
+
</div>
|
|
677
800
|
</div>
|
|
678
801
|
)}
|
|
679
802
|
</Card>
|