@townco/debugger 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,685 @@
1
+ import { useCallback, useEffect, useState } from "react";
2
+ import { Button } from "@/components/ui/button";
3
+ import {
4
+ Card,
5
+ CardContent,
6
+ CardDescription,
7
+ CardHeader,
8
+ CardTitle,
9
+ } from "@/components/ui/card";
10
+ import { DebuggerLayout } from "../components/DebuggerLayout";
11
+ import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
12
+ import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
13
+
14
/** Props accepted by the `ComparisonView` component. */
interface ComparisonViewProps {
  /** Identifier of the comparison run; it is interpolated into the `/api/comparison-run/...` request URLs. */
  runId: string;
}
17
+
18
/** A single transcript entry shown in one of the comparison panels. */
interface ChatMessage {
  /** Who produced the message; the panels print this value as the entry's heading. */
  role: "user" | "assistant";
  /** Message text, rendered with preserved whitespace. */
  content: string;
}
22
+
23
/** UI state tracked independently for each side (control / variant) of a comparison. */
interface SessionState {
  /** Agent-server session ID, or `null` until a session has been created or restored. */
  sessionId: string | null;
  /** Transcript displayed in the panel. */
  messages: ChatMessage[];
  /** `true` while a response is still being produced; drives the blinking cursor. */
  isStreaming: boolean;
  /** Metrics for the finished session, or `null` while none are available. */
  metrics: SessionMetrics | null;
  /** Error message for this side, or `null` when nothing went wrong. */
  error: string | null;
}
30
+
31
/**
 * Derives the agent server base URL from the origin the debugger UI is served from.
 *
 * Only a port of exactly 4000 is swapped for 3100. The match is anchored to the
 * end of the origin so that other ports (e.g. `:40001`) and `:4000` groups inside
 * an IPv6 host are left untouched.
 *
 * @param origin - Page origin (`scheme://host[:port]`), or `undefined` when no
 *   `window` is available.
 * @returns The agent server base URL; `http://localhost:3100` when `origin` is `undefined`.
 */
function resolveAgentServerUrl(origin?: string): string {
  if (origin === undefined) {
    return "http://localhost:3100";
  }
  return origin.replace(/:4000$/, ":3100");
}

/** Base URL of the agent server used for the `/rpc` and `/events` requests. */
const AGENT_SERVER_URL = resolveAgentServerUrl(
  typeof window !== "undefined" ? window.location.origin : undefined,
);
35
+
36
+ export function ComparisonView({ runId }: ComparisonViewProps) {
37
+ const [run, setRun] = useState<ComparisonRun | null>(null);
38
+ const [config, setConfig] = useState<ComparisonConfig | null>(null);
39
+ const [loading, setLoading] = useState(true);
40
+ const [error, setError] = useState<string | null>(null);
41
+
42
+ // Session states
43
+ const [controlState, setControlState] = useState<SessionState>({
44
+ sessionId: null,
45
+ messages: [],
46
+ isStreaming: false,
47
+ metrics: null,
48
+ error: null,
49
+ });
50
+ const [variantState, setVariantState] = useState<SessionState>({
51
+ sessionId: null,
52
+ messages: [],
53
+ isStreaming: false,
54
+ metrics: null,
55
+ error: null,
56
+ });
57
+
58
+ const [isRunning, setIsRunning] = useState(false);
59
+ const [hasRun, setHasRun] = useState(false);
60
+
61
// Fetch the comparison run and the comparison config, then restore any saved
// transcript. Re-runs whenever `runId` changes.
useEffect(() => {
  // Set by the cleanup so a response that arrives after `runId` changed (or
  // after unmount) cannot overwrite newer state.
  let cancelled = false;

  // GETs a URL and parses the JSON body, rejecting on non-2xx statuses so an
  // error payload is never stored as if it were a run or a config.
  const fetchJson = async (url: string) => {
    const res = await fetch(url);
    if (!res.ok) {
      throw new Error(`Request to ${url} failed with status ${res.status}`);
    }
    return res.json();
  };

  setLoading(true);
  setError(null);

  // The two requests are independent, so issue them in parallel.
  Promise.all([
    fetchJson(`/api/comparison-run/${runId}`),
    fetchJson(`/api/comparison-config`),
  ])
    .then(([runData, configData]) => {
      if (cancelled) return;

      setRun(runData);

      // A run that already started (or finished) skips the pre-run screen.
      if (runData.status === "completed" || runData.status === "running") {
        setHasRun(true);

        // Restore control messages
        if (runData.controlResponse) {
          setControlState({
            sessionId: runData.controlSessionId,
            messages: [
              { role: "user", content: runData.firstUserMessage },
              { role: "assistant", content: runData.controlResponse },
            ],
            isStreaming: false,
            metrics: runData.controlMetrics,
            error: null,
          });
        }

        // Restore variant messages
        if (runData.variantResponse) {
          setVariantState({
            sessionId: runData.variantSessionId,
            messages: [
              { role: "user", content: runData.firstUserMessage },
              { role: "assistant", content: runData.variantResponse },
            ],
            isStreaming: false,
            metrics: runData.variantMetrics,
            error: null,
          });
        }
      }

      setConfig(configData);
      setLoading(false);
    })
    .catch((err: unknown) => {
      if (cancelled) return;
      setError(err instanceof Error ? err.message : String(err));
      setLoading(false);
    });

  return () => {
    cancelled = true;
  };
}, [runId]);
114
+
115
/**
 * Creates a new session on the agent server.
 *
 * Sends an `initialize` JSON-RPC request followed by `session/new`, both as
 * POSTs to `${AGENT_SERVER_URL}/rpc`.
 *
 * @param configOverrides - Optional overrides forwarded to the server as
 *   `params._meta.configOverrides`; when omitted, `_meta` is not sent.
 * @returns The session ID reported by `session/new`.
 * @throws Error when `session/new` answers with a JSON-RPC error or without a
 *   usable `result.sessionId`.
 */
const createSession = async (
  configOverrides?: Record<string, unknown>,
): Promise<string> => {
  // Posts a single JSON-RPC 2.0 request and returns the raw fetch response.
  const postRpc = (
    idPrefix: string,
    method: string,
    params: Record<string, unknown>,
  ) =>
    fetch(`${AGENT_SERVER_URL}/rpc`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        jsonrpc: "2.0",
        id: `${idPrefix}-${Date.now()}`,
        method,
        params,
      }),
    });

  // The initialize result is not used; the body is only read to completion.
  const initRes = await postRpc("init", "initialize", {
    protocolVersion: 1,
    clientCapabilities: {},
  });
  await initRes.json();

  const sessionRes = await postRpc("session", "session/new", {
    cwd: "/",
    mcpServers: [],
    // JSON.stringify drops the key entirely when the value is undefined.
    _meta: configOverrides ? { configOverrides } : undefined,
  });
  const sessionData = await sessionRes.json();

  // Surface server-side failures with a readable message instead of a
  // TypeError from dereferencing a missing `result`.
  if (sessionData?.error) {
    throw new Error(
      sessionData.error.message ?? "Agent server rejected session/new",
    );
  }
  const sessionId: unknown = sessionData?.result?.sessionId;
  if (typeof sessionId !== "string" || sessionId === "") {
    throw new Error(
      `Agent server did not return a session ID (HTTP ${sessionRes.status})`,
    );
  }
  return sessionId;
};
151
+
152
/**
 * Sends a prompt to an existing session and streams the assistant's reply.
 *
 * Opens the `${AGENT_SERVER_URL}/events` stream for the session, then POSTs a
 * `session/prompt` JSON-RPC request. Text from `agent_message_chunk`
 * notifications is concatenated and reported through `onUpdate`. The event
 * stream is always aborted before this function settles, including on failure.
 *
 * @param sessionId - Session to prompt; also sent as the `X-Session-ID` header.
 * @param message - Prompt text.
 * @param onUpdate - Called with the full text accumulated so far each time a
 *   new text chunk arrives.
 * @throws Error when the prompt request fails at the HTTP level or returns a
 *   JSON-RPC error.
 */
const sendMessageAndCollect = async (
  sessionId: string,
  message: string,
  onUpdate: (content: string) => void,
): Promise<void> => {
  let accumulatedContent = "";
  const abortController = new AbortController();

  // Handles one line of the event stream; anything that is not a `data` field
  // carrying an agent text chunk is ignored.
  const consumeLine = (line: string): void => {
    if (!line.startsWith("data:")) return;
    // In the SSE format a single space after the colon is optional.
    const payload = line.slice(5).replace(/^ /, "");
    try {
      const data = JSON.parse(payload);
      // Handle session/update notifications
      if (
        data.method === "session/update" &&
        data.params?.update?.sessionUpdate === "agent_message_chunk"
      ) {
        const content = data.params.update.content;
        if (content?.type === "text" && content.text) {
          accumulatedContent += content.text;
          onUpdate(accumulatedContent);
        }
      }
    } catch {
      // Ignore parse errors
    }
  };

  // Start SSE connection (not awaited - runs in the background until aborted)
  void fetch(`${AGENT_SERVER_URL}/events`, {
    headers: {
      "X-Session-ID": sessionId,
    },
    signal: abortController.signal,
  })
    .then(async (response) => {
      if (!response.ok) {
        console.error(
          "SSE error:",
          new Error(`Event stream request failed with status ${response.status}`),
        );
        return;
      }
      const reader = response.body?.getReader();
      if (!reader) return;

      const decoder = new TextDecoder();
      let buffer = "";

      while (true) {
        const { done, value } = await reader.read();
        buffer += done
          ? decoder.decode()
          : decoder.decode(value, { stream: true });

        const lines = buffer.split("\n");
        // Keep the incomplete trailing line for the next read; at end of
        // stream there is no next read, so process everything.
        buffer = done ? "" : (lines.pop() ?? "");
        for (const line of lines) {
          consumeLine(line);
        }

        if (done) break;
      }
    })
    .catch((err: unknown) => {
      // AbortError is expected when we cancel the stream
      const aborted =
        typeof err === "object" &&
        err !== null &&
        (err as { name?: unknown }).name === "AbortError";
      if (!aborted && err instanceof Error) {
        console.error("SSE error:", err);
      }
    });

  try {
    // Small delay to ensure SSE connection is established
    await new Promise((r) => setTimeout(r, 100));

    // Send the prompt and wait for it to complete.
    // When the prompt RPC returns, the agent has finished responding.
    const promptRes = await fetch(`${AGENT_SERVER_URL}/rpc`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        jsonrpc: "2.0",
        id: `prompt-${Date.now()}`,
        method: "session/prompt",
        params: {
          sessionId,
          prompt: [{ type: "text", text: message }],
        },
      }),
    });
    if (!promptRes.ok) {
      throw new Error(
        `session/prompt request failed with status ${promptRes.status}`,
      );
    }
    // A body that is not JSON is tolerated; only an explicit JSON-RPC error
    // is treated as a failure.
    const promptData = await promptRes.json().catch(() => null);
    if (promptData?.error) {
      throw new Error(
        promptData.error.message ?? "session/prompt returned an error",
      );
    }

    // Give a small delay for any final SSE chunks to arrive
    await new Promise((r) => setTimeout(r, 200));
  } finally {
    // Always close the SSE connection, even when the prompt failed.
    abortController.abort();
  }
};
247
+
248
/**
 * Runs the comparison: creates a control and a variant session, sends the
 * run's first user message to both in parallel, gathers per-session metrics,
 * and persists the outcome through `/api/comparison-run/${runId}/update`.
 *
 * A failure on one side is recorded in that side's state and does not stop the
 * other side. A failure outside the two sides (session creation, status
 * updates) is stored in the component-level error.
 */
const runComparison = useCallback(async () => {
  if (!run || !config) return;

  setIsRunning(true);
  setHasRun(true);

  const firstMessage = run.firstUserMessage;

  // Both panels start from the same "prompt sent, waiting for reply" state.
  const pendingState = (): SessionState => ({
    sessionId: null,
    messages: [{ role: "user", content: firstMessage }],
    isStreaming: true,
    metrics: null,
    error: null,
  });
  setControlState(pendingState());
  setVariantState(pendingState());

  // Loads metrics for a finished session, falling back to zeroed metrics when
  // the request fails or does not return a JSON object.
  const loadMetrics = async (
    sessionId: string,
    model: string,
    durationMs: number,
  ): Promise<SessionMetrics> => {
    try {
      const metricsRes = await fetch(
        `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
      );
      if (!metricsRes.ok) {
        throw new Error(
          `Metrics request failed with status ${metricsRes.status}`,
        );
      }
      const payload = await metricsRes.json();
      if (payload === null || typeof payload !== "object") {
        throw new Error("Metrics response was not an object");
      }
      // The duration measured here replaces whatever the endpoint reported.
      return { ...payload, durationMs };
    } catch {
      return {
        durationMs,
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        estimatedCost: 0,
        toolCallCount: 0,
      };
    }
  };

  // Runs one side of the comparison. Never rejects: failures are written to
  // that side's state so the other side can still finish.
  const runArm = async (
    sessionId: string,
    model: string,
    startTime: number,
    setArmState: typeof setControlState,
  ): Promise<{ response: string; metrics: SessionMetrics | null }> => {
    let response = "";
    try {
      await sendMessageAndCollect(sessionId, firstMessage, (content) => {
        response = content;
        setArmState((prev) => ({
          ...prev,
          messages: [
            { role: "user", content: firstMessage },
            { role: "assistant", content },
          ],
        }));
      });
      const durationMs = Date.now() - startTime;
      // Wait for telemetry data to be written to the database
      await new Promise((r) => setTimeout(r, 2000));
      const metrics = await loadMetrics(sessionId, model, durationMs);
      setArmState((prev) => ({ ...prev, isStreaming: false, metrics }));
      return { response, metrics };
    } catch (err) {
      setArmState((prev) => ({
        ...prev,
        isStreaming: false,
        error: err instanceof Error ? err.message : String(err),
      }));
      return { response, metrics: null };
    }
  };

  try {
    // Build config overrides based on dimension
    const variantOverrides: Record<string, unknown> = {};
    if (config.dimension === "model" && config.variantModel) {
      variantOverrides.model = config.variantModel;
    }
    if (config.dimension === "system_prompt" && config.variantSystemPrompt) {
      variantOverrides.systemPrompt = config.variantSystemPrompt;
    }
    if (config.dimension === "tools" && config.variantTools) {
      variantOverrides.tools = config.variantTools;
    }

    // Create sessions in parallel
    const [controlSessionId, variantSessionId] = await Promise.all([
      createSession(), // Control - no overrides
      createSession(variantOverrides), // Variant - with overrides
    ]);

    setControlState((prev) => ({ ...prev, sessionId: controlSessionId }));
    setVariantState((prev) => ({ ...prev, sessionId: variantSessionId }));

    // Update run with session IDs
    await fetch(`/api/comparison-run/${runId}/update`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        status: "running",
        controlSessionId,
        variantSessionId,
      }),
    });

    // Run both sessions in parallel; each model name is only used by the
    // metrics endpoint for cost calculation.
    const startTime = Date.now();
    const [control, variant] = await Promise.all([
      runArm(
        controlSessionId,
        config.controlModel || "claude-sonnet-4-5-20250929",
        startTime,
        setControlState,
      ),
      runArm(
        variantSessionId,
        config.variantModel ||
          config.controlModel ||
          "claude-sonnet-4-5-20250929",
        startTime,
        setVariantState,
      ),
    ]);

    // Update run status with responses and metrics
    await fetch(`/api/comparison-run/${runId}/update`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        status: "completed",
        controlMetrics: control.metrics,
        variantMetrics: variant.metrics,
        controlResponse: control.response,
        variantResponse: variant.response,
      }),
    });
  } catch (err) {
    setError(err instanceof Error ? err.message : "Failed to run comparison");
  } finally {
    setIsRunning(false);
  }
}, [run, config, runId]);
445
+
446
// Loading and failure share the same page shell; only the inner notice
// differs. Loading takes precedence over an error.
if (loading || error || !run) {
  const notice = loading ? (
    <div className="text-muted-foreground">Loading comparison...</div>
  ) : (
    <div className="text-red-500">Error: {error || "Run not found"}</div>
  );

  return (
    <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
      <div className="container mx-auto p-8">{notice}</div>
    </DebuggerLayout>
  );
}
465
+
466
/**
 * Describes the control side of the comparison for the configured dimension.
 *
 * @returns A short label, or an empty string when no config is loaded or the
 *   dimension is not one of the handled values.
 */
const getControlDimensionLabel = () => {
  if (!config) {
    return "";
  }
  const dimension = config.dimension;
  if (dimension === "model") {
    return `Model: ${config.controlModel || "unknown"}`;
  }
  if (dimension === "system_prompt") {
    return "System Prompt (original)";
  }
  if (dimension === "tools") {
    return "Tools (original)";
  }
  return "";
};
479
+
480
/**
 * Describes the variant side of the comparison for the configured dimension.
 *
 * Missing variant values fall back to a readable placeholder ("unknown" for
 * the model, "none" for the tool list) rather than rendering "undefined".
 *
 * @returns A short label, or an empty string when no config is loaded or the
 *   dimension is not one of the handled values.
 */
const getDimensionLabel = () => {
  if (!config) return "";
  switch (config.dimension) {
    case "model":
      return `Model: ${config.variantModel || "unknown"}`;
    case "system_prompt":
      return "System Prompt (modified)";
    case "tools":
      return `Tools: ${
        config.variantTools?.length ? config.variantTools.join(", ") : "none"
      }`;
    default:
      return "";
  }
};
493
+
494
// Renders one side of the comparison: header, transcript, optional error and
// optional metrics footer. The two sides differ only in the values passed in.
const renderPanel = (
  state: SessionState,
  title: string,
  subtitle: string,
  dotClassName: string,
  userClassName: string,
) => (
  <Card className="flex flex-col h-full min-h-0 overflow-hidden">
    <CardHeader className="py-3 border-b shrink-0">
      <CardTitle className="text-sm flex items-center gap-2">
        <span className={dotClassName} />
        {title}
      </CardTitle>
      <CardDescription className="text-xs">{subtitle}</CardDescription>
    </CardHeader>
    <CardContent className="flex-1 overflow-auto py-4">
      {state.messages.map((msg, i) => (
        <div
          key={i}
          className={`mb-4 ${msg.role === "user" ? userClassName : ""}`}
        >
          <div className="text-xs font-medium uppercase mb-1">{msg.role}</div>
          <div className="text-sm whitespace-pre-wrap">
            {msg.content}
            {/* Blinking cursor on the last assistant message while streaming */}
            {state.isStreaming &&
              msg.role === "assistant" &&
              i === state.messages.length - 1 && (
                <span className="animate-pulse">▊</span>
              )}
          </div>
        </div>
      ))}
      {state.error && (
        <div className="text-red-500 text-sm">Error: {state.error}</div>
      )}
    </CardContent>
    {/* Metrics */}
    {state.metrics && (
      <div className="border-t p-3 shrink-0 bg-muted/50">
        <div className="grid grid-cols-4 gap-2 text-xs">
          <div>
            <span className="text-muted-foreground">Duration:</span>{" "}
            {formatDuration(state.metrics.durationMs)}
          </div>
          <div>
            <span className="text-muted-foreground">Tokens:</span>{" "}
            {formatTokens(state.metrics.totalTokens)}
          </div>
          <div>
            <span className="text-muted-foreground">Cost:</span>{" "}
            {formatCost(state.metrics.estimatedCost)}
          </div>
          <div>
            <span className="text-muted-foreground">Tools:</span>{" "}
            {state.metrics.toolCallCount}
          </div>
        </div>
      </div>
    )}
  </Card>
);

return (
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
    <div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
      {/* Header */}
      <div className="flex items-center justify-between mb-4">
        <div>
          <h2 className="text-lg font-semibold">A/B Comparison</h2>
          <p className="text-sm text-muted-foreground">
            Comparing: {config?.dimension?.replace("_", " ")} -{" "}
            {getDimensionLabel()}
          </p>
        </div>
        {!hasRun && (
          <Button onClick={runComparison} disabled={isRunning}>
            {isRunning ? "Running..." : "Run Comparison"}
          </Button>
        )}
      </div>

      {/* Pre-run state */}
      {!hasRun && (
        <div className="flex-1 flex items-center justify-center">
          <Card className="max-w-md w-full">
            <CardHeader className="text-center">
              <CardTitle>Ready to Compare</CardTitle>
              <CardDescription>
                This comparison will send the same prompt to both
                configurations and display the results side by side.
              </CardDescription>
            </CardHeader>
            <CardContent className="space-y-4">
              <div className="bg-muted rounded-lg p-4">
                <div className="text-xs font-medium uppercase text-muted-foreground mb-2">
                  First message
                </div>
                <div className="text-sm">{run?.firstUserMessage}</div>
              </div>
              <div className="grid grid-cols-2 gap-4 text-sm">
                <div className="space-y-1">
                  <div className="flex items-center gap-2">
                    <span className="w-2 h-2 rounded-full bg-blue-500" />
                    <span className="font-medium">Control</span>
                  </div>
                  <div className="text-muted-foreground text-xs">
                    {getControlDimensionLabel()}
                  </div>
                </div>
                <div className="space-y-1">
                  <div className="flex items-center gap-2">
                    <span className="w-2 h-2 rounded-full bg-orange-500" />
                    <span className="font-medium">Variant</span>
                  </div>
                  <div className="text-muted-foreground text-xs">
                    {getDimensionLabel()}
                  </div>
                </div>
              </div>
            </CardContent>
          </Card>
        </div>
      )}

      {/* Side-by-side comparison */}
      {hasRun && (
        <div className="grid grid-cols-2 gap-4 flex-1 min-h-0">
          {/* Control */}
          {renderPanel(
            controlState,
            "Control (Original)",
            getControlDimensionLabel(),
            "w-2 h-2 rounded-full bg-blue-500",
            "text-blue-600 dark:text-blue-400",
          )}

          {/* Variant */}
          {renderPanel(
            variantState,
            "Variant",
            getDimensionLabel(),
            "w-2 h-2 rounded-full bg-orange-500",
            "text-orange-600 dark:text-orange-400",
          )}
        </div>
      )}
    </div>
  </DebuggerLayout>
);
685
+ }