@townco/debugger 0.1.32 → 0.1.33

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import {
2
+ BarChart3,
2
3
  ChevronDown,
3
4
  ChevronUp,
4
5
  Loader2,
@@ -16,7 +17,9 @@ import {
16
17
  CardTitle,
17
18
  } from "@/components/ui/card";
18
19
  import { Checkbox } from "@/components/ui/checkbox";
20
+ import type { SessionComparisonAnalysis } from "../analysis/comparison-types";
19
21
  import type { SessionAnalysis } from "../analysis/types";
22
+ import { ComparisonAnalysisDialog } from "../components/ComparisonAnalysisDialog";
20
23
  import { DebuggerLayout } from "../components/DebuggerLayout";
21
24
  import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
22
25
  import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
@@ -25,14 +28,18 @@ interface ComparisonViewProps {
25
28
  runId: string;
26
29
  }
27
30
 
28
- interface ChatMessage {
29
- role: "user" | "assistant";
31
+ // Conversation item that can be user message, assistant message, or tool call
32
+ interface ConversationItem {
33
+ type: "user" | "assistant" | "tool_call";
30
34
  content: string;
35
+ toolName?: string | undefined;
36
+ toolInput?: unknown;
37
+ toolOutput?: unknown;
31
38
  }
32
39
 
33
40
  interface SessionState {
34
41
  sessionId: string | null;
35
- messages: ChatMessage[];
42
+ messages: ConversationItem[];
36
43
  isStreaming: boolean;
37
44
  isSending: boolean; // true while sending is in progress (before streaming starts)
38
45
  metrics: SessionMetrics | null;
@@ -66,17 +73,21 @@ function SessionAnalysisPanel({
66
73
  isExpanded: boolean;
67
74
  onToggle: () => void;
68
75
  onRunAnalysis: () => void;
69
- accentColor: "blue" | "orange";
76
+ accentColor: "yellow" | "blue" | "orange";
70
77
  }) {
71
78
  const colorClasses =
72
- accentColor === "blue"
73
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
74
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
79
+ accentColor === "yellow"
80
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
81
+ : accentColor === "blue"
82
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
83
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
75
84
 
76
85
  const headerColorClasses =
77
- accentColor === "blue"
78
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
79
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
86
+ accentColor === "yellow"
87
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
88
+ : accentColor === "blue"
89
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
90
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
80
91
 
81
92
  if (isLoading) {
82
93
  return (
@@ -239,17 +250,21 @@ function ToolCallsPanel({
239
250
  toolCalls: SessionMetrics["toolCalls"];
240
251
  isExpanded: boolean;
241
252
  onToggle: () => void;
242
- accentColor: "blue" | "orange";
253
+ accentColor: "yellow" | "blue" | "orange";
243
254
  }) {
244
255
  const colorClasses =
245
- accentColor === "blue"
246
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
247
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
256
+ accentColor === "yellow"
257
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
258
+ : accentColor === "blue"
259
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
260
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
248
261
 
249
262
  const headerColorClasses =
250
- accentColor === "blue"
251
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
252
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
263
+ accentColor === "yellow"
264
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
265
+ : accentColor === "blue"
266
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
267
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
253
268
 
254
269
  const toolCallCount = toolCalls?.length ?? 0;
255
270
 
@@ -389,28 +404,114 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
389
404
  const [isRunning, setIsRunning] = useState(false);
390
405
  const [hasRun, setHasRun] = useState(false);
391
406
 
407
+ // Original source session state (read-only, for reference)
408
+ const [originalMessages, setOriginalMessages] = useState<ConversationItem[]>(
409
+ [],
410
+ );
411
+ const [originalMetrics, setOriginalMetrics] = useState<SessionMetrics | null>(
412
+ null,
413
+ );
414
+
392
415
  // Session analysis state
416
+ const [originalAnalysis, setOriginalAnalysis] =
417
+ useState<SessionAnalysis | null>(null);
393
418
  const [controlAnalysis, setControlAnalysis] =
394
419
  useState<SessionAnalysis | null>(null);
395
420
  const [variantAnalysis, setVariantAnalysis] =
396
421
  useState<SessionAnalysis | null>(null);
422
+ const [originalAnalysisLoading, setOriginalAnalysisLoading] = useState(false);
397
423
  const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
398
424
  const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
399
425
  const [analysisExpanded, setAnalysisExpanded] = useState<{
426
+ original: boolean;
400
427
  control: boolean;
401
428
  variant: boolean;
402
429
  }>({
430
+ original: false,
403
431
  control: false,
404
432
  variant: false,
405
433
  });
406
434
  const [toolCallsExpanded, setToolCallsExpanded] = useState<{
435
+ original: boolean;
407
436
  control: boolean;
408
437
  variant: boolean;
409
438
  }>({
439
+ original: false,
410
440
  control: false,
411
441
  variant: false,
412
442
  });
413
443
 
444
+ // Comparison analysis state
445
+ const [comparisonAnalysis, setComparisonAnalysis] =
446
+ useState<SessionComparisonAnalysis | null>(null);
447
+ const [comparisonAnalysisLoading, setComparisonAnalysisLoading] =
448
+ useState(false);
449
+ const [comparisonAnalysisDialogOpen, setComparisonAnalysisDialogOpen] =
450
+ useState(false);
451
+ const [hasComparisonAnalysis, setHasComparisonAnalysis] = useState(false);
452
+
453
+ // Check if comparison analysis exists
454
+ useEffect(() => {
455
+ if (runId) {
456
+ fetch(`/api/comparison-analysis/${runId}/exists`)
457
+ .then((res) => res.json())
458
+ .then((data) => {
459
+ setHasComparisonAnalysis(data.exists);
460
+ })
461
+ .catch(() => {
462
+ setHasComparisonAnalysis(false);
463
+ });
464
+ }
465
+ }, [runId]);
466
+
467
+ // Function to run comparison analysis
468
+ const runComparisonAnalysis = async () => {
469
+ setComparisonAnalysisLoading(true);
470
+ try {
471
+ const res = await fetch(`/api/analyze-comparison/${runId}`, {
472
+ method: "POST",
473
+ });
474
+ if (!res.ok) {
475
+ const error = await res.json();
476
+ throw new Error(error.error || "Analysis failed");
477
+ }
478
+ const analysis = await res.json();
479
+ setComparisonAnalysis(analysis);
480
+ setHasComparisonAnalysis(true);
481
+ setComparisonAnalysisDialogOpen(true);
482
+ } catch (error) {
483
+ console.error("Comparison analysis error:", error);
484
+ alert(
485
+ `Analysis failed: ${error instanceof Error ? error.message : "Unknown error"}`,
486
+ );
487
+ } finally {
488
+ setComparisonAnalysisLoading(false);
489
+ }
490
+ };
491
+
492
+ // Function to show existing comparison analysis
493
+ const showComparisonAnalysis = async () => {
494
+ if (comparisonAnalysis) {
495
+ setComparisonAnalysisDialogOpen(true);
496
+ return;
497
+ }
498
+
499
+ setComparisonAnalysisLoading(true);
500
+ try {
501
+ const res = await fetch(`/api/comparison-analysis/${runId}`);
502
+ if (!res.ok) {
503
+ throw new Error("Analysis not found");
504
+ }
505
+ const analysis = await res.json();
506
+ setComparisonAnalysis(analysis);
507
+ setComparisonAnalysisDialogOpen(true);
508
+ } catch (error) {
509
+ console.error("Error fetching comparison analysis:", error);
510
+ } finally {
511
+ setComparisonAnalysisLoading(false);
512
+ }
513
+ };
514
+
414
515
  // Fetch comparison run details, conversation, and restore saved messages
415
516
  useEffect(() => {
416
517
  let runData: ComparisonRun;
@@ -427,11 +528,41 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
427
528
  );
428
529
  const conversation = await conversationRes.json();
429
530
 
430
- // Extract user messages in order
531
+ // Extract user messages in order AND build original conversation with tool calls
431
532
  const messages: string[] = [];
533
+ const origMessages: ConversationItem[] = [];
432
534
  for (const trace of conversation) {
433
535
  if (trace.userInput) {
434
536
  messages.push(trace.userInput);
537
+ origMessages.push({
538
+ type: "user" as const,
539
+ content: trace.userInput,
540
+ });
541
+ }
542
+ // Use agentMessages which includes both tool_calls and chat messages in order
543
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
544
+ for (const msg of trace.agentMessages) {
545
+ if (msg.type === "tool_call") {
546
+ origMessages.push({
547
+ type: "tool_call" as const,
548
+ content: msg.toolName || msg.content,
549
+ toolName: msg.toolName,
550
+ toolInput: msg.toolInput,
551
+ toolOutput: msg.toolOutput,
552
+ });
553
+ } else if (msg.type === "chat" && msg.content?.trim()) {
554
+ origMessages.push({
555
+ type: "assistant" as const,
556
+ content: msg.content,
557
+ });
558
+ }
559
+ }
560
+ } else if (trace.llmOutput) {
561
+ // Fallback if no agentMessages
562
+ origMessages.push({
563
+ type: "assistant" as const,
564
+ content: trace.llmOutput,
565
+ });
435
566
  }
436
567
  }
437
568
 
@@ -441,6 +572,22 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
441
572
  }
442
573
 
443
574
  setUserMessages(messages);
575
+ setOriginalMessages(origMessages);
576
+
577
+ // Fetch metrics for the original source session
578
+ if (runData.sourceSessionId) {
579
+ try {
580
+ const metricsRes = await fetch(
581
+ `/api/session-metrics/${runData.sourceSessionId}?model=${encodeURIComponent(config?.controlModel || "claude-sonnet-4-5-20250929")}`,
582
+ );
583
+ if (metricsRes.ok) {
584
+ const metrics = await metricsRes.json();
585
+ setOriginalMetrics(metrics);
586
+ }
587
+ } catch (err) {
588
+ console.error("Failed to fetch original session metrics:", err);
589
+ }
590
+ }
444
591
 
445
592
  // Restore saved messages if the run has been completed or running
446
593
  if (runData.status === "completed" || runData.status === "running") {
@@ -460,31 +607,65 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
460
607
  : Promise.resolve([]),
461
608
  ]);
462
609
 
463
- // Convert traces to chat messages
464
- const tracesToChatMessages = (
465
- traces: Array<{ userInput?: string; llmOutput?: string }>,
466
- ): ChatMessage[] => {
467
- const chatMessages: ChatMessage[] = [];
610
+ // Convert traces to conversation items (including tool calls)
611
+ const tracesToConversationItems = (
612
+ traces: Array<{
613
+ userInput?: string;
614
+ llmOutput?: string;
615
+ agentMessages?: Array<{
616
+ type: string;
617
+ content?: string;
618
+ toolName?: string;
619
+ toolInput?: unknown;
620
+ toolOutput?: unknown;
621
+ }>;
622
+ }>,
623
+ ): ConversationItem[] => {
624
+ const items: ConversationItem[] = [];
468
625
  for (const trace of traces) {
469
626
  if (trace.userInput) {
470
- chatMessages.push({
471
- role: "user" as const,
627
+ items.push({
628
+ type: "user" as const,
472
629
  content: trace.userInput,
473
630
  });
474
631
  }
475
- if (trace.llmOutput) {
476
- chatMessages.push({
477
- role: "assistant" as const,
632
+ // Use agentMessages which includes both tool_calls and chat messages in order
633
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
634
+ for (const msg of trace.agentMessages) {
635
+ if (msg.type === "tool_call") {
636
+ items.push({
637
+ type: "tool_call" as const,
638
+ content: msg.toolName || msg.content || "",
639
+ toolName: msg.toolName,
640
+ toolInput: msg.toolInput,
641
+ toolOutput: msg.toolOutput,
642
+ });
643
+ } else if (msg.type === "chat" && msg.content?.trim()) {
644
+ items.push({
645
+ type: "assistant" as const,
646
+ content: msg.content,
647
+ });
648
+ }
649
+ }
650
+ } else if (trace.llmOutput) {
651
+ // Fallback if no agentMessages
652
+ items.push({
653
+ type: "assistant" as const,
478
654
  content: trace.llmOutput,
479
655
  });
480
656
  }
481
657
  }
482
- return chatMessages;
658
+ return items;
483
659
  };
484
660
 
485
661
  // Restore control messages
486
662
  if (runData.controlSessionId) {
487
- const controlMessages = tracesToChatMessages(controlConversation);
663
+ const controlMessages =
664
+ tracesToConversationItems(controlConversation);
665
+ // Count user messages for turnIndex
666
+ const controlUserCount = controlMessages.filter(
667
+ (m) => m.type === "user",
668
+ ).length;
488
669
  setControlState((prev) => ({
489
670
  ...prev,
490
671
  sessionId: runData.controlSessionId,
@@ -492,13 +673,18 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
492
673
  isStreaming: false,
493
674
  metrics: runData.controlMetrics,
494
675
  error: null,
495
- turnIndex: Math.floor(controlMessages.length / 2) - 1,
676
+ turnIndex: controlUserCount - 1,
496
677
  }));
497
678
  }
498
679
 
499
680
  // Restore variant messages
500
681
  if (runData.variantSessionId) {
501
- const variantMessages = tracesToChatMessages(variantConversation);
682
+ const variantMessages =
683
+ tracesToConversationItems(variantConversation);
684
+ // Count user messages for turnIndex
685
+ const variantUserCount = variantMessages.filter(
686
+ (m) => m.type === "user",
687
+ ).length;
502
688
  setVariantState((prev) => ({
503
689
  ...prev,
504
690
  sessionId: runData.variantSessionId,
@@ -506,14 +692,16 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
506
692
  isStreaming: false,
507
693
  metrics: runData.variantMetrics,
508
694
  error: null,
509
- turnIndex: Math.floor(variantMessages.length / 2) - 1,
695
+ turnIndex: variantUserCount - 1,
510
696
  }));
511
697
  }
512
698
 
513
699
  // Set queue state based on completed messages
700
+ const controlItems = tracesToConversationItems(controlConversation);
701
+ const variantItems = tracesToConversationItems(variantConversation);
514
702
  const completedTurns = Math.min(
515
- Math.floor(tracesToChatMessages(controlConversation).length / 2),
516
- Math.floor(tracesToChatMessages(variantConversation).length / 2),
703
+ controlItems.filter((m) => m.type === "user").length,
704
+ variantItems.filter((m) => m.type === "user").length,
517
705
  );
518
706
  setQueueState({
519
707
  currentIndex: completedTurns - 1,
@@ -535,7 +723,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
535
723
  setError(err.message);
536
724
  setLoading(false);
537
725
  });
538
- }, [runId]);
726
+ }, [runId, config?.controlModel]);
539
727
 
540
728
  const generateRequestId = (prefix: string, sessionId?: string) => {
541
729
  const randomPart =
@@ -592,7 +780,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
592
780
  let abortController: AbortController | null = new AbortController();
593
781
 
594
782
  // Start SSE connection (don't await - runs in background)
595
- const ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
783
+ const _ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
596
784
  headers: {
597
785
  "X-Session-ID": sessionId,
598
786
  },
@@ -709,6 +897,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
709
897
  metrics.totalTokens === previousTokens &&
710
898
  metrics.toolCallCount === previousTools
711
899
  ) {
900
+ // biome-ignore lint/style/noNonNullAssertion: lastMetrics is set in the loop
712
901
  return lastMetrics!;
713
902
  }
714
903
 
@@ -754,27 +943,24 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
754
943
  setState((prev) => ({
755
944
  ...prev,
756
945
  isStreaming: true,
757
- messages: [...prev.messages, { role: "user", content: message }],
946
+ messages: [...prev.messages, { type: "user", content: message }],
758
947
  }));
759
948
 
760
- let accumulatedContent = "";
761
-
762
949
  const response = await sendMessageAndCollect(
763
950
  sessionId,
764
951
  message,
765
952
  (content) => {
766
- accumulatedContent = content;
767
953
  setState((prev) => {
768
954
  // Find the last assistant message or add one
769
955
  const messages = [...prev.messages];
770
956
  const lastMsg = messages[messages.length - 1];
771
- if (lastMsg && lastMsg.role === "assistant") {
957
+ if (lastMsg && lastMsg.type === "assistant") {
772
958
  messages[messages.length - 1] = {
773
- role: "assistant",
959
+ type: "assistant",
774
960
  content,
775
961
  };
776
962
  } else {
777
- messages.push({ role: "assistant", content });
963
+ messages.push({ type: "assistant", content });
778
964
  }
779
965
  return { ...prev, messages };
780
966
  });
@@ -812,7 +998,8 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
812
998
  };
813
999
  }
814
1000
  },
815
- [fetchMetricsWithRetry],
1001
+ // biome-ignore lint/correctness/useExhaustiveDependencies: sendMessageAndCollect is stable
1002
+ [fetchMetricsWithRetry, sendMessageAndCollect],
816
1003
  );
817
1004
 
818
1005
  // Send staged message to a specific arm
@@ -909,11 +1096,11 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
909
1096
  const controlMsgs = controlState.messages;
910
1097
  const variantMsgs = variantState.messages;
911
1098
  const lastControlResponse =
912
- controlMsgs[controlMsgs.length - 1]?.role === "assistant"
1099
+ controlMsgs[controlMsgs.length - 1]?.type === "assistant"
913
1100
  ? controlMsgs[controlMsgs.length - 1]?.content
914
1101
  : "";
915
1102
  const lastVariantResponse =
916
- variantMsgs[variantMsgs.length - 1]?.role === "assistant"
1103
+ variantMsgs[variantMsgs.length - 1]?.type === "assistant"
917
1104
  ? variantMsgs[variantMsgs.length - 1]?.content
918
1105
  : "";
919
1106
 
@@ -1089,17 +1276,24 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1089
1276
  setError(err instanceof Error ? err.message : "Failed to run comparison");
1090
1277
  setIsRunning(false);
1091
1278
  }
1279
+ // biome-ignore lint/correctness/useExhaustiveDependencies: stable refs
1092
1280
  }, [run, config, userMessages, initialAutoRun, runId, createSession]);
1093
1281
 
1094
1282
  // Function to fetch existing or trigger new session analysis
1095
1283
  const triggerAnalysis = useCallback(
1096
- async (sessionId: string, type: "control" | "variant") => {
1284
+ async (sessionId: string, type: "original" | "control" | "variant") => {
1097
1285
  const setLoading =
1098
- type === "control"
1099
- ? setControlAnalysisLoading
1100
- : setVariantAnalysisLoading;
1286
+ type === "original"
1287
+ ? setOriginalAnalysisLoading
1288
+ : type === "control"
1289
+ ? setControlAnalysisLoading
1290
+ : setVariantAnalysisLoading;
1101
1291
  const setAnalysis =
1102
- type === "control" ? setControlAnalysis : setVariantAnalysis;
1292
+ type === "original"
1293
+ ? setOriginalAnalysis
1294
+ : type === "control"
1295
+ ? setControlAnalysis
1296
+ : setVariantAnalysis;
1103
1297
 
1104
1298
  setLoading(true);
1105
1299
  try {
@@ -1228,14 +1422,58 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1228
1422
  </div>
1229
1423
  )}
1230
1424
  </div>
1231
- {!hasRun && (
1232
- <Button
1233
- onClick={runComparison}
1234
- disabled={isRunning || userMessages.length === 0}
1235
- >
1236
- {isRunning ? "Running..." : "Start Comparison"}
1237
- </Button>
1238
- )}
1425
+ <div className="flex items-center gap-2">
1426
+ {/* Comparison Analysis button - shown when comparison is complete */}
1427
+ {hasRun &&
1428
+ queueState.status === "completed" &&
1429
+ (hasComparisonAnalysis ? (
1430
+ <>
1431
+ <Button
1432
+ variant="outline"
1433
+ size="sm"
1434
+ onClick={showComparisonAnalysis}
1435
+ disabled={comparisonAnalysisLoading}
1436
+ >
1437
+ {comparisonAnalysisLoading ? (
1438
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1439
+ ) : (
1440
+ <BarChart3 className="w-4 h-4 mr-2" />
1441
+ )}
1442
+ Show Analysis
1443
+ </Button>
1444
+ <Button
1445
+ variant="ghost"
1446
+ size="sm"
1447
+ onClick={runComparisonAnalysis}
1448
+ disabled={comparisonAnalysisLoading}
1449
+ >
1450
+ Re-analyze
1451
+ </Button>
1452
+ </>
1453
+ ) : (
1454
+ <Button
1455
+ variant="outline"
1456
+ size="sm"
1457
+ onClick={runComparisonAnalysis}
1458
+ disabled={comparisonAnalysisLoading}
1459
+ >
1460
+ {comparisonAnalysisLoading ? (
1461
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1462
+ ) : (
1463
+ <BarChart3 className="w-4 h-4 mr-2" />
1464
+ )}
1465
+ Analyze Comparison
1466
+ </Button>
1467
+ ))}
1468
+ {!hasRun && (
1469
+ <Button
1470
+ onClick={runComparison}
1471
+ disabled={isRunning || userMessages.length === 0}
1472
+ >
1473
+ {isRunning ? "Running..." : "Start Comparison"}
1474
+ </Button>
1475
+ )}
1476
+ </div>
1239
1477
  </div>
1240
1478
 
1241
1479
  {/* Queue Banner - shown when there's a staged message waiting */}
@@ -1317,7 +1555,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1317
1555
  </div>
1318
1556
  <div className="space-y-2">
1319
1557
  {userMessages.map((msg, idx) => (
1320
- <details key={idx} className="group">
1558
+ <details
1559
+ key={`user-msg-${msg.slice(0, 50)}-${idx}`}
1560
+ className="group"
1561
+ >
1321
1562
  <summary className="text-sm cursor-pointer flex items-center gap-2 hover:text-foreground">
1322
1563
  <span className="text-xs font-mono text-muted-foreground w-5">
1323
1564
  {idx + 1}.
@@ -1358,8 +1599,17 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1358
1599
  </div>
1359
1600
  )}
1360
1601
 
1361
- {/* Control vs Variant labels */}
1362
- <div className="grid grid-cols-2 gap-4 text-sm">
1602
+ {/* Original vs Control vs Variant labels */}
1603
+ <div className="grid grid-cols-3 gap-4 text-sm">
1604
+ <div className="space-y-1">
1605
+ <div className="flex items-center gap-2">
1606
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1607
+ <span className="font-medium">Original</span>
1608
+ </div>
1609
+ <div className="text-muted-foreground text-xs">
1610
+ Source session
1611
+ </div>
1612
+ </div>
1363
1613
  <div className="space-y-1">
1364
1614
  <div className="flex items-center gap-2">
1365
1615
  <span className="w-2 h-2 rounded-full bg-blue-500" />
@@ -1384,16 +1634,139 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1384
1634
  </div>
1385
1635
  )}
1386
1636
 
1387
- {/* Side-by-side comparison */}
1637
+ {/* Side-by-side comparison - 3 panes: Original, Control, Variant */}
1388
1638
  {hasRun && (
1389
- <div className="grid grid-cols-2 gap-4 flex-1 min-h-0">
1639
+ <div className="grid grid-cols-3 gap-4 flex-1 min-h-0">
1640
+ {/* Original (Source Session - Read Only) */}
1641
+ <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1642
+ <CardHeader className="py-3 border-b shrink-0">
1643
+ <CardTitle className="text-sm flex items-center gap-2">
1644
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1645
+ Original Session
1646
+ </CardTitle>
1647
+ <CardDescription className="text-xs">
1648
+ Source session (read-only)
1649
+ </CardDescription>
1650
+ </CardHeader>
1651
+ <CardContent className="flex-1 overflow-auto py-4">
1652
+ {originalMessages.map((msg, i) => (
1653
+ <div
1654
+ key={`original-${msg.type}-${i}`}
1655
+ className={`mb-4 ${
1656
+ msg.type === "user"
1657
+ ? "text-yellow-600 dark:text-yellow-400"
1658
+ : msg.type === "tool_call"
1659
+ ? ""
1660
+ : ""
1661
+ }`}
1662
+ >
1663
+ {msg.type === "tool_call" ? (
1664
+ <details className="rounded bg-muted/50 border text-xs group">
1665
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1666
+ <span className="text-muted-foreground">🔧</span>
1667
+ <span className="font-medium flex-1">
1668
+ {msg.toolName || msg.content}
1669
+ </span>
1670
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1671
+ </summary>
1672
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1673
+ {msg.toolInput !== null &&
1674
+ msg.toolInput !== undefined && (
1675
+ <div>
1676
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1677
+ Args
1678
+ </div>
1679
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1680
+ {typeof msg.toolInput === "string"
1681
+ ? msg.toolInput
1682
+ : JSON.stringify(msg.toolInput, null, 2)}
1683
+ </pre>
1684
+ </div>
1685
+ )}
1686
+ {msg.toolOutput !== null &&
1687
+ msg.toolOutput !== undefined && (
1688
+ <div>
1689
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1690
+ Result
1691
+ </div>
1692
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1693
+ {typeof msg.toolOutput === "string"
1694
+ ? msg.toolOutput
1695
+ : JSON.stringify(msg.toolOutput, null, 2)}
1696
+ </pre>
1697
+ </div>
1698
+ )}
1699
+ {(msg.toolInput === null ||
1700
+ msg.toolInput === undefined) &&
1701
+ (msg.toolOutput === null ||
1702
+ msg.toolOutput === undefined) && (
1703
+ <div className="text-muted-foreground text-[11px]">
1704
+ No input/output data available
1705
+ </div>
1706
+ )}
1707
+ </div>
1708
+ </details>
1709
+ ) : (
1710
+ <>
1711
+ <div className="text-xs font-medium uppercase mb-1">
1712
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1713
+ </div>
1714
+ <div className="text-sm whitespace-pre-wrap">
1715
+ {msg.content}
1716
+ </div>
1717
+ </>
1718
+ )}
1719
+ </div>
1720
+ ))}
1721
+ {originalMessages.length === 0 && (
1722
+ <div className="text-sm text-muted-foreground">
1723
+ No messages in source session
1724
+ </div>
1725
+ )}
1726
+ </CardContent>
1727
+ {/* Session Analysis & Tool Calls for Original */}
1728
+ {originalMetrics && (
1729
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1730
+ {/* Session Analysis */}
1731
+ <SessionAnalysisPanel
1732
+ analysis={originalAnalysis}
1733
+ isLoading={originalAnalysisLoading}
1734
+ isExpanded={analysisExpanded.original}
1735
+ onToggle={() =>
1736
+ setAnalysisExpanded((prev) => ({
1737
+ ...prev,
1738
+ original: !prev.original,
1739
+ }))
1740
+ }
1741
+ onRunAnalysis={() =>
1742
+ run?.sourceSessionId &&
1743
+ triggerAnalysis(run.sourceSessionId, "original")
1744
+ }
1745
+ accentColor="yellow"
1746
+ />
1747
+ {/* Tool Calls */}
1748
+ <ToolCallsPanel
1749
+ toolCalls={originalMetrics.toolCalls}
1750
+ isExpanded={toolCallsExpanded.original}
1751
+ onToggle={() =>
1752
+ setToolCallsExpanded((prev) => ({
1753
+ ...prev,
1754
+ original: !prev.original,
1755
+ }))
1756
+ }
1757
+ accentColor="yellow"
1758
+ />
1759
+ </div>
1760
+ )}
1761
+ </Card>
1762
+
1390
1763
  {/* Control */}
1391
1764
  <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1392
1765
  <CardHeader className="py-3 border-b shrink-0">
1393
1766
  <div className="flex items-center justify-between">
1394
1767
  <CardTitle className="text-sm flex items-center gap-2">
1395
1768
  <span className="w-2 h-2 rounded-full bg-blue-500" />
1396
- Control (Original)
1769
+ Control (Rerun)
1397
1770
  {controlState.isStreaming && (
1398
1771
  <Loader2 className="w-3 h-3 animate-spin text-blue-500" />
1399
1772
  )}
@@ -1427,20 +1800,76 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1427
1800
  <CardContent className="flex-1 overflow-auto py-4">
1428
1801
  {controlState.messages.map((msg, i) => (
1429
1802
  <div
1430
- key={i}
1431
- className={`mb-4 ${msg.role === "user" ? "text-blue-600 dark:text-blue-400" : ""}`}
1803
+ key={`control-${msg.type}-${i}`}
1804
+ className={`mb-4 ${
1805
+ msg.type === "user"
1806
+ ? "text-blue-600 dark:text-blue-400"
1807
+ : msg.type === "tool_call"
1808
+ ? ""
1809
+ : ""
1810
+ }`}
1432
1811
  >
1433
- <div className="text-xs font-medium uppercase mb-1">
1434
- {msg.role}
1435
- </div>
1436
- <div className="text-sm whitespace-pre-wrap">
1437
- {msg.content}
1438
- {controlState.isStreaming &&
1439
- msg.role === "assistant" &&
1440
- i === controlState.messages.length - 1 && (
1441
- <span className="animate-pulse">▊</span>
1442
- )}
1443
- </div>
1812
+ {msg.type === "tool_call" ? (
1813
+ <details className="rounded bg-muted/50 border text-xs group">
1814
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1815
+ <span className="text-muted-foreground">🔧</span>
1816
+ <span className="font-medium flex-1">
1817
+ {msg.toolName || msg.content}
1818
+ </span>
1819
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1820
+ </summary>
1821
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1822
+ {msg.toolInput !== null &&
1823
+ msg.toolInput !== undefined && (
1824
+ <div>
1825
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1826
+ Args
1827
+ </div>
1828
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1829
+ {typeof msg.toolInput === "string"
1830
+ ? msg.toolInput
1831
+ : JSON.stringify(msg.toolInput, null, 2)}
1832
+ </pre>
1833
+ </div>
1834
+ )}
1835
+ {msg.toolOutput !== null &&
1836
+ msg.toolOutput !== undefined && (
1837
+ <div>
1838
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1839
+ Result
1840
+ </div>
1841
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1842
+ {typeof msg.toolOutput === "string"
1843
+ ? msg.toolOutput
1844
+ : JSON.stringify(msg.toolOutput, null, 2)}
1845
+ </pre>
1846
+ </div>
1847
+ )}
1848
+ {(msg.toolInput === null ||
1849
+ msg.toolInput === undefined) &&
1850
+ (msg.toolOutput === null ||
1851
+ msg.toolOutput === undefined) && (
1852
+ <div className="text-muted-foreground text-[11px]">
1853
+ No input/output data available
1854
+ </div>
1855
+ )}
1856
+ </div>
1857
+ </details>
1858
+ ) : (
1859
+ <>
1860
+ <div className="text-xs font-medium uppercase mb-1">
1861
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1862
+ </div>
1863
+ <div className="text-sm whitespace-pre-wrap">
1864
+ {msg.content}
1865
+ {controlState.isStreaming &&
1866
+ msg.type === "assistant" &&
1867
+ i === controlState.messages.length - 1 && (
1868
+ <span className="animate-pulse">▊</span>
1869
+ )}
1870
+ </div>
1871
+ </>
1872
+ )}
1444
1873
  </div>
1445
1874
  ))}
1446
1875
  {controlState.error && (
@@ -1540,20 +1969,76 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1540
1969
  <CardContent className="flex-1 overflow-auto py-4">
1541
1970
  {variantState.messages.map((msg, i) => (
1542
1971
  <div
1543
- key={i}
1544
- className={`mb-4 ${msg.role === "user" ? "text-orange-600 dark:text-orange-400" : ""}`}
1972
+ key={`variant-${msg.type}-${i}`}
1973
+ className={`mb-4 ${
1974
+ msg.type === "user"
1975
+ ? "text-orange-600 dark:text-orange-400"
1976
+ : msg.type === "tool_call"
1977
+ ? ""
1978
+ : ""
1979
+ }`}
1545
1980
  >
1546
- <div className="text-xs font-medium uppercase mb-1">
1547
- {msg.role}
1548
- </div>
1549
- <div className="text-sm whitespace-pre-wrap">
1550
- {msg.content}
1551
- {variantState.isStreaming &&
1552
- msg.role === "assistant" &&
1553
- i === variantState.messages.length - 1 && (
1554
- <span className="animate-pulse">▊</span>
1555
- )}
1556
- </div>
1981
+ {msg.type === "tool_call" ? (
1982
+ <details className="rounded bg-muted/50 border text-xs group">
1983
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1984
+ <span className="text-muted-foreground">🔧</span>
1985
+ <span className="font-medium flex-1">
1986
+ {msg.toolName || msg.content}
1987
+ </span>
1988
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1989
+ </summary>
1990
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1991
+ {msg.toolInput !== null &&
1992
+ msg.toolInput !== undefined && (
1993
+ <div>
1994
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1995
+ Args
1996
+ </div>
1997
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1998
+ {typeof msg.toolInput === "string"
1999
+ ? msg.toolInput
2000
+ : JSON.stringify(msg.toolInput, null, 2)}
2001
+ </pre>
2002
+ </div>
2003
+ )}
2004
+ {msg.toolOutput !== null &&
2005
+ msg.toolOutput !== undefined && (
2006
+ <div>
2007
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
2008
+ Result
2009
+ </div>
2010
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
2011
+ {typeof msg.toolOutput === "string"
2012
+ ? msg.toolOutput
2013
+ : JSON.stringify(msg.toolOutput, null, 2)}
2014
+ </pre>
2015
+ </div>
2016
+ )}
2017
+ {(msg.toolInput === null ||
2018
+ msg.toolInput === undefined) &&
2019
+ (msg.toolOutput === null ||
2020
+ msg.toolOutput === undefined) && (
2021
+ <div className="text-muted-foreground text-[11px]">
2022
+ No input/output data available
2023
+ </div>
2024
+ )}
2025
+ </div>
2026
+ </details>
2027
+ ) : (
2028
+ <>
2029
+ <div className="text-xs font-medium uppercase mb-1">
2030
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
2031
+ </div>
2032
+ <div className="text-sm whitespace-pre-wrap">
2033
+ {msg.content}
2034
+ {variantState.isStreaming &&
2035
+ msg.type === "assistant" &&
2036
+ i === variantState.messages.length - 1 && (
2037
+ <span className="animate-pulse">▊</span>
2038
+ )}
2039
+ </div>
2040
+ </>
2041
+ )}
1557
2042
  </div>
1558
2043
  ))}
1559
2044
  {variantState.error && (
@@ -1615,6 +2100,15 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1615
2100
  </div>
1616
2101
  )}
1617
2102
  </div>
2103
+
2104
+ {/* Comparison Analysis Dialog */}
2105
+ {comparisonAnalysis && (
2106
+ <ComparisonAnalysisDialog
2107
+ open={comparisonAnalysisDialogOpen}
2108
+ onClose={() => setComparisonAnalysisDialogOpen(false)}
2109
+ analysis={comparisonAnalysis}
2110
+ />
2111
+ )}
1618
2112
  </DebuggerLayout>
1619
2113
  );
1620
2114
  }