@townco/debugger 0.1.32 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import {
2
+ BarChart3,
2
3
  ChevronDown,
3
4
  ChevronUp,
4
5
  Loader2,
@@ -16,7 +17,9 @@ import {
16
17
  CardTitle,
17
18
  } from "@/components/ui/card";
18
19
  import { Checkbox } from "@/components/ui/checkbox";
20
+ import type { SessionComparisonAnalysis } from "../analysis/comparison-types";
19
21
  import type { SessionAnalysis } from "../analysis/types";
22
+ import { ComparisonAnalysisDialog } from "../components/ComparisonAnalysisDialog";
20
23
  import { DebuggerLayout } from "../components/DebuggerLayout";
21
24
  import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
22
25
  import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
@@ -25,14 +28,18 @@ interface ComparisonViewProps {
25
28
  runId: string;
26
29
  }
27
30
 
28
- interface ChatMessage {
29
- role: "user" | "assistant";
31
+ // Conversation item that can be user message, assistant message, or tool call
32
+ interface ConversationItem {
33
+ type: "user" | "assistant" | "tool_call";
30
34
  content: string;
35
+ toolName?: string | undefined;
36
+ toolInput?: unknown;
37
+ toolOutput?: unknown;
31
38
  }
32
39
 
33
40
  interface SessionState {
34
41
  sessionId: string | null;
35
- messages: ChatMessage[];
42
+ messages: ConversationItem[];
36
43
  isStreaming: boolean;
37
44
  isSending: boolean; // true while sending is in progress (before streaming starts)
38
45
  metrics: SessionMetrics | null;
@@ -66,17 +73,21 @@ function SessionAnalysisPanel({
66
73
  isExpanded: boolean;
67
74
  onToggle: () => void;
68
75
  onRunAnalysis: () => void;
69
- accentColor: "blue" | "orange";
76
+ accentColor: "yellow" | "blue" | "orange";
70
77
  }) {
71
78
  const colorClasses =
72
- accentColor === "blue"
73
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
74
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
79
+ accentColor === "yellow"
80
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
81
+ : accentColor === "blue"
82
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
83
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
75
84
 
76
85
  const headerColorClasses =
77
- accentColor === "blue"
78
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
79
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
86
+ accentColor === "yellow"
87
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
88
+ : accentColor === "blue"
89
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
90
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
80
91
 
81
92
  if (isLoading) {
82
93
  return (
@@ -239,17 +250,21 @@ function ToolCallsPanel({
239
250
  toolCalls: SessionMetrics["toolCalls"];
240
251
  isExpanded: boolean;
241
252
  onToggle: () => void;
242
- accentColor: "blue" | "orange";
253
+ accentColor: "yellow" | "blue" | "orange";
243
254
  }) {
244
255
  const colorClasses =
245
- accentColor === "blue"
246
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
247
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
256
+ accentColor === "yellow"
257
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
258
+ : accentColor === "blue"
259
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
260
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
248
261
 
249
262
  const headerColorClasses =
250
- accentColor === "blue"
251
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
252
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
263
+ accentColor === "yellow"
264
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
265
+ : accentColor === "blue"
266
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
267
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
253
268
 
254
269
  const toolCallCount = toolCalls?.length ?? 0;
255
270
 
@@ -389,28 +404,114 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
389
404
  const [isRunning, setIsRunning] = useState(false);
390
405
  const [hasRun, setHasRun] = useState(false);
391
406
 
407
+ // Original source session state (read-only, for reference)
408
+ const [originalMessages, setOriginalMessages] = useState<ConversationItem[]>(
409
+ [],
410
+ );
411
+ const [originalMetrics, setOriginalMetrics] = useState<SessionMetrics | null>(
412
+ null,
413
+ );
414
+
392
415
  // Session analysis state
416
+ const [originalAnalysis, setOriginalAnalysis] =
417
+ useState<SessionAnalysis | null>(null);
393
418
  const [controlAnalysis, setControlAnalysis] =
394
419
  useState<SessionAnalysis | null>(null);
395
420
  const [variantAnalysis, setVariantAnalysis] =
396
421
  useState<SessionAnalysis | null>(null);
422
+ const [originalAnalysisLoading, setOriginalAnalysisLoading] = useState(false);
397
423
  const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
398
424
  const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
399
425
  const [analysisExpanded, setAnalysisExpanded] = useState<{
426
+ original: boolean;
400
427
  control: boolean;
401
428
  variant: boolean;
402
429
  }>({
430
+ original: false,
403
431
  control: false,
404
432
  variant: false,
405
433
  });
406
434
  const [toolCallsExpanded, setToolCallsExpanded] = useState<{
435
+ original: boolean;
407
436
  control: boolean;
408
437
  variant: boolean;
409
438
  }>({
439
+ original: false,
410
440
  control: false,
411
441
  variant: false,
412
442
  });
413
443
 
444
+ // Comparison analysis state
445
+ const [comparisonAnalysis, setComparisonAnalysis] =
446
+ useState<SessionComparisonAnalysis | null>(null);
447
+ const [comparisonAnalysisLoading, setComparisonAnalysisLoading] =
448
+ useState(false);
449
+ const [comparisonAnalysisDialogOpen, setComparisonAnalysisDialogOpen] =
450
+ useState(false);
451
+ const [hasComparisonAnalysis, setHasComparisonAnalysis] = useState(false);
452
+
453
+ // Check if comparison analysis exists
454
+ useEffect(() => {
455
+ if (runId) {
456
+ fetch(`/api/comparison-analysis/${runId}/exists`)
457
+ .then((res) => res.json())
458
+ .then((data) => {
459
+ setHasComparisonAnalysis(data.exists);
460
+ })
461
+ .catch(() => {
462
+ setHasComparisonAnalysis(false);
463
+ });
464
+ }
465
+ }, [runId]);
466
+
467
+ // Function to run comparison analysis
468
+ const runComparisonAnalysis = async () => {
469
+ setComparisonAnalysisLoading(true);
470
+ try {
471
+ const res = await fetch(`/api/analyze-comparison/${runId}`, {
472
+ method: "POST",
473
+ });
474
+ if (!res.ok) {
475
+ const error = await res.json();
476
+ throw new Error(error.error || "Analysis failed");
477
+ }
478
+ const analysis = await res.json();
479
+ setComparisonAnalysis(analysis);
480
+ setHasComparisonAnalysis(true);
481
+ setComparisonAnalysisDialogOpen(true);
482
+ } catch (error) {
483
+ console.error("Comparison analysis error:", error);
484
+ alert(
485
+ `Analysis failed: ${error instanceof Error ? error.message : "Unknown error"}`,
486
+ );
487
+ } finally {
488
+ setComparisonAnalysisLoading(false);
489
+ }
490
+ };
491
+
492
+ // Function to show existing comparison analysis
493
+ const showComparisonAnalysis = async () => {
494
+ if (comparisonAnalysis) {
495
+ setComparisonAnalysisDialogOpen(true);
496
+ return;
497
+ }
498
+
499
+ setComparisonAnalysisLoading(true);
500
+ try {
501
+ const res = await fetch(`/api/comparison-analysis/${runId}`);
502
+ if (!res.ok) {
503
+ throw new Error("Analysis not found");
504
+ }
505
+ const analysis = await res.json();
506
+ setComparisonAnalysis(analysis);
507
+ setComparisonAnalysisDialogOpen(true);
508
+ } catch (error) {
509
+ console.error("Error fetching comparison analysis:", error);
510
+ } finally {
511
+ setComparisonAnalysisLoading(false);
512
+ }
513
+ };
514
+
414
515
  // Fetch comparison run details, conversation, and restore saved messages
415
516
  useEffect(() => {
416
517
  let runData: ComparisonRun;
@@ -427,11 +528,41 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
427
528
  );
428
529
  const conversation = await conversationRes.json();
429
530
 
430
- // Extract user messages in order
531
+ // Extract user messages in order AND build original conversation with tool calls
431
532
  const messages: string[] = [];
533
+ const origMessages: ConversationItem[] = [];
432
534
  for (const trace of conversation) {
433
535
  if (trace.userInput) {
434
536
  messages.push(trace.userInput);
537
+ origMessages.push({
538
+ type: "user" as const,
539
+ content: trace.userInput,
540
+ });
541
+ }
542
+ // Use agentMessages which includes both tool_calls and chat messages in order
543
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
544
+ for (const msg of trace.agentMessages) {
545
+ if (msg.type === "tool_call") {
546
+ origMessages.push({
547
+ type: "tool_call" as const,
548
+ content: msg.toolName || msg.content,
549
+ toolName: msg.toolName,
550
+ toolInput: msg.toolInput,
551
+ toolOutput: msg.toolOutput,
552
+ });
553
+ } else if (msg.type === "chat" && msg.content?.trim()) {
554
+ origMessages.push({
555
+ type: "assistant" as const,
556
+ content: msg.content,
557
+ });
558
+ }
559
+ }
560
+ } else if (trace.llmOutput) {
561
+ // Fallback if no agentMessages
562
+ origMessages.push({
563
+ type: "assistant" as const,
564
+ content: trace.llmOutput,
565
+ });
435
566
  }
436
567
  }
437
568
 
@@ -441,6 +572,22 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
441
572
  }
442
573
 
443
574
  setUserMessages(messages);
575
+ setOriginalMessages(origMessages);
576
+
577
+ // Fetch metrics for the original source session
578
+ if (runData.sourceSessionId) {
579
+ try {
580
+ const metricsRes = await fetch(
581
+ `/api/session-metrics/${runData.sourceSessionId}?model=${encodeURIComponent(config?.controlModel || "claude-sonnet-4-5-20250929")}`,
582
+ );
583
+ if (metricsRes.ok) {
584
+ const metrics = await metricsRes.json();
585
+ setOriginalMetrics(metrics);
586
+ }
587
+ } catch (err) {
588
+ console.error("Failed to fetch original session metrics:", err);
589
+ }
590
+ }
444
591
 
445
592
  // Restore saved messages if the run has been completed or running
446
593
  if (runData.status === "completed" || runData.status === "running") {
@@ -460,31 +607,65 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
460
607
  : Promise.resolve([]),
461
608
  ]);
462
609
 
463
- // Convert traces to chat messages
464
- const tracesToChatMessages = (
465
- traces: Array<{ userInput?: string; llmOutput?: string }>,
466
- ): ChatMessage[] => {
467
- const chatMessages: ChatMessage[] = [];
610
+ // Convert traces to conversation items (including tool calls)
611
+ const tracesToConversationItems = (
612
+ traces: Array<{
613
+ userInput?: string;
614
+ llmOutput?: string;
615
+ agentMessages?: Array<{
616
+ type: string;
617
+ content?: string;
618
+ toolName?: string;
619
+ toolInput?: unknown;
620
+ toolOutput?: unknown;
621
+ }>;
622
+ }>,
623
+ ): ConversationItem[] => {
624
+ const items: ConversationItem[] = [];
468
625
  for (const trace of traces) {
469
626
  if (trace.userInput) {
470
- chatMessages.push({
471
- role: "user" as const,
627
+ items.push({
628
+ type: "user" as const,
472
629
  content: trace.userInput,
473
630
  });
474
631
  }
475
- if (trace.llmOutput) {
476
- chatMessages.push({
477
- role: "assistant" as const,
632
+ // Use agentMessages which includes both tool_calls and chat messages in order
633
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
634
+ for (const msg of trace.agentMessages) {
635
+ if (msg.type === "tool_call") {
636
+ items.push({
637
+ type: "tool_call" as const,
638
+ content: msg.toolName || msg.content || "",
639
+ toolName: msg.toolName,
640
+ toolInput: msg.toolInput,
641
+ toolOutput: msg.toolOutput,
642
+ });
643
+ } else if (msg.type === "chat" && msg.content?.trim()) {
644
+ items.push({
645
+ type: "assistant" as const,
646
+ content: msg.content,
647
+ });
648
+ }
649
+ }
650
+ } else if (trace.llmOutput) {
651
+ // Fallback if no agentMessages
652
+ items.push({
653
+ type: "assistant" as const,
478
654
  content: trace.llmOutput,
479
655
  });
480
656
  }
481
657
  }
482
- return chatMessages;
658
+ return items;
483
659
  };
484
660
 
485
661
  // Restore control messages
486
662
  if (runData.controlSessionId) {
487
- const controlMessages = tracesToChatMessages(controlConversation);
663
+ const controlMessages =
664
+ tracesToConversationItems(controlConversation);
665
+ // Count user messages for turnIndex
666
+ const controlUserCount = controlMessages.filter(
667
+ (m) => m.type === "user",
668
+ ).length;
488
669
  setControlState((prev) => ({
489
670
  ...prev,
490
671
  sessionId: runData.controlSessionId,
@@ -492,13 +673,18 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
492
673
  isStreaming: false,
493
674
  metrics: runData.controlMetrics,
494
675
  error: null,
495
- turnIndex: Math.floor(controlMessages.length / 2) - 1,
676
+ turnIndex: controlUserCount - 1,
496
677
  }));
497
678
  }
498
679
 
499
680
  // Restore variant messages
500
681
  if (runData.variantSessionId) {
501
- const variantMessages = tracesToChatMessages(variantConversation);
682
+ const variantMessages =
683
+ tracesToConversationItems(variantConversation);
684
+ // Count user messages for turnIndex
685
+ const variantUserCount = variantMessages.filter(
686
+ (m) => m.type === "user",
687
+ ).length;
502
688
  setVariantState((prev) => ({
503
689
  ...prev,
504
690
  sessionId: runData.variantSessionId,
@@ -506,14 +692,16 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
506
692
  isStreaming: false,
507
693
  metrics: runData.variantMetrics,
508
694
  error: null,
509
- turnIndex: Math.floor(variantMessages.length / 2) - 1,
695
+ turnIndex: variantUserCount - 1,
510
696
  }));
511
697
  }
512
698
 
513
699
  // Set queue state based on completed messages
700
+ const controlItems = tracesToConversationItems(controlConversation);
701
+ const variantItems = tracesToConversationItems(variantConversation);
514
702
  const completedTurns = Math.min(
515
- Math.floor(tracesToChatMessages(controlConversation).length / 2),
516
- Math.floor(tracesToChatMessages(variantConversation).length / 2),
703
+ controlItems.filter((m) => m.type === "user").length,
704
+ variantItems.filter((m) => m.type === "user").length,
517
705
  );
518
706
  setQueueState({
519
707
  currentIndex: completedTurns - 1,
@@ -535,7 +723,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
535
723
  setError(err.message);
536
724
  setLoading(false);
537
725
  });
538
- }, [runId]);
726
+ }, [runId, config?.controlModel]);
539
727
 
540
728
  const generateRequestId = (prefix: string, sessionId?: string) => {
541
729
  const randomPart =
@@ -592,7 +780,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
592
780
  let abortController: AbortController | null = new AbortController();
593
781
 
594
782
  // Start SSE connection (don't await - runs in background)
595
- const ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
783
+ const _ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
596
784
  headers: {
597
785
  "X-Session-ID": sessionId,
598
786
  },
@@ -709,6 +897,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
709
897
  metrics.totalTokens === previousTokens &&
710
898
  metrics.toolCallCount === previousTools
711
899
  ) {
900
+ // biome-ignore lint/style/noNonNullAssertion: lastMetrics is set in the loop
712
901
  return lastMetrics!;
713
902
  }
714
903
 
@@ -754,27 +943,24 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
754
943
  setState((prev) => ({
755
944
  ...prev,
756
945
  isStreaming: true,
757
- messages: [...prev.messages, { role: "user", content: message }],
946
+ messages: [...prev.messages, { type: "user", content: message }],
758
947
  }));
759
948
 
760
- let accumulatedContent = "";
761
-
762
949
  const response = await sendMessageAndCollect(
763
950
  sessionId,
764
951
  message,
765
952
  (content) => {
766
- accumulatedContent = content;
767
953
  setState((prev) => {
768
954
  // Find the last assistant message or add one
769
955
  const messages = [...prev.messages];
770
956
  const lastMsg = messages[messages.length - 1];
771
- if (lastMsg && lastMsg.role === "assistant") {
957
+ if (lastMsg && lastMsg.type === "assistant") {
772
958
  messages[messages.length - 1] = {
773
- role: "assistant",
959
+ type: "assistant",
774
960
  content,
775
961
  };
776
962
  } else {
777
- messages.push({ role: "assistant", content });
963
+ messages.push({ type: "assistant", content });
778
964
  }
779
965
  return { ...prev, messages };
780
966
  });
@@ -812,7 +998,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
812
998
  };
813
999
  }
814
1000
  },
815
- [fetchMetricsWithRetry],
1001
+ [fetchMetricsWithRetry, sendMessageAndCollect],
816
1002
  );
817
1003
 
818
1004
  // Send staged message to a specific arm
@@ -909,11 +1095,11 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
909
1095
  const controlMsgs = controlState.messages;
910
1096
  const variantMsgs = variantState.messages;
911
1097
  const lastControlResponse =
912
- controlMsgs[controlMsgs.length - 1]?.role === "assistant"
1098
+ controlMsgs[controlMsgs.length - 1]?.type === "assistant"
913
1099
  ? controlMsgs[controlMsgs.length - 1]?.content
914
1100
  : "";
915
1101
  const lastVariantResponse =
916
- variantMsgs[variantMsgs.length - 1]?.role === "assistant"
1102
+ variantMsgs[variantMsgs.length - 1]?.type === "assistant"
917
1103
  ? variantMsgs[variantMsgs.length - 1]?.content
918
1104
  : "";
919
1105
 
@@ -1093,13 +1279,19 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1093
1279
 
1094
1280
  // Function to fetch existing or trigger new session analysis
1095
1281
  const triggerAnalysis = useCallback(
1096
- async (sessionId: string, type: "control" | "variant") => {
1282
+ async (sessionId: string, type: "original" | "control" | "variant") => {
1097
1283
  const setLoading =
1098
- type === "control"
1099
- ? setControlAnalysisLoading
1100
- : setVariantAnalysisLoading;
1284
+ type === "original"
1285
+ ? setOriginalAnalysisLoading
1286
+ : type === "control"
1287
+ ? setControlAnalysisLoading
1288
+ : setVariantAnalysisLoading;
1101
1289
  const setAnalysis =
1102
- type === "control" ? setControlAnalysis : setVariantAnalysis;
1290
+ type === "original"
1291
+ ? setOriginalAnalysis
1292
+ : type === "control"
1293
+ ? setControlAnalysis
1294
+ : setVariantAnalysis;
1103
1295
 
1104
1296
  setLoading(true);
1105
1297
  try {
@@ -1228,14 +1420,58 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1228
1420
  </div>
1229
1421
  )}
1230
1422
  </div>
1231
- {!hasRun && (
1232
- <Button
1233
- onClick={runComparison}
1234
- disabled={isRunning || userMessages.length === 0}
1235
- >
1236
- {isRunning ? "Running..." : "Start Comparison"}
1237
- </Button>
1238
- )}
1423
+ <div className="flex items-center gap-2">
1424
+ {/* Comparison Analysis button - shown when comparison is complete */}
1425
+ {hasRun &&
1426
+ queueState.status === "completed" &&
1427
+ (hasComparisonAnalysis ? (
1428
+ <>
1429
+ <Button
1430
+ variant="outline"
1431
+ size="sm"
1432
+ onClick={showComparisonAnalysis}
1433
+ disabled={comparisonAnalysisLoading}
1434
+ >
1435
+ {comparisonAnalysisLoading ? (
1436
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1437
+ ) : (
1438
+ <BarChart3 className="w-4 h-4 mr-2" />
1439
+ )}
1440
+ Show Analysis
1441
+ </Button>
1442
+ <Button
1443
+ variant="ghost"
1444
+ size="sm"
1445
+ onClick={runComparisonAnalysis}
1446
+ disabled={comparisonAnalysisLoading}
1447
+ >
1448
+ Re-analyze
1449
+ </Button>
1450
+ </>
1451
+ ) : (
1452
+ <Button
1453
+ variant="outline"
1454
+ size="sm"
1455
+ onClick={runComparisonAnalysis}
1456
+ disabled={comparisonAnalysisLoading}
1457
+ >
1458
+ {comparisonAnalysisLoading ? (
1459
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1460
+ ) : (
1461
+ <BarChart3 className="w-4 h-4 mr-2" />
1462
+ )}
1463
+ Analyze Comparison
1464
+ </Button>
1465
+ ))}
1466
+ {!hasRun && (
1467
+ <Button
1468
+ onClick={runComparison}
1469
+ disabled={isRunning || userMessages.length === 0}
1470
+ >
1471
+ {isRunning ? "Running..." : "Start Comparison"}
1472
+ </Button>
1473
+ )}
1474
+ </div>
1239
1475
  </div>
1240
1476
 
1241
1477
  {/* Queue Banner - shown when there's a staged message waiting */}
@@ -1317,7 +1553,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1317
1553
  </div>
1318
1554
  <div className="space-y-2">
1319
1555
  {userMessages.map((msg, idx) => (
1320
- <details key={idx} className="group">
1556
+ <details
1557
+ key={`user-msg-${msg.slice(0, 50)}-${idx}`}
1558
+ className="group"
1559
+ >
1321
1560
  <summary className="text-sm cursor-pointer flex items-center gap-2 hover:text-foreground">
1322
1561
  <span className="text-xs font-mono text-muted-foreground w-5">
1323
1562
  {idx + 1}.
@@ -1358,8 +1597,17 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1358
1597
  </div>
1359
1598
  )}
1360
1599
 
1361
- {/* Control vs Variant labels */}
1362
- <div className="grid grid-cols-2 gap-4 text-sm">
1600
+ {/* Original vs Control vs Variant labels */}
1601
+ <div className="grid grid-cols-3 gap-4 text-sm">
1602
+ <div className="space-y-1">
1603
+ <div className="flex items-center gap-2">
1604
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1605
+ <span className="font-medium">Original</span>
1606
+ </div>
1607
+ <div className="text-muted-foreground text-xs">
1608
+ Source session
1609
+ </div>
1610
+ </div>
1363
1611
  <div className="space-y-1">
1364
1612
  <div className="flex items-center gap-2">
1365
1613
  <span className="w-2 h-2 rounded-full bg-blue-500" />
@@ -1384,16 +1632,139 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1384
1632
  </div>
1385
1633
  )}
1386
1634
 
1387
- {/* Side-by-side comparison */}
1635
+ {/* Side-by-side comparison - 3 panes: Original, Control, Variant */}
1388
1636
  {hasRun && (
1389
- <div className="grid grid-cols-2 gap-4 flex-1 min-h-0">
1637
+ <div className="grid grid-cols-3 gap-4 flex-1 min-h-0">
1638
+ {/* Original (Source Session - Read Only) */}
1639
+ <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1640
+ <CardHeader className="py-3 border-b shrink-0">
1641
+ <CardTitle className="text-sm flex items-center gap-2">
1642
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1643
+ Original Session
1644
+ </CardTitle>
1645
+ <CardDescription className="text-xs">
1646
+ Source session (read-only)
1647
+ </CardDescription>
1648
+ </CardHeader>
1649
+ <CardContent className="flex-1 overflow-auto py-4">
1650
+ {originalMessages.map((msg, i) => (
1651
+ <div
1652
+ key={`original-${msg.type}-${i}`}
1653
+ className={`mb-4 ${
1654
+ msg.type === "user"
1655
+ ? "text-yellow-600 dark:text-yellow-400"
1656
+ : msg.type === "tool_call"
1657
+ ? ""
1658
+ : ""
1659
+ }`}
1660
+ >
1661
+ {msg.type === "tool_call" ? (
1662
+ <details className="rounded bg-muted/50 border text-xs group">
1663
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1664
+ <span className="text-muted-foreground">🔧</span>
1665
+ <span className="font-medium flex-1">
1666
+ {msg.toolName || msg.content}
1667
+ </span>
1668
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1669
+ </summary>
1670
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1671
+ {msg.toolInput !== null &&
1672
+ msg.toolInput !== undefined && (
1673
+ <div>
1674
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1675
+ Args
1676
+ </div>
1677
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1678
+ {typeof msg.toolInput === "string"
1679
+ ? msg.toolInput
1680
+ : JSON.stringify(msg.toolInput, null, 2)}
1681
+ </pre>
1682
+ </div>
1683
+ )}
1684
+ {msg.toolOutput !== null &&
1685
+ msg.toolOutput !== undefined && (
1686
+ <div>
1687
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1688
+ Result
1689
+ </div>
1690
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1691
+ {typeof msg.toolOutput === "string"
1692
+ ? msg.toolOutput
1693
+ : JSON.stringify(msg.toolOutput, null, 2)}
1694
+ </pre>
1695
+ </div>
1696
+ )}
1697
+ {(msg.toolInput === null ||
1698
+ msg.toolInput === undefined) &&
1699
+ (msg.toolOutput === null ||
1700
+ msg.toolOutput === undefined) && (
1701
+ <div className="text-muted-foreground text-[11px]">
1702
+ No input/output data available
1703
+ </div>
1704
+ )}
1705
+ </div>
1706
+ </details>
1707
+ ) : (
1708
+ <>
1709
+ <div className="text-xs font-medium uppercase mb-1">
1710
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1711
+ </div>
1712
+ <div className="text-sm whitespace-pre-wrap">
1713
+ {msg.content}
1714
+ </div>
1715
+ </>
1716
+ )}
1717
+ </div>
1718
+ ))}
1719
+ {originalMessages.length === 0 && (
1720
+ <div className="text-sm text-muted-foreground">
1721
+ No messages in source session
1722
+ </div>
1723
+ )}
1724
+ </CardContent>
1725
+ {/* Session Analysis & Tool Calls for Original */}
1726
+ {originalMetrics && (
1727
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1728
+ {/* Session Analysis */}
1729
+ <SessionAnalysisPanel
1730
+ analysis={originalAnalysis}
1731
+ isLoading={originalAnalysisLoading}
1732
+ isExpanded={analysisExpanded.original}
1733
+ onToggle={() =>
1734
+ setAnalysisExpanded((prev) => ({
1735
+ ...prev,
1736
+ original: !prev.original,
1737
+ }))
1738
+ }
1739
+ onRunAnalysis={() =>
1740
+ run?.sourceSessionId &&
1741
+ triggerAnalysis(run.sourceSessionId, "original")
1742
+ }
1743
+ accentColor="yellow"
1744
+ />
1745
+ {/* Tool Calls */}
1746
+ <ToolCallsPanel
1747
+ toolCalls={originalMetrics.toolCalls}
1748
+ isExpanded={toolCallsExpanded.original}
1749
+ onToggle={() =>
1750
+ setToolCallsExpanded((prev) => ({
1751
+ ...prev,
1752
+ original: !prev.original,
1753
+ }))
1754
+ }
1755
+ accentColor="yellow"
1756
+ />
1757
+ </div>
1758
+ )}
1759
+ </Card>
1760
+
1390
1761
  {/* Control */}
1391
1762
  <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1392
1763
  <CardHeader className="py-3 border-b shrink-0">
1393
1764
  <div className="flex items-center justify-between">
1394
1765
  <CardTitle className="text-sm flex items-center gap-2">
1395
1766
  <span className="w-2 h-2 rounded-full bg-blue-500" />
1396
- Control (Original)
1767
+ Control (Rerun)
1397
1768
  {controlState.isStreaming && (
1398
1769
  <Loader2 className="w-3 h-3 animate-spin text-blue-500" />
1399
1770
  )}
@@ -1427,20 +1798,76 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1427
1798
  <CardContent className="flex-1 overflow-auto py-4">
1428
1799
  {controlState.messages.map((msg, i) => (
1429
1800
  <div
1430
- key={i}
1431
- className={`mb-4 ${msg.role === "user" ? "text-blue-600 dark:text-blue-400" : ""}`}
1801
+ key={`control-${msg.type}-${i}`}
1802
+ className={`mb-4 ${
1803
+ msg.type === "user"
1804
+ ? "text-blue-600 dark:text-blue-400"
1805
+ : msg.type === "tool_call"
1806
+ ? ""
1807
+ : ""
1808
+ }`}
1432
1809
  >
1433
- <div className="text-xs font-medium uppercase mb-1">
1434
- {msg.role}
1435
- </div>
1436
- <div className="text-sm whitespace-pre-wrap">
1437
- {msg.content}
1438
- {controlState.isStreaming &&
1439
- msg.role === "assistant" &&
1440
- i === controlState.messages.length - 1 && (
1441
- <span className="animate-pulse">▊</span>
1442
- )}
1443
- </div>
1810
+ {msg.type === "tool_call" ? (
1811
+ <details className="rounded bg-muted/50 border text-xs group">
1812
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1813
+ <span className="text-muted-foreground">🔧</span>
1814
+ <span className="font-medium flex-1">
1815
+ {msg.toolName || msg.content}
1816
+ </span>
1817
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1818
+ </summary>
1819
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1820
+ {msg.toolInput !== null &&
1821
+ msg.toolInput !== undefined && (
1822
+ <div>
1823
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1824
+ Args
1825
+ </div>
1826
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1827
+ {typeof msg.toolInput === "string"
1828
+ ? msg.toolInput
1829
+ : JSON.stringify(msg.toolInput, null, 2)}
1830
+ </pre>
1831
+ </div>
1832
+ )}
1833
+ {msg.toolOutput !== null &&
1834
+ msg.toolOutput !== undefined && (
1835
+ <div>
1836
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1837
+ Result
1838
+ </div>
1839
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1840
+ {typeof msg.toolOutput === "string"
1841
+ ? msg.toolOutput
1842
+ : JSON.stringify(msg.toolOutput, null, 2)}
1843
+ </pre>
1844
+ </div>
1845
+ )}
1846
+ {(msg.toolInput === null ||
1847
+ msg.toolInput === undefined) &&
1848
+ (msg.toolOutput === null ||
1849
+ msg.toolOutput === undefined) && (
1850
+ <div className="text-muted-foreground text-[11px]">
1851
+ No input/output data available
1852
+ </div>
1853
+ )}
1854
+ </div>
1855
+ </details>
1856
+ ) : (
1857
+ <>
1858
+ <div className="text-xs font-medium uppercase mb-1">
1859
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1860
+ </div>
1861
+ <div className="text-sm whitespace-pre-wrap">
1862
+ {msg.content}
1863
+ {controlState.isStreaming &&
1864
+ msg.type === "assistant" &&
1865
+ i === controlState.messages.length - 1 && (
1866
+ <span className="animate-pulse">▊</span>
1867
+ )}
1868
+ </div>
1869
+ </>
1870
+ )}
1444
1871
  </div>
1445
1872
  ))}
1446
1873
  {controlState.error && (
@@ -1540,20 +1967,76 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1540
1967
  <CardContent className="flex-1 overflow-auto py-4">
1541
1968
  {variantState.messages.map((msg, i) => (
1542
1969
  <div
1543
- key={i}
1544
- className={`mb-4 ${msg.role === "user" ? "text-orange-600 dark:text-orange-400" : ""}`}
1970
+ key={`variant-${msg.type}-${i}`}
1971
+ className={`mb-4 ${
1972
+ msg.type === "user"
1973
+ ? "text-orange-600 dark:text-orange-400"
1974
+ : msg.type === "tool_call"
1975
+ ? ""
1976
+ : ""
1977
+ }`}
1545
1978
  >
1546
- <div className="text-xs font-medium uppercase mb-1">
1547
- {msg.role}
1548
- </div>
1549
- <div className="text-sm whitespace-pre-wrap">
1550
- {msg.content}
1551
- {variantState.isStreaming &&
1552
- msg.role === "assistant" &&
1553
- i === variantState.messages.length - 1 && (
1554
- <span className="animate-pulse">▊</span>
1555
- )}
1556
- </div>
1979
+ {msg.type === "tool_call" ? (
1980
+ <details className="rounded bg-muted/50 border text-xs group">
1981
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1982
+ <span className="text-muted-foreground">🔧</span>
1983
+ <span className="font-medium flex-1">
1984
+ {msg.toolName || msg.content}
1985
+ </span>
1986
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1987
+ </summary>
1988
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1989
+ {msg.toolInput !== null &&
1990
+ msg.toolInput !== undefined && (
1991
+ <div>
1992
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1993
+ Args
1994
+ </div>
1995
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1996
+ {typeof msg.toolInput === "string"
1997
+ ? msg.toolInput
1998
+ : JSON.stringify(msg.toolInput, null, 2)}
1999
+ </pre>
2000
+ </div>
2001
+ )}
2002
+ {msg.toolOutput !== null &&
2003
+ msg.toolOutput !== undefined && (
2004
+ <div>
2005
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
2006
+ Result
2007
+ </div>
2008
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
2009
+ {typeof msg.toolOutput === "string"
2010
+ ? msg.toolOutput
2011
+ : JSON.stringify(msg.toolOutput, null, 2)}
2012
+ </pre>
2013
+ </div>
2014
+ )}
2015
+ {(msg.toolInput === null ||
2016
+ msg.toolInput === undefined) &&
2017
+ (msg.toolOutput === null ||
2018
+ msg.toolOutput === undefined) && (
2019
+ <div className="text-muted-foreground text-[11px]">
2020
+ No input/output data available
2021
+ </div>
2022
+ )}
2023
+ </div>
2024
+ </details>
2025
+ ) : (
2026
+ <>
2027
+ <div className="text-xs font-medium uppercase mb-1">
2028
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
2029
+ </div>
2030
+ <div className="text-sm whitespace-pre-wrap">
2031
+ {msg.content}
2032
+ {variantState.isStreaming &&
2033
+ msg.type === "assistant" &&
2034
+ i === variantState.messages.length - 1 && (
2035
+ <span className="animate-pulse">▊</span>
2036
+ )}
2037
+ </div>
2038
+ </>
2039
+ )}
1557
2040
  </div>
1558
2041
  ))}
1559
2042
  {variantState.error && (
@@ -1615,6 +2098,15 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1615
2098
  </div>
1616
2099
  )}
1617
2100
  </div>
2101
+
2102
+ {/* Comparison Analysis Dialog */}
2103
+ {comparisonAnalysis && (
2104
+ <ComparisonAnalysisDialog
2105
+ open={comparisonAnalysisDialogOpen}
2106
+ onClose={() => setComparisonAnalysisDialogOpen(false)}
2107
+ analysis={comparisonAnalysis}
2108
+ />
2109
+ )}
1618
2110
  </DebuggerLayout>
1619
2111
  );
1620
2112
  }