@townco/debugger 0.1.31 → 0.1.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,13 @@
1
- import { ChevronDown, ChevronUp, Loader2 } from "lucide-react";
2
- import { useCallback, useEffect, useState } from "react";
1
+ import {
2
+ BarChart3,
3
+ ChevronDown,
4
+ ChevronUp,
5
+ Loader2,
6
+ Play,
7
+ ToggleLeft,
8
+ ToggleRight,
9
+ } from "lucide-react";
10
+ import { useCallback, useEffect, useRef, useState } from "react";
3
11
  import { Button } from "@/components/ui/button";
4
12
  import {
5
13
  Card,
@@ -8,7 +16,10 @@ import {
8
16
  CardHeader,
9
17
  CardTitle,
10
18
  } from "@/components/ui/card";
19
+ import { Checkbox } from "@/components/ui/checkbox";
20
+ import type { SessionComparisonAnalysis } from "../analysis/comparison-types";
11
21
  import type { SessionAnalysis } from "../analysis/types";
22
+ import { ComparisonAnalysisDialog } from "../components/ComparisonAnalysisDialog";
12
23
  import { DebuggerLayout } from "../components/DebuggerLayout";
13
24
  import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
14
25
  import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
@@ -17,17 +28,30 @@ interface ComparisonViewProps {
17
28
  runId: string;
18
29
  }
19
30
 
20
- interface ChatMessage {
21
- role: "user" | "assistant";
31
+ // Conversation item that can be user message, assistant message, or tool call
32
+ interface ConversationItem {
33
+ type: "user" | "assistant" | "tool_call";
22
34
  content: string;
35
+ toolName?: string | undefined;
36
+ toolInput?: unknown;
37
+ toolOutput?: unknown;
23
38
  }
24
39
 
25
40
  interface SessionState {
26
41
  sessionId: string | null;
27
- messages: ChatMessage[];
42
+ messages: ConversationItem[];
28
43
  isStreaming: boolean;
44
+ isSending: boolean; // true while sending is in progress (before streaming starts)
29
45
  metrics: SessionMetrics | null;
30
46
  error: string | null;
47
+ autoRun: boolean;
48
+ turnIndex: number; // last completed user message index for this arm
49
+ }
50
+
51
+ interface QueueState {
52
+ currentIndex: number; // last completed turn (both arms finished)
53
+ stagedIndex: number; // next user message ready to send
54
+ status: "idle" | "running" | "completed";
31
55
  }
32
56
 
33
57
  const AGENT_SERVER_URL =
@@ -41,37 +65,60 @@ function SessionAnalysisPanel({
41
65
  isLoading,
42
66
  isExpanded,
43
67
  onToggle,
68
+ onRunAnalysis,
44
69
  accentColor,
45
70
  }: {
46
71
  analysis: SessionAnalysis | null;
47
72
  isLoading: boolean;
48
73
  isExpanded: boolean;
49
74
  onToggle: () => void;
50
- accentColor: "blue" | "orange";
75
+ onRunAnalysis: () => void;
76
+ accentColor: "yellow" | "blue" | "orange";
51
77
  }) {
52
78
  const colorClasses =
53
- accentColor === "blue"
54
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
55
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
79
+ accentColor === "yellow"
80
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
81
+ : accentColor === "blue"
82
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
83
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
56
84
 
57
85
  const headerColorClasses =
58
- accentColor === "blue"
59
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
60
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
86
+ accentColor === "yellow"
87
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
88
+ : accentColor === "blue"
89
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
90
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
61
91
 
62
92
  if (isLoading) {
63
93
  return (
64
94
  <div className={`border rounded-md p-3 ${colorClasses}`}>
65
95
  <div className="flex items-center gap-2 text-xs text-muted-foreground">
66
96
  <Loader2 className="w-3 h-3 animate-spin" />
67
- Loading analysis...
97
+ Running analysis...
68
98
  </div>
69
99
  </div>
70
100
  );
71
101
  }
72
102
 
73
103
  if (!analysis) {
74
- return null;
104
+ return (
105
+ <div className={`border rounded-md p-3 ${colorClasses}`}>
106
+ <div className="flex items-center justify-between">
107
+ <span className="text-xs text-muted-foreground">
108
+ Session Analysis
109
+ </span>
110
+ <Button
111
+ size="sm"
112
+ variant="outline"
113
+ onClick={onRunAnalysis}
114
+ className="h-6 text-xs px-2"
115
+ >
116
+ <Play className="w-3 h-3 mr-1" />
117
+ Run Analysis
118
+ </Button>
119
+ </div>
120
+ </div>
121
+ );
75
122
  }
76
123
 
77
124
  return (
@@ -203,17 +250,21 @@ function ToolCallsPanel({
203
250
  toolCalls: SessionMetrics["toolCalls"];
204
251
  isExpanded: boolean;
205
252
  onToggle: () => void;
206
- accentColor: "blue" | "orange";
253
+ accentColor: "yellow" | "blue" | "orange";
207
254
  }) {
208
255
  const colorClasses =
209
- accentColor === "blue"
210
- ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
211
- : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
256
+ accentColor === "yellow"
257
+ ? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
258
+ : accentColor === "blue"
259
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
260
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
212
261
 
213
262
  const headerColorClasses =
214
- accentColor === "blue"
215
- ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
216
- : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
263
+ accentColor === "yellow"
264
+ ? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
265
+ : accentColor === "blue"
266
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
267
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
217
268
 
218
269
  const toolCallCount = toolCalls?.length ?? 0;
219
270
 
@@ -294,86 +345,369 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
294
345
  const [loading, setLoading] = useState(true);
295
346
  const [error, setError] = useState<string | null>(null);
296
347
 
348
+ // User messages from source session
349
+ const [userMessages, setUserMessages] = useState<string[]>([]);
350
+ const [initialAutoRun, setInitialAutoRun] = useState(false);
351
+
352
+ // Queue state for multi-message replay
353
+ const [queueState, setQueueState] = useState<QueueState>({
354
+ currentIndex: -1,
355
+ stagedIndex: 0,
356
+ status: "idle",
357
+ });
358
+
297
359
  // Session states
298
360
  const [controlState, setControlState] = useState<SessionState>({
299
361
  sessionId: null,
300
362
  messages: [],
301
363
  isStreaming: false,
364
+ isSending: false,
302
365
  metrics: null,
303
366
  error: null,
367
+ autoRun: false,
368
+ turnIndex: -1,
304
369
  });
305
370
  const [variantState, setVariantState] = useState<SessionState>({
306
371
  sessionId: null,
307
372
  messages: [],
308
373
  isStreaming: false,
374
+ isSending: false,
309
375
  metrics: null,
310
376
  error: null,
377
+ autoRun: false,
378
+ turnIndex: -1,
311
379
  });
312
380
 
381
+ // Refs for stable callbacks
382
+ const controlStateRef = useRef(controlState);
383
+ const variantStateRef = useRef(variantState);
384
+ const queueStateRef = useRef(queueState);
385
+ const userMessagesRef = useRef(userMessages);
386
+
387
+ // Separate refs for send locks - these update synchronously to prevent race conditions
388
+ const controlSendingRef = useRef(false);
389
+ const variantSendingRef = useRef(false);
390
+
391
+ useEffect(() => {
392
+ controlStateRef.current = controlState;
393
+ }, [controlState]);
394
+ useEffect(() => {
395
+ variantStateRef.current = variantState;
396
+ }, [variantState]);
397
+ useEffect(() => {
398
+ queueStateRef.current = queueState;
399
+ }, [queueState]);
400
+ useEffect(() => {
401
+ userMessagesRef.current = userMessages;
402
+ }, [userMessages]);
403
+
313
404
  const [isRunning, setIsRunning] = useState(false);
314
405
  const [hasRun, setHasRun] = useState(false);
315
406
 
407
+ // Original source session state (read-only, for reference)
408
+ const [originalMessages, setOriginalMessages] = useState<ConversationItem[]>(
409
+ [],
410
+ );
411
+ const [originalMetrics, setOriginalMetrics] = useState<SessionMetrics | null>(
412
+ null,
413
+ );
414
+
316
415
  // Session analysis state
416
+ const [originalAnalysis, setOriginalAnalysis] =
417
+ useState<SessionAnalysis | null>(null);
317
418
  const [controlAnalysis, setControlAnalysis] =
318
419
  useState<SessionAnalysis | null>(null);
319
420
  const [variantAnalysis, setVariantAnalysis] =
320
421
  useState<SessionAnalysis | null>(null);
422
+ const [originalAnalysisLoading, setOriginalAnalysisLoading] = useState(false);
321
423
  const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
322
424
  const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
323
425
  const [analysisExpanded, setAnalysisExpanded] = useState<{
426
+ original: boolean;
324
427
  control: boolean;
325
428
  variant: boolean;
326
429
  }>({
430
+ original: false,
327
431
  control: false,
328
432
  variant: false,
329
433
  });
330
434
  const [toolCallsExpanded, setToolCallsExpanded] = useState<{
435
+ original: boolean;
331
436
  control: boolean;
332
437
  variant: boolean;
333
438
  }>({
439
+ original: false,
334
440
  control: false,
335
441
  variant: false,
336
442
  });
337
443
 
338
- // Fetch comparison run details and restore saved messages
444
+ // Comparison analysis state
445
+ const [comparisonAnalysis, setComparisonAnalysis] =
446
+ useState<SessionComparisonAnalysis | null>(null);
447
+ const [comparisonAnalysisLoading, setComparisonAnalysisLoading] =
448
+ useState(false);
449
+ const [comparisonAnalysisDialogOpen, setComparisonAnalysisDialogOpen] =
450
+ useState(false);
451
+ const [hasComparisonAnalysis, setHasComparisonAnalysis] = useState(false);
452
+
453
+ // Check if comparison analysis exists
339
454
  useEffect(() => {
340
- Promise.all([
341
- fetch(`/api/comparison-run/${runId}`).then((res) => res.json()),
342
- ])
343
- .then(([runData]) => {
455
+ if (runId) {
456
+ fetch(`/api/comparison-analysis/${runId}/exists`)
457
+ .then((res) => res.json())
458
+ .then((data) => {
459
+ setHasComparisonAnalysis(data.exists);
460
+ })
461
+ .catch(() => {
462
+ setHasComparisonAnalysis(false);
463
+ });
464
+ }
465
+ }, [runId]);
466
+
467
/**
 * Runs a fresh comparison analysis for the current run and opens the result dialog.
 *
 * POSTs to `/api/analyze-comparison/${runId}`. On success the analysis is stored,
 * flagged as existing, and the dialog is opened. On failure the error is logged
 * and shown to the user via `alert`. The loading flag is cleared on every path.
 */
const runComparisonAnalysis = async () => {
  setComparisonAnalysisLoading(true);
  try {
    const res = await fetch(`/api/analyze-comparison/${runId}`, {
      method: "POST",
    });
    if (!res.ok) {
      let message = "Analysis failed";
      try {
        const body = (await res.json()) as { error?: unknown } | null;
        if (typeof body?.error === "string" && body.error) {
          message = body.error;
        }
      } catch {
        // Error bodies are not guaranteed to be JSON (e.g. an HTML error page);
        // report the HTTP status instead of surfacing a JSON parse error.
        message = `Analysis failed (HTTP ${res.status})`;
      }
      throw new Error(message);
    }
    const analysis = await res.json();
    setComparisonAnalysis(analysis);
    setHasComparisonAnalysis(true);
    setComparisonAnalysisDialogOpen(true);
  } catch (err) {
    console.error("Comparison analysis error:", err);
    alert(
      `Analysis failed: ${err instanceof Error ? err.message : "Unknown error"}`,
    );
  } finally {
    setComparisonAnalysisLoading(false);
  }
};
491
+
492
/**
 * Opens the comparison analysis dialog.
 *
 * If an analysis is already held in state the dialog opens immediately.
 * Otherwise the stored analysis is fetched from `/api/comparison-analysis/${runId}`
 * first; a failed fetch is only logged and the dialog stays closed.
 */
const showComparisonAnalysis = async () => {
  const alreadyLoaded = Boolean(comparisonAnalysis);
  if (alreadyLoaded) {
    setComparisonAnalysisDialogOpen(true);
    return;
  }

  setComparisonAnalysisLoading(true);
  try {
    const response = await fetch(`/api/comparison-analysis/${runId}`);
    if (!response.ok) {
      throw new Error("Analysis not found");
    }
    const stored = await response.json();
    setComparisonAnalysis(stored);
    setComparisonAnalysisDialogOpen(true);
  } catch (fetchError) {
    console.error("Error fetching comparison analysis:", fetchError);
  } finally {
    setComparisonAnalysisLoading(false);
  }
};
514
+
515
+ // Fetch comparison run details, conversation, and restore saved messages
516
+ useEffect(() => {
517
+ let runData: ComparisonRun;
518
+
519
+ fetch(`/api/comparison-run/${runId}`)
520
+ .then((res) => res.json())
521
+ .then(async (data) => {
522
+ runData = data;
344
523
  setRun(runData);
345
524
 
346
- // Restore saved messages if the run has been completed
525
+ // Fetch conversation from source session to get all user messages
526
+ const conversationRes = await fetch(
527
+ `/api/session-conversation?sessionId=${runData.sourceSessionId}`,
528
+ );
529
+ const conversation = await conversationRes.json();
530
+
531
+ // Extract user messages in order AND build original conversation with tool calls
532
+ const messages: string[] = [];
533
+ const origMessages: ConversationItem[] = [];
534
+ for (const trace of conversation) {
535
+ if (trace.userInput) {
536
+ messages.push(trace.userInput);
537
+ origMessages.push({
538
+ type: "user" as const,
539
+ content: trace.userInput,
540
+ });
541
+ }
542
+ // Use agentMessages which includes both tool_calls and chat messages in order
543
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
544
+ for (const msg of trace.agentMessages) {
545
+ if (msg.type === "tool_call") {
546
+ origMessages.push({
547
+ type: "tool_call" as const,
548
+ content: msg.toolName || msg.content,
549
+ toolName: msg.toolName,
550
+ toolInput: msg.toolInput,
551
+ toolOutput: msg.toolOutput,
552
+ });
553
+ } else if (msg.type === "chat" && msg.content?.trim()) {
554
+ origMessages.push({
555
+ type: "assistant" as const,
556
+ content: msg.content,
557
+ });
558
+ }
559
+ }
560
+ } else if (trace.llmOutput) {
561
+ // Fallback if no agentMessages
562
+ origMessages.push({
563
+ type: "assistant" as const,
564
+ content: trace.llmOutput,
565
+ });
566
+ }
567
+ }
568
+
569
+ // If no messages found in conversation, fall back to firstUserMessage
570
+ if (messages.length === 0 && runData.firstUserMessage) {
571
+ messages.push(runData.firstUserMessage);
572
+ }
573
+
574
+ setUserMessages(messages);
575
+ setOriginalMessages(origMessages);
576
+
577
+ // Fetch metrics for the original source session
578
+ if (runData.sourceSessionId) {
579
+ try {
580
+ const metricsRes = await fetch(
581
+ `/api/session-metrics/${runData.sourceSessionId}?model=${encodeURIComponent(config?.controlModel || "claude-sonnet-4-5-20250929")}`,
582
+ );
583
+ if (metricsRes.ok) {
584
+ const metrics = await metricsRes.json();
585
+ setOriginalMetrics(metrics);
586
+ }
587
+ } catch (err) {
588
+ console.error("Failed to fetch original session metrics:", err);
589
+ }
590
+ }
591
+
592
+ // Restore saved messages if the run has been completed or running
347
593
  if (runData.status === "completed" || runData.status === "running") {
348
594
  setHasRun(true);
349
595
 
596
+ // Fetch full conversation history from control and variant sessions
597
+ const [controlConversation, variantConversation] = await Promise.all([
598
+ runData.controlSessionId
599
+ ? fetch(
600
+ `/api/session-conversation?sessionId=${runData.controlSessionId}`,
601
+ ).then((res) => res.json())
602
+ : Promise.resolve([]),
603
+ runData.variantSessionId
604
+ ? fetch(
605
+ `/api/session-conversation?sessionId=${runData.variantSessionId}`,
606
+ ).then((res) => res.json())
607
+ : Promise.resolve([]),
608
+ ]);
609
+
610
+ // Convert traces to conversation items (including tool calls)
611
+ const tracesToConversationItems = (
612
+ traces: Array<{
613
+ userInput?: string;
614
+ llmOutput?: string;
615
+ agentMessages?: Array<{
616
+ type: string;
617
+ content?: string;
618
+ toolName?: string;
619
+ toolInput?: unknown;
620
+ toolOutput?: unknown;
621
+ }>;
622
+ }>,
623
+ ): ConversationItem[] => {
624
+ const items: ConversationItem[] = [];
625
+ for (const trace of traces) {
626
+ if (trace.userInput) {
627
+ items.push({
628
+ type: "user" as const,
629
+ content: trace.userInput,
630
+ });
631
+ }
632
+ // Use agentMessages which includes both tool_calls and chat messages in order
633
+ if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
634
+ for (const msg of trace.agentMessages) {
635
+ if (msg.type === "tool_call") {
636
+ items.push({
637
+ type: "tool_call" as const,
638
+ content: msg.toolName || msg.content || "",
639
+ toolName: msg.toolName,
640
+ toolInput: msg.toolInput,
641
+ toolOutput: msg.toolOutput,
642
+ });
643
+ } else if (msg.type === "chat" && msg.content?.trim()) {
644
+ items.push({
645
+ type: "assistant" as const,
646
+ content: msg.content,
647
+ });
648
+ }
649
+ }
650
+ } else if (trace.llmOutput) {
651
+ // Fallback if no agentMessages
652
+ items.push({
653
+ type: "assistant" as const,
654
+ content: trace.llmOutput,
655
+ });
656
+ }
657
+ }
658
+ return items;
659
+ };
660
+
350
661
  // Restore control messages
351
- if (runData.controlResponse) {
352
- setControlState({
662
+ if (runData.controlSessionId) {
663
+ const controlMessages =
664
+ tracesToConversationItems(controlConversation);
665
+ // Count user messages for turnIndex
666
+ const controlUserCount = controlMessages.filter(
667
+ (m) => m.type === "user",
668
+ ).length;
669
+ setControlState((prev) => ({
670
+ ...prev,
353
671
  sessionId: runData.controlSessionId,
354
- messages: [
355
- { role: "user", content: runData.firstUserMessage },
356
- { role: "assistant", content: runData.controlResponse },
357
- ],
672
+ messages: controlMessages,
358
673
  isStreaming: false,
359
674
  metrics: runData.controlMetrics,
360
675
  error: null,
361
- });
676
+ turnIndex: controlUserCount - 1,
677
+ }));
362
678
  }
363
679
 
364
680
  // Restore variant messages
365
- if (runData.variantResponse) {
366
- setVariantState({
681
+ if (runData.variantSessionId) {
682
+ const variantMessages =
683
+ tracesToConversationItems(variantConversation);
684
+ // Count user messages for turnIndex
685
+ const variantUserCount = variantMessages.filter(
686
+ (m) => m.type === "user",
687
+ ).length;
688
+ setVariantState((prev) => ({
689
+ ...prev,
367
690
  sessionId: runData.variantSessionId,
368
- messages: [
369
- { role: "user", content: runData.firstUserMessage },
370
- { role: "assistant", content: runData.variantResponse },
371
- ],
691
+ messages: variantMessages,
372
692
  isStreaming: false,
373
693
  metrics: runData.variantMetrics,
374
694
  error: null,
375
- });
695
+ turnIndex: variantUserCount - 1,
696
+ }));
376
697
  }
698
+
699
+ // Set queue state based on completed messages
700
+ const controlItems = tracesToConversationItems(controlConversation);
701
+ const variantItems = tracesToConversationItems(variantConversation);
702
+ const completedTurns = Math.min(
703
+ controlItems.filter((m) => m.type === "user").length,
704
+ variantItems.filter((m) => m.type === "user").length,
705
+ );
706
+ setQueueState({
707
+ currentIndex: completedTurns - 1,
708
+ stagedIndex: completedTurns,
709
+ status: runData.status === "completed" ? "completed" : "running",
710
+ });
377
711
  }
378
712
 
379
713
  // Fetch the config by the run's configId (not the latest config!)
@@ -389,7 +723,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
389
723
  setError(err.message);
390
724
  setLoading(false);
391
725
  });
392
- }, [runId]);
726
+ }, [runId, config?.controlModel]);
393
727
 
394
728
  const generateRequestId = (prefix: string, sessionId?: string) => {
395
729
  const randomPart =
@@ -446,7 +780,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
446
780
  let abortController: AbortController | null = new AbortController();
447
781
 
448
782
  // Start SSE connection (don't await - runs in background)
449
- const ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
783
+ const _ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
450
784
  headers: {
451
785
  "X-Session-ID": sessionId,
452
786
  },
@@ -535,29 +869,371 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
535
869
  return accumulatedContent;
536
870
  };
537
871
 
538
- // Run the comparison
872
/**
 * Polls `/api/session-metrics/:sessionId` until the reported metrics stop changing.
 *
 * Every 2 seconds (for up to 60 seconds) the metrics are re-fetched. A reading is
 * treated as final once it has a positive `totalTokens` and both `totalTokens` and
 * `toolCallCount` match the previous successful reading. Failed requests, non-OK
 * responses, and unparseable bodies are skipped and retried on the next poll.
 *
 * @param sessionId - Session whose metrics are polled.
 * @param model - Model name forwarded as the `model` query parameter.
 * @param duration - Measured duration in ms; overrides `durationMs` in the result.
 * @returns The stabilized metrics, else the last successful reading, else zeros.
 */
const fetchMetricsWithRetry = useCallback(
  async (
    sessionId: string,
    model: string,
    duration: number,
  ): Promise<SessionMetrics> => {
    const maxWaitMs = 60_000;
    const pollIntervalMs = 2_000;
    let elapsed = 0;
    let previousTokens = -1;
    let previousTools = -1;
    let lastMetrics: SessionMetrics | null = null;

    while (elapsed <= maxWaitMs) {
      try {
        const metricsRes = await fetch(
          `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
        );
        // Only a successful response carries metrics; an error payload must not
        // be recorded (and later returned) as if it were a metrics reading.
        if (metricsRes.ok) {
          const metrics = await metricsRes.json();
          const snapshot: SessionMetrics = { ...metrics, durationMs: duration };
          lastMetrics = snapshot;

          // If tokens/tool calls stopped changing and we have data, treat as final.
          if (
            metrics.totalTokens > 0 &&
            metrics.totalTokens === previousTokens &&
            metrics.toolCallCount === previousTools
          ) {
            return snapshot;
          }

          previousTokens = metrics.totalTokens ?? 0;
          previousTools = metrics.toolCallCount ?? 0;
        }
      } catch {
        // Best-effort polling: network/parse failures are retried on the next tick.
      }

      await new Promise((r) => setTimeout(r, pollIntervalMs));
      elapsed += pollIntervalMs;
    }

    // Return whatever we last saw (or zeros if nothing ever arrived)
    return (
      lastMetrics ?? {
        durationMs: duration,
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        estimatedCost: 0,
        toolCallCount: 0,
      }
    );
  },
  [],
);
928
+
929
/**
 * Delivers one user message to a single arm and records the outcome.
 *
 * Appends the user message to the arm's conversation, mirrors the streamed
 * assistant text into the trailing assistant entry, then stores the polled
 * metrics and marks `messageIndex` as the arm's completed turn. Failures are
 * written to the arm's `error` field and reported to the caller as an empty
 * response with zeroed metrics; nothing is thrown.
 */
const sendMessageToArm = useCallback(
  async (
    sessionId: string,
    message: string,
    messageIndex: number,
    model: string,
    arm: "control" | "variant",
    startTime: number,
  ): Promise<{ response: string; metrics: SessionMetrics }> => {
    const updateArm = arm === "control" ? setControlState : setVariantState;

    try {
      // Show the outgoing user message and flag the arm as streaming.
      updateArm((prev) => ({
        ...prev,
        isStreaming: true,
        messages: [...prev.messages, { type: "user", content: message }],
      }));

      const response = await sendMessageAndCollect(
        sessionId,
        message,
        (content) => {
          updateArm((prev) => {
            // Overwrite a trailing assistant entry; otherwise start a new one.
            const tail = prev.messages[prev.messages.length - 1];
            const kept =
              tail && tail.type === "assistant"
                ? prev.messages.slice(0, -1)
                : prev.messages;
            return {
              ...prev,
              messages: [...kept, { type: "assistant", content }],
            };
          });
        },
      );

      const metrics = await fetchMetricsWithRetry(
        sessionId,
        model,
        Date.now() - startTime,
      );

      updateArm((prev) => ({
        ...prev,
        isStreaming: false,
        turnIndex: messageIndex,
        metrics,
        error: null,
      }));

      return { response, metrics };
    } catch (err) {
      const errorText = err instanceof Error ? err.message : "Unknown error";
      updateArm((prev) => ({
        ...prev,
        isStreaming: false,
        error: errorText,
      }));
      return {
        response: "",
        metrics: {
          durationMs: 0,
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
          estimatedCost: 0,
          toolCallCount: 0,
        },
      };
    }
  },
  // biome-ignore lint/correctness/useExhaustiveDependencies: sendMessageAndCollect is stable
  [fetchMetricsWithRetry, sendMessageAndCollect],
);
1004
+
1005
/**
 * Sends the currently staged user message to one arm.
 *
 * Reads the latest arm/queue/message state from refs. Does nothing when the arm
 * is already sending or streaming, has no session, or there is no staged message.
 * A per-arm ref is used as a synchronous lock so overlapping calls cannot send
 * the same message twice; the lock is released when the send settles.
 */
const sendStagedToArm = useCallback(
  async (arm: "control" | "variant") => {
    const isControl = arm === "control";
    const armState = isControl
      ? controlStateRef.current
      : variantStateRef.current;
    const updateArm = isControl ? setControlState : setVariantState;
    const sendLock = isControl ? controlSendingRef : variantSendingRef;
    const queue = queueStateRef.current;
    const pending = userMessagesRef.current;

    // The ref-based lock is checked first: it changes synchronously, unlike state.
    if (sendLock.current) return;
    if (!armState.sessionId || armState.isStreaming) return;
    if (queue.stagedIndex >= pending.length) return;

    const message = pending[queue.stagedIndex];
    if (!message) return;

    // Take the lock before any await so a concurrent call bails out above.
    sendLock.current = true;
    updateArm((prev) => ({ ...prev, isSending: true }));

    let model: string;
    if (isControl) {
      model = config?.controlModel || "claude-sonnet-4-5-20250929";
    } else {
      model =
        config?.variantModel ||
        config?.controlModel ||
        "claude-sonnet-4-5-20250929";
    }

    try {
      await sendMessageToArm(
        armState.sessionId,
        message,
        queue.stagedIndex,
        model,
        arm,
        Date.now(),
      );
    } finally {
      // Release the lock whether the send succeeded or failed.
      sendLock.current = false;
      updateArm((prev) => ({ ...prev, isSending: false }));
    }
  },
  [config, sendMessageToArm],
);
1052
+
1053
/**
 * Sends the staged message to the control and variant arms in parallel.
 * Skipped entirely unless both arms have a session and neither is streaming
 * or holding its send lock.
 */
const sendStagedToBoth = useCallback(async () => {
  const arms = [controlStateRef.current, variantStateRef.current];

  if (arms.some((armState) => !armState.sessionId)) return;
  if (arms.some((armState) => armState.isStreaming)) return;
  if (controlSendingRef.current || variantSendingRef.current) return;

  const sends = [sendStagedToArm("control"), sendStagedToArm("variant")];
  await Promise.all(sends);
}, [sendStagedToArm]);
1064
+
1065
// Advance the replay queue once both arms have finished the same turn.
// While the queue is "running" and neither arm is sending or streaming, a turn
// index shared by both arms that is newer than `queueState.currentIndex` either
// stages the next user message or, after the last message, marks the queue
// completed and persists the final metrics/responses for the run.
useEffect(() => {
  // Both arms must have completed the same turn and not be in the middle of sending
  if (
    controlState.isStreaming ||
    variantState.isStreaming ||
    controlState.isSending ||
    variantState.isSending ||
    queueState.status !== "running"
  )
    return;
  if (controlState.turnIndex !== variantState.turnIndex) return;

  const completedIndex = controlState.turnIndex;

  // Nothing new to record if this turn has already been accounted for.
  if (completedIndex <= queueState.currentIndex) return;

  const nextIndex = completedIndex + 1;

  if (nextIndex < userMessages.length) {
    // Stage next message
    setQueueState((prev) => ({
      ...prev,
      currentIndex: completedIndex,
      stagedIndex: nextIndex,
    }));
    return;
  }

  // All messages completed
  setQueueState((prev) => ({
    ...prev,
    currentIndex: completedIndex,
    status: "completed",
  }));
  setIsRunning(false);

  // Persist final state
  if (run && controlState.sessionId && variantState.sessionId) {
    // The persisted response is the trailing message, but only if it is an
    // assistant message; otherwise an empty string is stored.
    const controlTail = controlState.messages[controlState.messages.length - 1];
    const variantTail = variantState.messages[variantState.messages.length - 1];
    const lastControlResponse =
      controlTail?.type === "assistant" ? controlTail.content : "";
    const lastVariantResponse =
      variantTail?.type === "assistant" ? variantTail.content : "";

    fetch(`/api/comparison-run/${runId}/update`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        status: "completed",
        controlMetrics: controlState.metrics,
        variantMetrics: variantState.metrics,
        controlResponse: lastControlResponse,
        variantResponse: lastVariantResponse,
      }),
    }).catch((err) => {
      // Fire-and-forget: log instead of leaving an unhandled promise rejection.
      console.error("Failed to persist comparison run:", err);
    });
  }
}, [
  controlState.isStreaming,
  controlState.isSending,
  controlState.turnIndex,
  controlState.messages,
  controlState.metrics,
  controlState.sessionId,
  variantState.isStreaming,
  variantState.isSending,
  variantState.turnIndex,
  variantState.messages,
  variantState.metrics,
  variantState.sessionId,
  queueState.status,
  queueState.currentIndex,
  userMessages.length,
  run,
  runId,
]);
1147
+
1148
// Auto-send: while the queue is running and a message is staged, dispatch it to
// every arm that has auto-run enabled, is idle (not streaming/sending), has a
// session, and whose last completed turn matches the queue's current index.
useEffect(() => {
  const queueIsRunning = queueState.status === "running";
  if (!queueIsRunning) return;
  if (queueState.stagedIndex >= userMessages.length) return;
  if (!userMessages[queueState.stagedIndex]) return;

  const controlReady =
    controlState.autoRun &&
    !controlState.isStreaming &&
    !controlState.isSending &&
    Boolean(controlState.sessionId) &&
    controlState.turnIndex === queueState.currentIndex;
  if (controlReady) {
    sendStagedToArm("control");
  }

  const variantReady =
    variantState.autoRun &&
    !variantState.isStreaming &&
    !variantState.isSending &&
    Boolean(variantState.sessionId) &&
    variantState.turnIndex === queueState.currentIndex;
  if (variantReady) {
    sendStagedToArm("variant");
  }
}, [
  queueState.status,
  queueState.stagedIndex,
  queueState.currentIndex,
  userMessages,
  controlState.autoRun,
  controlState.isStreaming,
  controlState.isSending,
  controlState.sessionId,
  controlState.turnIndex,
  variantState.autoRun,
  variantState.isStreaming,
  variantState.isSending,
  variantState.sessionId,
  variantState.turnIndex,
  sendStagedToArm,
]);
1194
+
1195
/** Flips the `autoRun` flag of the given arm, leaving the rest of its state untouched. */
const toggleAutoRun = useCallback((arm: "control" | "variant") => {
  if (arm === "control") {
    setControlState((prev) => ({ ...prev, autoRun: !prev.autoRun }));
  } else {
    setVariantState((prev) => ({ ...prev, autoRun: !prev.autoRun }));
  }
}, []);
1200
+
1201
+ // Start the comparison (initialize sessions, first message sent by auto-send effect)
539
1202
  const runComparison = useCallback(async () => {
540
- if (!run || !config) return;
1203
+ if (!run || !config || userMessages.length === 0) return;
541
1204
 
542
1205
  setIsRunning(true);
543
1206
  setHasRun(true);
544
1207
 
545
- const firstMessage = run.firstUserMessage;
1208
+ // Reset sending refs
1209
+ controlSendingRef.current = false;
1210
+ variantSendingRef.current = false;
546
1211
 
547
- // Reset states
1212
+ // Reset states with initial autoRun setting
548
1213
  setControlState({
549
1214
  sessionId: null,
550
- messages: [{ role: "user", content: firstMessage }],
551
- isStreaming: true,
1215
+ messages: [],
1216
+ isStreaming: false,
1217
+ isSending: false,
552
1218
  metrics: null,
553
1219
  error: null,
1220
+ autoRun: initialAutoRun,
1221
+ turnIndex: -1,
554
1222
  });
555
1223
  setVariantState({
556
1224
  sessionId: null,
557
- messages: [{ role: "user", content: firstMessage }],
558
- isStreaming: true,
1225
+ messages: [],
1226
+ isStreaming: false,
1227
+ isSending: false,
559
1228
  metrics: null,
560
1229
  error: null,
1230
+ autoRun: initialAutoRun,
1231
+ turnIndex: -1,
1232
+ });
1233
+ setQueueState({
1234
+ currentIndex: -1,
1235
+ stagedIndex: 0,
1236
+ status: "running",
561
1237
  });
562
1238
 
563
1239
  try {
@@ -594,189 +1270,30 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
594
1270
  }),
595
1271
  });
596
1272
 
597
- // Run both sessions in parallel
598
- const startTime = Date.now();
599
-
600
- // Track final responses and metrics
601
- let finalControlMetrics: SessionMetrics = {
602
- durationMs: 0,
603
- inputTokens: 0,
604
- outputTokens: 0,
605
- totalTokens: 0,
606
- estimatedCost: 0,
607
- toolCallCount: 0,
608
- };
609
- let finalVariantMetrics: SessionMetrics = {
610
- durationMs: 0,
611
- inputTokens: 0,
612
- outputTokens: 0,
613
- totalTokens: 0,
614
- estimatedCost: 0,
615
- toolCallCount: 0,
616
- };
617
-
618
- // Helper to run a session and fetch metrics
619
- const runSession = async (
620
- sessionId: string,
621
- model: string,
622
- setState: typeof setControlState,
623
- onContentUpdate: (content: string) => void,
624
- ): Promise<{ response: string; metrics: SessionMetrics }> => {
625
- try {
626
- const response = await sendMessageAndCollect(
627
- sessionId,
628
- firstMessage,
629
- onContentUpdate,
630
- );
631
-
632
- const duration = Date.now() - startTime;
633
-
634
- // Poll metrics until they stabilize or we hit a max wait window.
635
- const fetchMetricsWithRetry = async (): Promise<SessionMetrics> => {
636
- const maxWaitMs = 60_000;
637
- const pollIntervalMs = 2_000;
638
- let elapsed = 0;
639
- let previousTokens = -1;
640
- let previousTools = -1;
641
- let lastMetrics: SessionMetrics | null = null;
642
-
643
- while (elapsed <= maxWaitMs) {
644
- try {
645
- const metricsRes = await fetch(
646
- `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
647
- );
648
- const metrics = await metricsRes.json();
649
- lastMetrics = { ...metrics, durationMs: duration };
650
-
651
- // If tokens/tool calls stopped changing and we have data, treat as final.
652
- if (
653
- metrics.totalTokens > 0 &&
654
- metrics.totalTokens === previousTokens &&
655
- metrics.toolCallCount === previousTools
656
- ) {
657
- return lastMetrics!;
658
- }
659
-
660
- previousTokens = metrics.totalTokens ?? 0;
661
- previousTools = metrics.toolCallCount ?? 0;
662
- } catch {
663
- // swallow and retry
664
- }
665
-
666
- await new Promise((r) => setTimeout(r, pollIntervalMs));
667
- elapsed += pollIntervalMs;
668
- }
669
-
670
- // Return whatever we last saw (or zeros if nothing ever arrived)
671
- return (
672
- lastMetrics ?? {
673
- durationMs: duration,
674
- inputTokens: 0,
675
- outputTokens: 0,
676
- totalTokens: 0,
677
- estimatedCost: 0,
678
- toolCallCount: 0,
679
- }
680
- );
681
- };
682
-
683
- const metrics = await fetchMetricsWithRetry();
684
-
685
- setState((prev) => ({
686
- ...prev,
687
- isStreaming: false,
688
- metrics,
689
- }));
690
-
691
- return { response, metrics };
692
- } catch (err) {
693
- setState((prev) => ({
694
- ...prev,
695
- isStreaming: false,
696
- error: err instanceof Error ? err.message : "Unknown error",
697
- }));
698
- return {
699
- response: "",
700
- metrics: {
701
- durationMs: 0,
702
- inputTokens: 0,
703
- outputTokens: 0,
704
- totalTokens: 0,
705
- estimatedCost: 0,
706
- toolCallCount: 0,
707
- },
708
- };
709
- }
710
- };
711
-
712
- const controlModel = config.controlModel || "claude-sonnet-4-5-20250929";
713
- const variantModel =
714
- config.variantModel ||
715
- config.controlModel ||
716
- "claude-sonnet-4-5-20250929";
717
-
718
- const [controlResult, variantResult] = await Promise.all([
719
- runSession(
720
- controlSessionId,
721
- controlModel,
722
- setControlState,
723
- (content) => {
724
- setControlState((prev) => ({
725
- ...prev,
726
- messages: [
727
- { role: "user", content: firstMessage },
728
- { role: "assistant", content },
729
- ],
730
- }));
731
- },
732
- ),
733
- runSession(
734
- variantSessionId,
735
- variantModel,
736
- setVariantState,
737
- (content) => {
738
- setVariantState((prev) => ({
739
- ...prev,
740
- messages: [
741
- { role: "user", content: firstMessage },
742
- { role: "assistant", content },
743
- ],
744
- }));
745
- },
746
- ),
747
- ]);
748
-
749
- finalControlMetrics = controlResult.metrics;
750
- finalVariantMetrics = variantResult.metrics;
751
-
752
- // Update run status with responses and metrics
753
- await fetch(`/api/comparison-run/${runId}/update`, {
754
- method: "POST",
755
- headers: { "Content-Type": "application/json" },
756
- body: JSON.stringify({
757
- status: "completed",
758
- controlMetrics: finalControlMetrics,
759
- variantMetrics: finalVariantMetrics,
760
- controlResponse: controlResult.response,
761
- variantResponse: variantResult.response,
762
- }),
763
- });
1273
+ // Don't send first message here - let the auto-send effect handle it
1274
+ // This ensures all messages go through the same code path and prevents duplicates
764
1275
  } catch (err) {
765
1276
  setError(err instanceof Error ? err.message : "Failed to run comparison");
766
- } finally {
767
1277
  setIsRunning(false);
768
1278
  }
769
- }, [run, config, runId]);
1279
+ // biome-ignore lint/correctness/useExhaustiveDependencies: stable refs
1280
+ }, [run, config, userMessages, initialAutoRun, runId, createSession]);
770
1281
 
771
1282
  // Function to fetch existing or trigger new session analysis
772
1283
  const triggerAnalysis = useCallback(
773
- async (sessionId: string, type: "control" | "variant") => {
1284
+ async (sessionId: string, type: "original" | "control" | "variant") => {
774
1285
  const setLoading =
775
- type === "control"
776
- ? setControlAnalysisLoading
777
- : setVariantAnalysisLoading;
1286
+ type === "original"
1287
+ ? setOriginalAnalysisLoading
1288
+ : type === "control"
1289
+ ? setControlAnalysisLoading
1290
+ : setVariantAnalysisLoading;
778
1291
  const setAnalysis =
779
- type === "control" ? setControlAnalysis : setVariantAnalysis;
1292
+ type === "original"
1293
+ ? setOriginalAnalysis
1294
+ : type === "control"
1295
+ ? setControlAnalysis
1296
+ : setVariantAnalysis;
780
1297
 
781
1298
  setLoading(true);
782
1299
  try {
@@ -812,47 +1329,6 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
812
1329
  [],
813
1330
  );
814
1331
 
815
- // Auto-trigger analysis when sessions complete
816
- useEffect(() => {
817
- // Control session completed
818
- if (
819
- controlState.sessionId &&
820
- !controlState.isStreaming &&
821
- controlState.metrics &&
822
- !controlAnalysis &&
823
- !controlAnalysisLoading
824
- ) {
825
- triggerAnalysis(controlState.sessionId, "control");
826
- }
827
- }, [
828
- controlState.sessionId,
829
- controlState.isStreaming,
830
- controlState.metrics,
831
- controlAnalysis,
832
- controlAnalysisLoading,
833
- triggerAnalysis,
834
- ]);
835
-
836
- useEffect(() => {
837
- // Variant session completed
838
- if (
839
- variantState.sessionId &&
840
- !variantState.isStreaming &&
841
- variantState.metrics &&
842
- !variantAnalysis &&
843
- !variantAnalysisLoading
844
- ) {
845
- triggerAnalysis(variantState.sessionId, "variant");
846
- }
847
- }, [
848
- variantState.sessionId,
849
- variantState.isStreaming,
850
- variantState.metrics,
851
- variantAnalysis,
852
- variantAnalysisLoading,
853
- triggerAnalysis,
854
- ]);
855
-
856
1332
  if (loading) {
857
1333
  return (
858
1334
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
@@ -924,38 +1400,216 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
924
1400
  <div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
925
1401
  {/* Header */}
926
1402
  <div className="flex items-center justify-between mb-4">
927
- <div>
928
- <h2 className="text-lg font-semibold">A/B Comparison</h2>
929
- <p className="text-sm text-muted-foreground">
930
- Comparing: {getDimensionsSummary()}
931
- </p>
1403
+ <div className="flex items-center gap-3">
1404
+ <div>
1405
+ <h2 className="text-lg font-semibold">A/B Comparison</h2>
1406
+ <p className="text-sm text-muted-foreground">
1407
+ Comparing: {getDimensionsSummary()}
1408
+ </p>
1409
+ </div>
1410
+ {/* Message count badge when running */}
1411
+ {hasRun && userMessages.length > 1 && (
1412
+ <div className="flex items-center gap-2 px-3 py-1.5 rounded-full bg-muted text-sm">
1413
+ <span className="font-medium">
1414
+ {queueState.currentIndex + 1}/{userMessages.length}
1415
+ </span>
1416
+ <span className="text-muted-foreground">messages</span>
1417
+ {queueState.status === "completed" && (
1418
+ <span className="text-green-600 dark:text-green-400 text-xs">
1419
+ Complete
1420
+ </span>
1421
+ )}
1422
+ </div>
1423
+ )}
1424
+ </div>
1425
+ <div className="flex items-center gap-2">
1426
+ {/* Comparison Analysis button - shown when comparison is complete */}
1427
+ {hasRun &&
1428
+ queueState.status === "completed" &&
1429
+ (hasComparisonAnalysis ? (
1430
+ <>
1431
+ <Button
1432
+ variant="outline"
1433
+ size="sm"
1434
+ onClick={showComparisonAnalysis}
1435
+ disabled={comparisonAnalysisLoading}
1436
+ >
1437
+ {comparisonAnalysisLoading ? (
1438
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1439
+ ) : (
1440
+ <BarChart3 className="w-4 h-4 mr-2" />
1441
+ )}
1442
+ Show Analysis
1443
+ </Button>
1444
+ <Button
1445
+ variant="ghost"
1446
+ size="sm"
1447
+ onClick={runComparisonAnalysis}
1448
+ disabled={comparisonAnalysisLoading}
1449
+ >
1450
+ Re-analyze
1451
+ </Button>
1452
+ </>
1453
+ ) : (
1454
+ <Button
1455
+ variant="outline"
1456
+ size="sm"
1457
+ onClick={runComparisonAnalysis}
1458
+ disabled={comparisonAnalysisLoading}
1459
+ >
1460
+ {comparisonAnalysisLoading ? (
1461
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
1462
+ ) : (
1463
+ <BarChart3 className="w-4 h-4 mr-2" />
1464
+ )}
1465
+ Analyze Comparison
1466
+ </Button>
1467
+ ))}
1468
+ {!hasRun && (
1469
+ <Button
1470
+ onClick={runComparison}
1471
+ disabled={isRunning || userMessages.length === 0}
1472
+ >
1473
+ {isRunning ? "Running..." : "Start Comparison"}
1474
+ </Button>
1475
+ )}
932
1476
  </div>
933
- {!hasRun && (
934
- <Button onClick={runComparison} disabled={isRunning}>
935
- {isRunning ? "Running..." : "Run Comparison"}
936
- </Button>
937
- )}
938
1477
  </div>
939
1478
 
1479
+ {/* Queue Banner - shown when there's a staged message waiting */}
1480
+ {hasRun &&
1481
+ queueState.status === "running" &&
1482
+ queueState.stagedIndex > queueState.currentIndex &&
1483
+ queueState.stagedIndex < userMessages.length &&
1484
+ !controlState.isStreaming &&
1485
+ !variantState.isStreaming && (
1486
+ <div className="mb-4 p-3 rounded-lg border bg-muted/50 flex items-center gap-4">
1487
+ <div className="flex-1">
1488
+ <div className="text-xs font-medium text-muted-foreground mb-1">
1489
+ Next message ready (#{queueState.stagedIndex + 1})
1490
+ </div>
1491
+ <div className="text-sm truncate">
1492
+ {userMessages[queueState.stagedIndex]?.slice(0, 100)}
1493
+ {(userMessages[queueState.stagedIndex]?.length ?? 0) > 100
1494
+ ? "..."
1495
+ : ""}
1496
+ </div>
1497
+ </div>
1498
+ <div className="flex items-center gap-2 shrink-0">
1499
+ {/* Per-arm send buttons when that arm is not auto-running */}
1500
+ {!controlState.autoRun &&
1501
+ controlState.turnIndex === queueState.currentIndex && (
1502
+ <Button
1503
+ size="sm"
1504
+ variant="outline"
1505
+ onClick={() => sendStagedToArm("control")}
1506
+ className="text-blue-600 border-blue-300 hover:bg-blue-50 dark:text-blue-400 dark:border-blue-700 dark:hover:bg-blue-950"
1507
+ >
1508
+ <Play className="w-3 h-3 mr-1" />
1509
+ Control
1510
+ </Button>
1511
+ )}
1512
+ {!variantState.autoRun &&
1513
+ variantState.turnIndex === queueState.currentIndex && (
1514
+ <Button
1515
+ size="sm"
1516
+ variant="outline"
1517
+ onClick={() => sendStagedToArm("variant")}
1518
+ className="text-orange-600 border-orange-300 hover:bg-orange-50 dark:text-orange-400 dark:border-orange-700 dark:hover:bg-orange-950"
1519
+ >
1520
+ <Play className="w-3 h-3 mr-1" />
1521
+ Variant
1522
+ </Button>
1523
+ )}
1524
+ {/* Send to both button */}
1525
+ {!controlState.autoRun &&
1526
+ !variantState.autoRun &&
1527
+ controlState.turnIndex === queueState.currentIndex &&
1528
+ variantState.turnIndex === queueState.currentIndex && (
1529
+ <Button size="sm" onClick={sendStagedToBoth}>
1530
+ <Play className="w-3 h-3 mr-1" />
1531
+ Send to Both
1532
+ </Button>
1533
+ )}
1534
+ </div>
1535
+ </div>
1536
+ )}
1537
+
940
1538
  {/* Pre-run state */}
941
1539
  {!hasRun && (
942
1540
  <div className="flex-1 flex items-center justify-center">
943
- <Card className="max-w-md w-full">
1541
+ <Card className="max-w-lg w-full">
944
1542
  <CardHeader className="text-center">
945
1543
  <CardTitle>Ready to Compare</CardTitle>
946
1544
  <CardDescription>
947
- This comparison will send the same prompt to both
948
- configurations and display the results side by side.
1545
+ This comparison will replay {userMessages.length} user message
1546
+ {userMessages.length !== 1 ? "s" : ""} to both configurations
1547
+ and display the results side by side.
949
1548
  </CardDescription>
950
1549
  </CardHeader>
951
1550
  <CardContent className="space-y-4">
952
- <div className="bg-muted rounded-lg p-4">
1551
+ {/* User messages list */}
1552
+ <div className="bg-muted rounded-lg p-4 max-h-64 overflow-y-auto">
953
1553
  <div className="text-xs font-medium uppercase text-muted-foreground mb-2">
954
- First message
1554
+ User Messages ({userMessages.length})
1555
+ </div>
1556
+ <div className="space-y-2">
1557
+ {userMessages.map((msg, idx) => (
1558
+ <details
1559
+ key={`user-msg-${msg.slice(0, 50)}-${idx}`}
1560
+ className="group"
1561
+ >
1562
+ <summary className="text-sm cursor-pointer flex items-center gap-2 hover:text-foreground">
1563
+ <span className="text-xs font-mono text-muted-foreground w-5">
1564
+ {idx + 1}.
1565
+ </span>
1566
+ <span className="truncate flex-1">
1567
+ {msg.slice(0, 80)}
1568
+ {msg.length > 80 ? "..." : ""}
1569
+ </span>
1570
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1571
+ </summary>
1572
+ <div className="mt-2 ml-7 text-sm whitespace-pre-wrap bg-background/50 rounded p-2 text-muted-foreground">
1573
+ {msg}
1574
+ </div>
1575
+ </details>
1576
+ ))}
955
1577
  </div>
956
- <div className="text-sm">{run?.firstUserMessage}</div>
957
1578
  </div>
958
- <div className="grid grid-cols-2 gap-4 text-sm">
1579
+
1580
+ {/* Auto-run checkbox */}
1581
+ {userMessages.length > 1 && (
1582
+ <div className="flex items-center gap-3 p-3 rounded-lg border bg-background">
1583
+ <Checkbox
1584
+ id="auto-run"
1585
+ checked={initialAutoRun}
1586
+ onCheckedChange={(checked) => setInitialAutoRun(checked)}
1587
+ />
1588
+ <div className="flex-1">
1589
+ <label
1590
+ htmlFor="auto-run"
1591
+ className="text-sm font-medium cursor-pointer"
1592
+ >
1593
+ Auto run all messages
1594
+ </label>
1595
+ <p className="text-xs text-muted-foreground">
1596
+ If off, next messages are enqueued after each turn.
1597
+ </p>
1598
+ </div>
1599
+ </div>
1600
+ )}
1601
+
1602
+ {/* Original vs Control vs Variant labels */}
1603
+ <div className="grid grid-cols-3 gap-4 text-sm">
1604
+ <div className="space-y-1">
1605
+ <div className="flex items-center gap-2">
1606
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1607
+ <span className="font-medium">Original</span>
1608
+ </div>
1609
+ <div className="text-muted-foreground text-xs">
1610
+ Source session
1611
+ </div>
1612
+ </div>
959
1613
  <div className="space-y-1">
960
1614
  <div className="flex items-center gap-2">
961
1615
  <span className="w-2 h-2 rounded-full bg-blue-500" />
@@ -980,16 +1634,165 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
980
1634
  </div>
981
1635
  )}
982
1636
 
983
- {/* Side-by-side comparison */}
1637
+ {/* Side-by-side comparison - 3 panes: Original, Control, Variant */}
984
1638
  {hasRun && (
985
- <div className="grid grid-cols-2 gap-4 flex-1 min-h-0">
986
- {/* Control */}
1639
+ <div className="grid grid-cols-3 gap-4 flex-1 min-h-0">
1640
+ {/* Original (Source Session - Read Only) */}
987
1641
  <Card className="flex flex-col h-full min-h-0 overflow-hidden">
988
1642
  <CardHeader className="py-3 border-b shrink-0">
989
1643
  <CardTitle className="text-sm flex items-center gap-2">
990
- <span className="w-2 h-2 rounded-full bg-blue-500" />
991
- Control (Original)
1644
+ <span className="w-2 h-2 rounded-full bg-yellow-500" />
1645
+ Original Session
992
1646
  </CardTitle>
1647
+ <CardDescription className="text-xs">
1648
+ Source session (read-only)
1649
+ </CardDescription>
1650
+ </CardHeader>
1651
+ <CardContent className="flex-1 overflow-auto py-4">
1652
+ {originalMessages.map((msg, i) => (
1653
+ <div
1654
+ key={`original-${msg.type}-${i}`}
1655
+ className={`mb-4 ${
1656
+ msg.type === "user"
1657
+ ? "text-yellow-600 dark:text-yellow-400"
1658
+ : msg.type === "tool_call"
1659
+ ? ""
1660
+ : ""
1661
+ }`}
1662
+ >
1663
+ {msg.type === "tool_call" ? (
1664
+ <details className="rounded bg-muted/50 border text-xs group">
1665
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1666
+ <span className="text-muted-foreground">🔧</span>
1667
+ <span className="font-medium flex-1">
1668
+ {msg.toolName || msg.content}
1669
+ </span>
1670
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1671
+ </summary>
1672
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1673
+ {msg.toolInput !== null &&
1674
+ msg.toolInput !== undefined && (
1675
+ <div>
1676
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1677
+ Args
1678
+ </div>
1679
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1680
+ {typeof msg.toolInput === "string"
1681
+ ? msg.toolInput
1682
+ : JSON.stringify(msg.toolInput, null, 2)}
1683
+ </pre>
1684
+ </div>
1685
+ )}
1686
+ {msg.toolOutput !== null &&
1687
+ msg.toolOutput !== undefined && (
1688
+ <div>
1689
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1690
+ Result
1691
+ </div>
1692
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1693
+ {typeof msg.toolOutput === "string"
1694
+ ? msg.toolOutput
1695
+ : JSON.stringify(msg.toolOutput, null, 2)}
1696
+ </pre>
1697
+ </div>
1698
+ )}
1699
+ {(msg.toolInput === null ||
1700
+ msg.toolInput === undefined) &&
1701
+ (msg.toolOutput === null ||
1702
+ msg.toolOutput === undefined) && (
1703
+ <div className="text-muted-foreground text-[11px]">
1704
+ No input/output data available
1705
+ </div>
1706
+ )}
1707
+ </div>
1708
+ </details>
1709
+ ) : (
1710
+ <>
1711
+ <div className="text-xs font-medium uppercase mb-1">
1712
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1713
+ </div>
1714
+ <div className="text-sm whitespace-pre-wrap">
1715
+ {msg.content}
1716
+ </div>
1717
+ </>
1718
+ )}
1719
+ </div>
1720
+ ))}
1721
+ {originalMessages.length === 0 && (
1722
+ <div className="text-sm text-muted-foreground">
1723
+ No messages in source session
1724
+ </div>
1725
+ )}
1726
+ </CardContent>
1727
+ {/* Session Analysis & Tool Calls for Original */}
1728
+ {originalMetrics && (
1729
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1730
+ {/* Session Analysis */}
1731
+ <SessionAnalysisPanel
1732
+ analysis={originalAnalysis}
1733
+ isLoading={originalAnalysisLoading}
1734
+ isExpanded={analysisExpanded.original}
1735
+ onToggle={() =>
1736
+ setAnalysisExpanded((prev) => ({
1737
+ ...prev,
1738
+ original: !prev.original,
1739
+ }))
1740
+ }
1741
+ onRunAnalysis={() =>
1742
+ run?.sourceSessionId &&
1743
+ triggerAnalysis(run.sourceSessionId, "original")
1744
+ }
1745
+ accentColor="yellow"
1746
+ />
1747
+ {/* Tool Calls */}
1748
+ <ToolCallsPanel
1749
+ toolCalls={originalMetrics.toolCalls}
1750
+ isExpanded={toolCallsExpanded.original}
1751
+ onToggle={() =>
1752
+ setToolCallsExpanded((prev) => ({
1753
+ ...prev,
1754
+ original: !prev.original,
1755
+ }))
1756
+ }
1757
+ accentColor="yellow"
1758
+ />
1759
+ </div>
1760
+ )}
1761
+ </Card>
1762
+
1763
+ {/* Control */}
1764
+ <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1765
+ <CardHeader className="py-3 border-b shrink-0">
1766
+ <div className="flex items-center justify-between">
1767
+ <CardTitle className="text-sm flex items-center gap-2">
1768
+ <span className="w-2 h-2 rounded-full bg-blue-500" />
1769
+ Control (Rerun)
1770
+ {controlState.isStreaming && (
1771
+ <Loader2 className="w-3 h-3 animate-spin text-blue-500" />
1772
+ )}
1773
+ </CardTitle>
1774
+ {/* Auto-run toggle for Control */}
1775
+ {userMessages.length > 1 &&
1776
+ queueState.status === "running" && (
1777
+ <button
1778
+ type="button"
1779
+ onClick={() => toggleAutoRun("control")}
1780
+ className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors"
1781
+ title={
1782
+ controlState.autoRun
1783
+ ? "Disable auto-run"
1784
+ : "Enable auto-run"
1785
+ }
1786
+ >
1787
+ {controlState.autoRun ? (
1788
+ <ToggleRight className="w-4 h-4 text-blue-500" />
1789
+ ) : (
1790
+ <ToggleLeft className="w-4 h-4" />
1791
+ )}
1792
+ <span>Auto</span>
1793
+ </button>
1794
+ )}
1795
+ </div>
993
1796
  <CardDescription className="text-xs">
994
1797
  {getControlDimensionLabel()}
995
1798
  </CardDescription>
@@ -997,25 +1800,96 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
997
1800
  <CardContent className="flex-1 overflow-auto py-4">
998
1801
  {controlState.messages.map((msg, i) => (
999
1802
  <div
1000
- key={i}
1001
- className={`mb-4 ${msg.role === "user" ? "text-blue-600 dark:text-blue-400" : ""}`}
1803
+ key={`control-${msg.type}-${i}`}
1804
+ className={`mb-4 ${
1805
+ msg.type === "user"
1806
+ ? "text-blue-600 dark:text-blue-400"
1807
+ : msg.type === "tool_call"
1808
+ ? ""
1809
+ : ""
1810
+ }`}
1002
1811
  >
1003
- <div className="text-xs font-medium uppercase mb-1">
1004
- {msg.role}
1005
- </div>
1006
- <div className="text-sm whitespace-pre-wrap">
1007
- {msg.content}
1008
- {controlState.isStreaming &&
1009
- msg.role === "assistant" &&
1010
- i === controlState.messages.length - 1 && (
1011
- <span className="animate-pulse">▊</span>
1012
- )}
1013
- </div>
1812
+ {msg.type === "tool_call" ? (
1813
+ <details className="rounded bg-muted/50 border text-xs group">
1814
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1815
+ <span className="text-muted-foreground">🔧</span>
1816
+ <span className="font-medium flex-1">
1817
+ {msg.toolName || msg.content}
1818
+ </span>
1819
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1820
+ </summary>
1821
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1822
+ {msg.toolInput !== null &&
1823
+ msg.toolInput !== undefined && (
1824
+ <div>
1825
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1826
+ Args
1827
+ </div>
1828
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1829
+ {typeof msg.toolInput === "string"
1830
+ ? msg.toolInput
1831
+ : JSON.stringify(msg.toolInput, null, 2)}
1832
+ </pre>
1833
+ </div>
1834
+ )}
1835
+ {msg.toolOutput !== null &&
1836
+ msg.toolOutput !== undefined && (
1837
+ <div>
1838
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1839
+ Result
1840
+ </div>
1841
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1842
+ {typeof msg.toolOutput === "string"
1843
+ ? msg.toolOutput
1844
+ : JSON.stringify(msg.toolOutput, null, 2)}
1845
+ </pre>
1846
+ </div>
1847
+ )}
1848
+ {(msg.toolInput === null ||
1849
+ msg.toolInput === undefined) &&
1850
+ (msg.toolOutput === null ||
1851
+ msg.toolOutput === undefined) && (
1852
+ <div className="text-muted-foreground text-[11px]">
1853
+ No input/output data available
1854
+ </div>
1855
+ )}
1856
+ </div>
1857
+ </details>
1858
+ ) : (
1859
+ <>
1860
+ <div className="text-xs font-medium uppercase mb-1">
1861
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
1862
+ </div>
1863
+ <div className="text-sm whitespace-pre-wrap">
1864
+ {msg.content}
1865
+ {controlState.isStreaming &&
1866
+ msg.type === "assistant" &&
1867
+ i === controlState.messages.length - 1 && (
1868
+ <span className="animate-pulse">▊</span>
1869
+ )}
1870
+ </div>
1871
+ </>
1872
+ )}
1014
1873
  </div>
1015
1874
  ))}
1016
1875
  {controlState.error && (
1017
- <div className="text-red-500 text-sm">
1018
- Error: {controlState.error}
1876
+ <div className="p-3 rounded-lg border border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-950/30">
1877
+ <div className="text-red-600 dark:text-red-400 text-sm mb-2">
1878
+ Error: {controlState.error}
1879
+ </div>
1880
+ {queueState.status === "running" && (
1881
+ <Button
1882
+ size="sm"
1883
+ variant="outline"
1884
+ onClick={() => {
1885
+ setControlState((prev) => ({ ...prev, error: null }));
1886
+ sendStagedToArm("control");
1887
+ }}
1888
+ className="text-red-600 border-red-300 hover:bg-red-100 dark:text-red-400"
1889
+ >
1890
+ Retry
1891
+ </Button>
1892
+ )}
1019
1893
  </div>
1020
1894
  )}
1021
1895
  </CardContent>
@@ -1033,6 +1907,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1033
1907
  control: !prev.control,
1034
1908
  }))
1035
1909
  }
1910
+ onRunAnalysis={() =>
1911
+ controlState.sessionId &&
1912
+ triggerAnalysis(controlState.sessionId, "control")
1913
+ }
1036
1914
  accentColor="blue"
1037
1915
  />
1038
1916
  {/* Tool Calls */}
@@ -1054,10 +1932,36 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1054
1932
  {/* Variant */}
1055
1933
  <Card className="flex flex-col h-full min-h-0 overflow-hidden">
1056
1934
  <CardHeader className="py-3 border-b shrink-0">
1057
- <CardTitle className="text-sm flex items-center gap-2">
1058
- <span className="w-2 h-2 rounded-full bg-orange-500" />
1059
- Variant
1060
- </CardTitle>
1935
+ <div className="flex items-center justify-between">
1936
+ <CardTitle className="text-sm flex items-center gap-2">
1937
+ <span className="w-2 h-2 rounded-full bg-orange-500" />
1938
+ Variant
1939
+ {variantState.isStreaming && (
1940
+ <Loader2 className="w-3 h-3 animate-spin text-orange-500" />
1941
+ )}
1942
+ </CardTitle>
1943
+ {/* Auto-run toggle for Variant */}
1944
+ {userMessages.length > 1 &&
1945
+ queueState.status === "running" && (
1946
+ <button
1947
+ type="button"
1948
+ onClick={() => toggleAutoRun("variant")}
1949
+ className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors"
1950
+ title={
1951
+ variantState.autoRun
1952
+ ? "Disable auto-run"
1953
+ : "Enable auto-run"
1954
+ }
1955
+ >
1956
+ {variantState.autoRun ? (
1957
+ <ToggleRight className="w-4 h-4 text-orange-500" />
1958
+ ) : (
1959
+ <ToggleLeft className="w-4 h-4" />
1960
+ )}
1961
+ <span>Auto</span>
1962
+ </button>
1963
+ )}
1964
+ </div>
1061
1965
  <CardDescription className="text-xs">
1062
1966
  {getDimensionLabel()}
1063
1967
  </CardDescription>
@@ -1065,25 +1969,96 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1065
1969
  <CardContent className="flex-1 overflow-auto py-4">
1066
1970
  {variantState.messages.map((msg, i) => (
1067
1971
  <div
1068
- key={i}
1069
- className={`mb-4 ${msg.role === "user" ? "text-orange-600 dark:text-orange-400" : ""}`}
1972
+ key={`variant-${msg.type}-${i}`}
1973
+ className={`mb-4 ${
1974
+ msg.type === "user"
1975
+ ? "text-orange-600 dark:text-orange-400"
1976
+ : msg.type === "tool_call"
1977
+ ? ""
1978
+ : ""
1979
+ }`}
1070
1980
  >
1071
- <div className="text-xs font-medium uppercase mb-1">
1072
- {msg.role}
1073
- </div>
1074
- <div className="text-sm whitespace-pre-wrap">
1075
- {msg.content}
1076
- {variantState.isStreaming &&
1077
- msg.role === "assistant" &&
1078
- i === variantState.messages.length - 1 && (
1079
- <span className="animate-pulse">▊</span>
1080
- )}
1081
- </div>
1981
+ {msg.type === "tool_call" ? (
1982
+ <details className="rounded bg-muted/50 border text-xs group">
1983
+ <summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
1984
+ <span className="text-muted-foreground">🔧</span>
1985
+ <span className="font-medium flex-1">
1986
+ {msg.toolName || msg.content}
1987
+ </span>
1988
+ <ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
1989
+ </summary>
1990
+ <div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
1991
+ {msg.toolInput !== null &&
1992
+ msg.toolInput !== undefined && (
1993
+ <div>
1994
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
1995
+ Args
1996
+ </div>
1997
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
1998
+ {typeof msg.toolInput === "string"
1999
+ ? msg.toolInput
2000
+ : JSON.stringify(msg.toolInput, null, 2)}
2001
+ </pre>
2002
+ </div>
2003
+ )}
2004
+ {msg.toolOutput !== null &&
2005
+ msg.toolOutput !== undefined && (
2006
+ <div>
2007
+ <div className="text-[10px] font-semibold text-muted-foreground mb-1">
2008
+ Result
2009
+ </div>
2010
+ <pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
2011
+ {typeof msg.toolOutput === "string"
2012
+ ? msg.toolOutput
2013
+ : JSON.stringify(msg.toolOutput, null, 2)}
2014
+ </pre>
2015
+ </div>
2016
+ )}
2017
+ {(msg.toolInput === null ||
2018
+ msg.toolInput === undefined) &&
2019
+ (msg.toolOutput === null ||
2020
+ msg.toolOutput === undefined) && (
2021
+ <div className="text-muted-foreground text-[11px]">
2022
+ No input/output data available
2023
+ </div>
2024
+ )}
2025
+ </div>
2026
+ </details>
2027
+ ) : (
2028
+ <>
2029
+ <div className="text-xs font-medium uppercase mb-1">
2030
+ {msg.type === "user" ? "USER" : "ASSISTANT"}
2031
+ </div>
2032
+ <div className="text-sm whitespace-pre-wrap">
2033
+ {msg.content}
2034
+ {variantState.isStreaming &&
2035
+ msg.type === "assistant" &&
2036
+ i === variantState.messages.length - 1 && (
2037
+ <span className="animate-pulse">▊</span>
2038
+ )}
2039
+ </div>
2040
+ </>
2041
+ )}
1082
2042
  </div>
1083
2043
  ))}
1084
2044
  {variantState.error && (
1085
- <div className="text-red-500 text-sm">
1086
- Error: {variantState.error}
2045
+ <div className="p-3 rounded-lg border border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-950/30">
2046
+ <div className="text-red-600 dark:text-red-400 text-sm mb-2">
2047
+ Error: {variantState.error}
2048
+ </div>
2049
+ {queueState.status === "running" && (
2050
+ <Button
2051
+ size="sm"
2052
+ variant="outline"
2053
+ onClick={() => {
2054
+ setVariantState((prev) => ({ ...prev, error: null }));
2055
+ sendStagedToArm("variant");
2056
+ }}
2057
+ className="text-red-600 border-red-300 hover:bg-red-100 dark:text-red-400"
2058
+ >
2059
+ Retry
2060
+ </Button>
2061
+ )}
1087
2062
  </div>
1088
2063
  )}
1089
2064
  </CardContent>
@@ -1101,6 +2076,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1101
2076
  variant: !prev.variant,
1102
2077
  }))
1103
2078
  }
2079
+ onRunAnalysis={() =>
2080
+ variantState.sessionId &&
2081
+ triggerAnalysis(variantState.sessionId, "variant")
2082
+ }
1104
2083
  accentColor="orange"
1105
2084
  />
1106
2085
  {/* Tool Calls */}
@@ -1121,6 +2100,15 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
1121
2100
  </div>
1122
2101
  )}
1123
2102
  </div>
2103
+
2104
+ {/* Comparison Analysis Dialog */}
2105
+ {comparisonAnalysis && (
2106
+ <ComparisonAnalysisDialog
2107
+ open={comparisonAnalysisDialogOpen}
2108
+ onClose={() => setComparisonAnalysisDialogOpen(false)}
2109
+ analysis={comparisonAnalysis}
2110
+ />
2111
+ )}
1124
2112
  </DebuggerLayout>
1125
2113
  );
1126
2114
  }