@townco/debugger 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/App.tsx +1 -0
- package/src/analysis/analyzer.ts +1 -2
- package/src/analysis/comparison-analyzer.ts +528 -0
- package/src/analysis/comparison-schema.ts +151 -0
- package/src/analysis/comparison-types.ts +194 -0
- package/src/analysis-db.ts +13 -6
- package/src/comparison-db.ts +75 -3
- package/src/components/AnalyzeAllButton.tsx +6 -2
- package/src/components/ComparisonAnalysisDialog.tsx +591 -0
- package/src/components/DebuggerHeader.tsx +0 -1
- package/src/components/LogList.tsx +9 -0
- package/src/components/SessionTraceList.tsx +9 -0
- package/src/components/SpanDetailsPanel.tsx +20 -1
- package/src/components/SpanTimeline.tsx +31 -4
- package/src/components/SpanTree.tsx +10 -1
- package/src/components/TurnMetadataPanel.tsx +0 -1
- package/src/components/UnifiedTimeline.tsx +25 -35
- package/src/components/ui/button.tsx +1 -1
- package/src/components/ui/card.tsx +1 -1
- package/src/components/ui/checkbox.tsx +43 -0
- package/src/components/ui/input.tsx +1 -1
- package/src/components/ui/label.tsx +1 -1
- package/src/components/ui/select.tsx +1 -1
- package/src/components/ui/textarea.tsx +1 -1
- package/src/frontend.tsx +2 -0
- package/src/lib/metrics.test.ts +2 -0
- package/src/lib/turnExtractor.ts +28 -0
- package/src/pages/ComparisonView.tsx +1310 -322
- package/src/pages/FindSessions.tsx +3 -1
- package/src/pages/TownHall.tsx +30 -14
- package/src/server.ts +177 -7
- package/src/types.ts +4 -0
- package/styles/globals.css +120 -0
- package/tsconfig.json +2 -2
|
@@ -1,5 +1,13 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
BarChart3,
|
|
3
|
+
ChevronDown,
|
|
4
|
+
ChevronUp,
|
|
5
|
+
Loader2,
|
|
6
|
+
Play,
|
|
7
|
+
ToggleLeft,
|
|
8
|
+
ToggleRight,
|
|
9
|
+
} from "lucide-react";
|
|
10
|
+
import { useCallback, useEffect, useRef, useState } from "react";
|
|
3
11
|
import { Button } from "@/components/ui/button";
|
|
4
12
|
import {
|
|
5
13
|
Card,
|
|
@@ -8,7 +16,10 @@ import {
|
|
|
8
16
|
CardHeader,
|
|
9
17
|
CardTitle,
|
|
10
18
|
} from "@/components/ui/card";
|
|
19
|
+
import { Checkbox } from "@/components/ui/checkbox";
|
|
20
|
+
import type { SessionComparisonAnalysis } from "../analysis/comparison-types";
|
|
11
21
|
import type { SessionAnalysis } from "../analysis/types";
|
|
22
|
+
import { ComparisonAnalysisDialog } from "../components/ComparisonAnalysisDialog";
|
|
12
23
|
import { DebuggerLayout } from "../components/DebuggerLayout";
|
|
13
24
|
import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
|
|
14
25
|
import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
|
|
@@ -17,17 +28,30 @@ interface ComparisonViewProps {
|
|
|
17
28
|
runId: string;
|
|
18
29
|
}
|
|
19
30
|
|
|
20
|
-
|
|
21
|
-
|
|
31
|
+
/**
 * One entry in a rendered conversation: a user message, an assistant
 * message, or a tool call.
 */
interface ConversationItem {
  /** Discriminates which kind of entry this is. */
  type: "user" | "assistant" | "tool_call";
  /**
   * Message text. For tool calls the loaders in this file store the tool
   * name here (falling back to the raw message content).
   */
  content: string;
  /** Name of the invoked tool; only populated for `tool_call` entries. */
  toolName?: string | undefined;
  /** Raw tool input as reported by the trace; shape is not known here. */
  toolInput?: unknown;
  /** Raw tool output as reported by the trace; shape is not known here. */
  toolOutput?: unknown;
}
|
|
24
39
|
|
|
25
40
|
/** Replay state tracked separately for each arm (control / variant). */
interface SessionState {
  /** Agent session id for this arm, or null until one is assigned. */
  sessionId: string | null;
  /** Conversation entries shown for this arm, in display order. */
  messages: ConversationItem[];
  /** True while a response is being streamed for this arm. */
  isStreaming: boolean;
  /** True while sending is in progress (before streaming starts). */
  isSending: boolean;
  /** Latest metrics for this arm's session, or null if none loaded yet. */
  metrics: SessionMetrics | null;
  /** Last error message for this arm, or null. */
  error: string | null;
  autoRun: boolean;
  /** Last completed user message index for this arm (-1 before any turn). */
  turnIndex: number;
}
|
|
50
|
+
|
|
51
|
+
/** Progress of the multi-message replay queue shared by both arms. */
interface QueueState {
  /** Last completed turn (both arms finished); -1 before the first turn. */
  currentIndex: number;
  /** Index of the next user message ready to send. */
  stagedIndex: number;
  /** Lifecycle of the replay queue. */
  status: "idle" | "running" | "completed";
}
|
|
32
56
|
|
|
33
57
|
const AGENT_SERVER_URL =
|
|
@@ -41,37 +65,60 @@ function SessionAnalysisPanel({
|
|
|
41
65
|
isLoading,
|
|
42
66
|
isExpanded,
|
|
43
67
|
onToggle,
|
|
68
|
+
onRunAnalysis,
|
|
44
69
|
accentColor,
|
|
45
70
|
}: {
|
|
46
71
|
analysis: SessionAnalysis | null;
|
|
47
72
|
isLoading: boolean;
|
|
48
73
|
isExpanded: boolean;
|
|
49
74
|
onToggle: () => void;
|
|
50
|
-
|
|
75
|
+
onRunAnalysis: () => void;
|
|
76
|
+
accentColor: "yellow" | "blue" | "orange";
|
|
51
77
|
}) {
|
|
52
78
|
const colorClasses =
|
|
53
|
-
accentColor === "
|
|
54
|
-
? "border-
|
|
55
|
-
:
|
|
79
|
+
accentColor === "yellow"
|
|
80
|
+
? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
|
|
81
|
+
: accentColor === "blue"
|
|
82
|
+
? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
|
|
83
|
+
: "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
|
|
56
84
|
|
|
57
85
|
const headerColorClasses =
|
|
58
|
-
accentColor === "
|
|
59
|
-
? "hover:bg-
|
|
60
|
-
:
|
|
86
|
+
accentColor === "yellow"
|
|
87
|
+
? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
|
|
88
|
+
: accentColor === "blue"
|
|
89
|
+
? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
|
|
90
|
+
: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
|
|
61
91
|
|
|
62
92
|
if (isLoading) {
|
|
63
93
|
return (
|
|
64
94
|
<div className={`border rounded-md p-3 ${colorClasses}`}>
|
|
65
95
|
<div className="flex items-center gap-2 text-xs text-muted-foreground">
|
|
66
96
|
<Loader2 className="w-3 h-3 animate-spin" />
|
|
67
|
-
|
|
97
|
+
Running analysis...
|
|
68
98
|
</div>
|
|
69
99
|
</div>
|
|
70
100
|
);
|
|
71
101
|
}
|
|
72
102
|
|
|
73
103
|
if (!analysis) {
|
|
74
|
-
return
|
|
104
|
+
return (
|
|
105
|
+
<div className={`border rounded-md p-3 ${colorClasses}`}>
|
|
106
|
+
<div className="flex items-center justify-between">
|
|
107
|
+
<span className="text-xs text-muted-foreground">
|
|
108
|
+
Session Analysis
|
|
109
|
+
</span>
|
|
110
|
+
<Button
|
|
111
|
+
size="sm"
|
|
112
|
+
variant="outline"
|
|
113
|
+
onClick={onRunAnalysis}
|
|
114
|
+
className="h-6 text-xs px-2"
|
|
115
|
+
>
|
|
116
|
+
<Play className="w-3 h-3 mr-1" />
|
|
117
|
+
Run Analysis
|
|
118
|
+
</Button>
|
|
119
|
+
</div>
|
|
120
|
+
</div>
|
|
121
|
+
);
|
|
75
122
|
}
|
|
76
123
|
|
|
77
124
|
return (
|
|
@@ -203,17 +250,21 @@ function ToolCallsPanel({
|
|
|
203
250
|
toolCalls: SessionMetrics["toolCalls"];
|
|
204
251
|
isExpanded: boolean;
|
|
205
252
|
onToggle: () => void;
|
|
206
|
-
accentColor: "blue" | "orange";
|
|
253
|
+
accentColor: "yellow" | "blue" | "orange";
|
|
207
254
|
}) {
|
|
208
255
|
const colorClasses =
|
|
209
|
-
accentColor === "
|
|
210
|
-
? "border-
|
|
211
|
-
:
|
|
256
|
+
accentColor === "yellow"
|
|
257
|
+
? "border-yellow-200 dark:border-yellow-800 bg-yellow-50/50 dark:bg-yellow-950/30"
|
|
258
|
+
: accentColor === "blue"
|
|
259
|
+
? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
|
|
260
|
+
: "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
|
|
212
261
|
|
|
213
262
|
const headerColorClasses =
|
|
214
|
-
accentColor === "
|
|
215
|
-
? "hover:bg-
|
|
216
|
-
:
|
|
263
|
+
accentColor === "yellow"
|
|
264
|
+
? "hover:bg-yellow-100/50 dark:hover:bg-yellow-900/30"
|
|
265
|
+
: accentColor === "blue"
|
|
266
|
+
? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
|
|
267
|
+
: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
|
|
217
268
|
|
|
218
269
|
const toolCallCount = toolCalls?.length ?? 0;
|
|
219
270
|
|
|
@@ -294,86 +345,369 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
294
345
|
const [loading, setLoading] = useState(true);
|
|
295
346
|
const [error, setError] = useState<string | null>(null);
|
|
296
347
|
|
|
348
|
+
// User messages from source session
|
|
349
|
+
const [userMessages, setUserMessages] = useState<string[]>([]);
|
|
350
|
+
const [initialAutoRun, setInitialAutoRun] = useState(false);
|
|
351
|
+
|
|
352
|
+
// Queue state for multi-message replay
|
|
353
|
+
const [queueState, setQueueState] = useState<QueueState>({
|
|
354
|
+
currentIndex: -1,
|
|
355
|
+
stagedIndex: 0,
|
|
356
|
+
status: "idle",
|
|
357
|
+
});
|
|
358
|
+
|
|
297
359
|
// Session states
|
|
298
360
|
const [controlState, setControlState] = useState<SessionState>({
|
|
299
361
|
sessionId: null,
|
|
300
362
|
messages: [],
|
|
301
363
|
isStreaming: false,
|
|
364
|
+
isSending: false,
|
|
302
365
|
metrics: null,
|
|
303
366
|
error: null,
|
|
367
|
+
autoRun: false,
|
|
368
|
+
turnIndex: -1,
|
|
304
369
|
});
|
|
305
370
|
const [variantState, setVariantState] = useState<SessionState>({
|
|
306
371
|
sessionId: null,
|
|
307
372
|
messages: [],
|
|
308
373
|
isStreaming: false,
|
|
374
|
+
isSending: false,
|
|
309
375
|
metrics: null,
|
|
310
376
|
error: null,
|
|
377
|
+
autoRun: false,
|
|
378
|
+
turnIndex: -1,
|
|
311
379
|
});
|
|
312
380
|
|
|
381
|
+
// Refs for stable callbacks
|
|
382
|
+
const controlStateRef = useRef(controlState);
|
|
383
|
+
const variantStateRef = useRef(variantState);
|
|
384
|
+
const queueStateRef = useRef(queueState);
|
|
385
|
+
const userMessagesRef = useRef(userMessages);
|
|
386
|
+
|
|
387
|
+
// Separate refs for send locks - these update synchronously to prevent race conditions
|
|
388
|
+
const controlSendingRef = useRef(false);
|
|
389
|
+
const variantSendingRef = useRef(false);
|
|
390
|
+
|
|
391
|
+
useEffect(() => {
|
|
392
|
+
controlStateRef.current = controlState;
|
|
393
|
+
}, [controlState]);
|
|
394
|
+
useEffect(() => {
|
|
395
|
+
variantStateRef.current = variantState;
|
|
396
|
+
}, [variantState]);
|
|
397
|
+
useEffect(() => {
|
|
398
|
+
queueStateRef.current = queueState;
|
|
399
|
+
}, [queueState]);
|
|
400
|
+
useEffect(() => {
|
|
401
|
+
userMessagesRef.current = userMessages;
|
|
402
|
+
}, [userMessages]);
|
|
403
|
+
|
|
313
404
|
const [isRunning, setIsRunning] = useState(false);
|
|
314
405
|
const [hasRun, setHasRun] = useState(false);
|
|
315
406
|
|
|
407
|
+
// Original source session state (read-only, for reference)
|
|
408
|
+
const [originalMessages, setOriginalMessages] = useState<ConversationItem[]>(
|
|
409
|
+
[],
|
|
410
|
+
);
|
|
411
|
+
const [originalMetrics, setOriginalMetrics] = useState<SessionMetrics | null>(
|
|
412
|
+
null,
|
|
413
|
+
);
|
|
414
|
+
|
|
316
415
|
// Session analysis state
|
|
416
|
+
const [originalAnalysis, setOriginalAnalysis] =
|
|
417
|
+
useState<SessionAnalysis | null>(null);
|
|
317
418
|
const [controlAnalysis, setControlAnalysis] =
|
|
318
419
|
useState<SessionAnalysis | null>(null);
|
|
319
420
|
const [variantAnalysis, setVariantAnalysis] =
|
|
320
421
|
useState<SessionAnalysis | null>(null);
|
|
422
|
+
const [originalAnalysisLoading, setOriginalAnalysisLoading] = useState(false);
|
|
321
423
|
const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
|
|
322
424
|
const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
|
|
323
425
|
const [analysisExpanded, setAnalysisExpanded] = useState<{
|
|
426
|
+
original: boolean;
|
|
324
427
|
control: boolean;
|
|
325
428
|
variant: boolean;
|
|
326
429
|
}>({
|
|
430
|
+
original: false,
|
|
327
431
|
control: false,
|
|
328
432
|
variant: false,
|
|
329
433
|
});
|
|
330
434
|
const [toolCallsExpanded, setToolCallsExpanded] = useState<{
|
|
435
|
+
original: boolean;
|
|
331
436
|
control: boolean;
|
|
332
437
|
variant: boolean;
|
|
333
438
|
}>({
|
|
439
|
+
original: false,
|
|
334
440
|
control: false,
|
|
335
441
|
variant: false,
|
|
336
442
|
});
|
|
337
443
|
|
|
338
|
-
//
|
|
444
|
+
// Comparison analysis state
|
|
445
|
+
const [comparisonAnalysis, setComparisonAnalysis] =
|
|
446
|
+
useState<SessionComparisonAnalysis | null>(null);
|
|
447
|
+
const [comparisonAnalysisLoading, setComparisonAnalysisLoading] =
|
|
448
|
+
useState(false);
|
|
449
|
+
const [comparisonAnalysisDialogOpen, setComparisonAnalysisDialogOpen] =
|
|
450
|
+
useState(false);
|
|
451
|
+
const [hasComparisonAnalysis, setHasComparisonAnalysis] = useState(false);
|
|
452
|
+
|
|
453
|
+
// Check whether a stored comparison analysis already exists for this run.
useEffect(() => {
  if (!runId) return;

  // Prevents a late response for a previous runId (or one arriving after
  // unmount) from overwriting the flag for the current run.
  let cancelled = false;

  fetch(`/api/comparison-analysis/${runId}/exists`)
    .then((res) => {
      // Treat HTTP errors like network errors: "no analysis available".
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      return res.json();
    })
    .then((data) => {
      if (!cancelled) setHasComparisonAnalysis(Boolean(data?.exists));
    })
    .catch(() => {
      if (!cancelled) setHasComparisonAnalysis(false);
    });

  return () => {
    cancelled = true;
  };
}, [runId]);
|
|
466
|
+
|
|
467
|
+
/**
 * Runs a new comparison analysis for the current run via
 * `POST /api/analyze-comparison/:runId`, stores the result, marks an analysis
 * as available and opens the analysis dialog. Failures are logged and shown
 * to the user with `alert`; the loading flag is always cleared.
 */
const runComparisonAnalysis = async () => {
  setComparisonAnalysisLoading(true);
  try {
    const res = await fetch(`/api/analyze-comparison/${runId}`, {
      method: "POST",
    });
    if (!res.ok) {
      // The error body is not guaranteed to be JSON; a parse failure must not
      // replace the intended message with a SyntaxError.
      const body = await res.json().catch(() => null);
      throw new Error(body?.error || "Analysis failed");
    }
    const analysis = await res.json();
    setComparisonAnalysis(analysis);
    setHasComparisonAnalysis(true);
    setComparisonAnalysisDialogOpen(true);
  } catch (err) {
    console.error("Comparison analysis error:", err);
    alert(
      `Analysis failed: ${err instanceof Error ? err.message : "Unknown error"}`,
    );
  } finally {
    setComparisonAnalysisLoading(false);
  }
};
|
|
491
|
+
|
|
492
|
+
/**
 * Opens the comparison analysis dialog. If an analysis is already held in
 * state it is shown immediately; otherwise it is fetched from
 * `GET /api/comparison-analysis/:runId` first. Failures are logged and
 * reported with `alert` so the click does not appear to do nothing.
 */
const showComparisonAnalysis = async () => {
  if (comparisonAnalysis) {
    setComparisonAnalysisDialogOpen(true);
    return;
  }

  setComparisonAnalysisLoading(true);
  try {
    const res = await fetch(`/api/comparison-analysis/${runId}`);
    if (!res.ok) {
      throw new Error("Analysis not found");
    }
    const analysis = await res.json();
    setComparisonAnalysis(analysis);
    setComparisonAnalysisDialogOpen(true);
  } catch (err) {
    console.error("Error fetching comparison analysis:", err);
    alert(
      `Could not load analysis: ${err instanceof Error ? err.message : "Unknown error"}`,
    );
  } finally {
    setComparisonAnalysisLoading(false);
  }
};
|
|
514
|
+
|
|
515
|
+
// Fetch comparison run details, conversation, and restore saved messages
|
|
516
|
+
useEffect(() => {
|
|
517
|
+
let runData: ComparisonRun;
|
|
518
|
+
|
|
519
|
+
fetch(`/api/comparison-run/${runId}`)
|
|
520
|
+
.then((res) => res.json())
|
|
521
|
+
.then(async (data) => {
|
|
522
|
+
runData = data;
|
|
344
523
|
setRun(runData);
|
|
345
524
|
|
|
346
|
-
//
|
|
525
|
+
// Fetch conversation from source session to get all user messages
|
|
526
|
+
const conversationRes = await fetch(
|
|
527
|
+
`/api/session-conversation?sessionId=${runData.sourceSessionId}`,
|
|
528
|
+
);
|
|
529
|
+
const conversation = await conversationRes.json();
|
|
530
|
+
|
|
531
|
+
// Extract user messages in order AND build original conversation with tool calls
|
|
532
|
+
const messages: string[] = [];
|
|
533
|
+
const origMessages: ConversationItem[] = [];
|
|
534
|
+
for (const trace of conversation) {
|
|
535
|
+
if (trace.userInput) {
|
|
536
|
+
messages.push(trace.userInput);
|
|
537
|
+
origMessages.push({
|
|
538
|
+
type: "user" as const,
|
|
539
|
+
content: trace.userInput,
|
|
540
|
+
});
|
|
541
|
+
}
|
|
542
|
+
// Use agentMessages which includes both tool_calls and chat messages in order
|
|
543
|
+
if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
|
|
544
|
+
for (const msg of trace.agentMessages) {
|
|
545
|
+
if (msg.type === "tool_call") {
|
|
546
|
+
origMessages.push({
|
|
547
|
+
type: "tool_call" as const,
|
|
548
|
+
content: msg.toolName || msg.content,
|
|
549
|
+
toolName: msg.toolName,
|
|
550
|
+
toolInput: msg.toolInput,
|
|
551
|
+
toolOutput: msg.toolOutput,
|
|
552
|
+
});
|
|
553
|
+
} else if (msg.type === "chat" && msg.content?.trim()) {
|
|
554
|
+
origMessages.push({
|
|
555
|
+
type: "assistant" as const,
|
|
556
|
+
content: msg.content,
|
|
557
|
+
});
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
} else if (trace.llmOutput) {
|
|
561
|
+
// Fallback if no agentMessages
|
|
562
|
+
origMessages.push({
|
|
563
|
+
type: "assistant" as const,
|
|
564
|
+
content: trace.llmOutput,
|
|
565
|
+
});
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
// If no messages found in conversation, fall back to firstUserMessage
|
|
570
|
+
if (messages.length === 0 && runData.firstUserMessage) {
|
|
571
|
+
messages.push(runData.firstUserMessage);
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
setUserMessages(messages);
|
|
575
|
+
setOriginalMessages(origMessages);
|
|
576
|
+
|
|
577
|
+
// Fetch metrics for the original source session
|
|
578
|
+
if (runData.sourceSessionId) {
|
|
579
|
+
try {
|
|
580
|
+
const metricsRes = await fetch(
|
|
581
|
+
`/api/session-metrics/${runData.sourceSessionId}?model=${encodeURIComponent(config?.controlModel || "claude-sonnet-4-5-20250929")}`,
|
|
582
|
+
);
|
|
583
|
+
if (metricsRes.ok) {
|
|
584
|
+
const metrics = await metricsRes.json();
|
|
585
|
+
setOriginalMetrics(metrics);
|
|
586
|
+
}
|
|
587
|
+
} catch (err) {
|
|
588
|
+
console.error("Failed to fetch original session metrics:", err);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// Restore saved messages if the run has been completed or running
|
|
347
593
|
if (runData.status === "completed" || runData.status === "running") {
|
|
348
594
|
setHasRun(true);
|
|
349
595
|
|
|
596
|
+
// Fetch full conversation history from control and variant sessions
|
|
597
|
+
const [controlConversation, variantConversation] = await Promise.all([
|
|
598
|
+
runData.controlSessionId
|
|
599
|
+
? fetch(
|
|
600
|
+
`/api/session-conversation?sessionId=${runData.controlSessionId}`,
|
|
601
|
+
).then((res) => res.json())
|
|
602
|
+
: Promise.resolve([]),
|
|
603
|
+
runData.variantSessionId
|
|
604
|
+
? fetch(
|
|
605
|
+
`/api/session-conversation?sessionId=${runData.variantSessionId}`,
|
|
606
|
+
).then((res) => res.json())
|
|
607
|
+
: Promise.resolve([]),
|
|
608
|
+
]);
|
|
609
|
+
|
|
610
|
+
// Convert traces to conversation items (including tool calls)
|
|
611
|
+
const tracesToConversationItems = (
|
|
612
|
+
traces: Array<{
|
|
613
|
+
userInput?: string;
|
|
614
|
+
llmOutput?: string;
|
|
615
|
+
agentMessages?: Array<{
|
|
616
|
+
type: string;
|
|
617
|
+
content?: string;
|
|
618
|
+
toolName?: string;
|
|
619
|
+
toolInput?: unknown;
|
|
620
|
+
toolOutput?: unknown;
|
|
621
|
+
}>;
|
|
622
|
+
}>,
|
|
623
|
+
): ConversationItem[] => {
|
|
624
|
+
const items: ConversationItem[] = [];
|
|
625
|
+
for (const trace of traces) {
|
|
626
|
+
if (trace.userInput) {
|
|
627
|
+
items.push({
|
|
628
|
+
type: "user" as const,
|
|
629
|
+
content: trace.userInput,
|
|
630
|
+
});
|
|
631
|
+
}
|
|
632
|
+
// Use agentMessages which includes both tool_calls and chat messages in order
|
|
633
|
+
if (trace.agentMessages && Array.isArray(trace.agentMessages)) {
|
|
634
|
+
for (const msg of trace.agentMessages) {
|
|
635
|
+
if (msg.type === "tool_call") {
|
|
636
|
+
items.push({
|
|
637
|
+
type: "tool_call" as const,
|
|
638
|
+
content: msg.toolName || msg.content || "",
|
|
639
|
+
toolName: msg.toolName,
|
|
640
|
+
toolInput: msg.toolInput,
|
|
641
|
+
toolOutput: msg.toolOutput,
|
|
642
|
+
});
|
|
643
|
+
} else if (msg.type === "chat" && msg.content?.trim()) {
|
|
644
|
+
items.push({
|
|
645
|
+
type: "assistant" as const,
|
|
646
|
+
content: msg.content,
|
|
647
|
+
});
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
} else if (trace.llmOutput) {
|
|
651
|
+
// Fallback if no agentMessages
|
|
652
|
+
items.push({
|
|
653
|
+
type: "assistant" as const,
|
|
654
|
+
content: trace.llmOutput,
|
|
655
|
+
});
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
return items;
|
|
659
|
+
};
|
|
660
|
+
|
|
350
661
|
// Restore control messages
|
|
351
|
-
if (runData.
|
|
352
|
-
|
|
662
|
+
if (runData.controlSessionId) {
|
|
663
|
+
const controlMessages =
|
|
664
|
+
tracesToConversationItems(controlConversation);
|
|
665
|
+
// Count user messages for turnIndex
|
|
666
|
+
const controlUserCount = controlMessages.filter(
|
|
667
|
+
(m) => m.type === "user",
|
|
668
|
+
).length;
|
|
669
|
+
setControlState((prev) => ({
|
|
670
|
+
...prev,
|
|
353
671
|
sessionId: runData.controlSessionId,
|
|
354
|
-
messages:
|
|
355
|
-
{ role: "user", content: runData.firstUserMessage },
|
|
356
|
-
{ role: "assistant", content: runData.controlResponse },
|
|
357
|
-
],
|
|
672
|
+
messages: controlMessages,
|
|
358
673
|
isStreaming: false,
|
|
359
674
|
metrics: runData.controlMetrics,
|
|
360
675
|
error: null,
|
|
361
|
-
|
|
676
|
+
turnIndex: controlUserCount - 1,
|
|
677
|
+
}));
|
|
362
678
|
}
|
|
363
679
|
|
|
364
680
|
// Restore variant messages
|
|
365
|
-
if (runData.
|
|
366
|
-
|
|
681
|
+
if (runData.variantSessionId) {
|
|
682
|
+
const variantMessages =
|
|
683
|
+
tracesToConversationItems(variantConversation);
|
|
684
|
+
// Count user messages for turnIndex
|
|
685
|
+
const variantUserCount = variantMessages.filter(
|
|
686
|
+
(m) => m.type === "user",
|
|
687
|
+
).length;
|
|
688
|
+
setVariantState((prev) => ({
|
|
689
|
+
...prev,
|
|
367
690
|
sessionId: runData.variantSessionId,
|
|
368
|
-
messages:
|
|
369
|
-
{ role: "user", content: runData.firstUserMessage },
|
|
370
|
-
{ role: "assistant", content: runData.variantResponse },
|
|
371
|
-
],
|
|
691
|
+
messages: variantMessages,
|
|
372
692
|
isStreaming: false,
|
|
373
693
|
metrics: runData.variantMetrics,
|
|
374
694
|
error: null,
|
|
375
|
-
|
|
695
|
+
turnIndex: variantUserCount - 1,
|
|
696
|
+
}));
|
|
376
697
|
}
|
|
698
|
+
|
|
699
|
+
// Set queue state based on completed messages
|
|
700
|
+
const controlItems = tracesToConversationItems(controlConversation);
|
|
701
|
+
const variantItems = tracesToConversationItems(variantConversation);
|
|
702
|
+
const completedTurns = Math.min(
|
|
703
|
+
controlItems.filter((m) => m.type === "user").length,
|
|
704
|
+
variantItems.filter((m) => m.type === "user").length,
|
|
705
|
+
);
|
|
706
|
+
setQueueState({
|
|
707
|
+
currentIndex: completedTurns - 1,
|
|
708
|
+
stagedIndex: completedTurns,
|
|
709
|
+
status: runData.status === "completed" ? "completed" : "running",
|
|
710
|
+
});
|
|
377
711
|
}
|
|
378
712
|
|
|
379
713
|
// Fetch the config by the run's configId (not the latest config!)
|
|
@@ -389,7 +723,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
389
723
|
setError(err.message);
|
|
390
724
|
setLoading(false);
|
|
391
725
|
});
|
|
392
|
-
}, [runId]);
|
|
726
|
+
}, [runId, config?.controlModel]);
|
|
393
727
|
|
|
394
728
|
const generateRequestId = (prefix: string, sessionId?: string) => {
|
|
395
729
|
const randomPart =
|
|
@@ -446,7 +780,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
446
780
|
let abortController: AbortController | null = new AbortController();
|
|
447
781
|
|
|
448
782
|
// Start SSE connection (don't await - runs in background)
|
|
449
|
-
const
|
|
783
|
+
const _ssePromise = fetch(`${AGENT_SERVER_URL}/events`, {
|
|
450
784
|
headers: {
|
|
451
785
|
"X-Session-ID": sessionId,
|
|
452
786
|
},
|
|
@@ -535,29 +869,371 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
535
869
|
return accumulatedContent;
|
|
536
870
|
};
|
|
537
871
|
|
|
538
|
-
//
|
|
872
|
+
/**
 * Polls `/api/session-metrics/:sessionId` until the reported token and tool
 * call counts stop changing between two consecutive polls, or until the wait
 * budget is used up.
 *
 * @param sessionId - Session whose metrics are requested.
 * @param model - Model name forwarded as the `model` query parameter.
 * @param duration - Wall-clock duration (ms) stamped onto the result as
 *   `durationMs`, overriding whatever the endpoint returned.
 * @returns The last successfully fetched metrics, or an all-zero record
 *   (with `durationMs` set) if no poll ever succeeded.
 */
const fetchMetricsWithRetry = useCallback(
  async (
    sessionId: string,
    model: string,
    duration: number,
  ): Promise<SessionMetrics> => {
    const maxWaitMs = 60_000;
    const pollIntervalMs = 2_000;
    // Only sleep time is counted here; time spent inside fetch is not.
    let elapsed = 0;
    let previousTokens = -1;
    let previousTools = -1;
    let lastMetrics: SessionMetrics | null = null;

    while (elapsed <= maxWaitMs) {
      try {
        const metricsRes = await fetch(
          `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
        );
        // An error payload must not be mistaken for metrics; retry instead.
        if (!metricsRes.ok) {
          throw new Error(`HTTP ${metricsRes.status}`);
        }
        const metrics = await metricsRes.json();
        const snapshot: SessionMetrics = { ...metrics, durationMs: duration };
        lastMetrics = snapshot;

        // Normalise before comparing so a missing counter compares equal to
        // the 0 remembered from the previous poll.
        const tokens: number = metrics.totalTokens ?? 0;
        const tools: number = metrics.toolCallCount ?? 0;

        // If tokens/tool calls stopped changing and we have data, treat as final.
        if (
          tokens > 0 &&
          tokens === previousTokens &&
          tools === previousTools
        ) {
          return snapshot;
        }

        previousTokens = tokens;
        previousTools = tools;
      } catch {
        // Best effort: swallow and retry on the next poll.
      }

      await new Promise((r) => setTimeout(r, pollIntervalMs));
      elapsed += pollIntervalMs;
    }

    // Return whatever we last saw (or zeros if nothing ever arrived)
    return (
      lastMetrics ?? {
        durationMs: duration,
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        estimatedCost: 0,
        toolCallCount: 0,
      }
    );
  },
  [],
);
|
|
928
|
+
|
|
929
|
+
/**
 * Sends one user message to a single arm and tracks the reply in that arm's
 * state.
 *
 * The user message is appended and `isStreaming` set before sending. While
 * the reply streams, the trailing assistant entry is replaced (or appended if
 * the last entry is not an assistant message). On success the arm's
 * `turnIndex` becomes `messageIndex` and its metrics are refreshed. On
 * failure the error message is stored on the arm and an empty response with
 * all-zero metrics is returned instead of throwing.
 *
 * @param sessionId - Agent session to send to.
 * @param message - User message text.
 * @param messageIndex - Index of this message in the replay queue.
 * @param model - Model name used for the metrics lookup.
 * @param arm - Which arm's state to update.
 * @param startTime - `Date.now()` timestamp used to compute the duration.
 */
const sendMessageToArm = useCallback(
  async (
    sessionId: string,
    message: string,
    messageIndex: number,
    model: string,
    arm: "control" | "variant",
    startTime: number,
  ): Promise<{ response: string; metrics: SessionMetrics }> => {
    const updateArm = arm === "control" ? setControlState : setVariantState;

    // Streaming callback: keep exactly one trailing assistant entry that
    // holds the content accumulated so far.
    const applyStreamedContent = (content: string) => {
      updateArm((prev) => {
        const next = [...prev.messages];
        const tail = next[next.length - 1];
        if (tail && tail.type === "assistant") {
          next[next.length - 1] = { type: "assistant", content };
        } else {
          next.push({ type: "assistant", content });
        }
        return { ...prev, messages: next };
      });
    };

    try {
      // Add user message and set streaming
      updateArm((prev) => ({
        ...prev,
        isStreaming: true,
        messages: [...prev.messages, { type: "user", content: message }],
      }));

      const response = await sendMessageAndCollect(
        sessionId,
        message,
        applyStreamedContent,
      );

      const duration = Date.now() - startTime;
      const metrics = await fetchMetricsWithRetry(sessionId, model, duration);

      updateArm((prev) => ({
        ...prev,
        isStreaming: false,
        turnIndex: messageIndex,
        metrics,
        error: null,
      }));

      return { response, metrics };
    } catch (err) {
      updateArm((prev) => ({
        ...prev,
        isStreaming: false,
        error: err instanceof Error ? err.message : "Unknown error",
      }));
      return {
        response: "",
        metrics: {
          durationMs: 0,
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
          estimatedCost: 0,
          toolCallCount: 0,
        },
      };
    }
  },
  // biome-ignore lint/correctness/useExhaustiveDependencies: sendMessageAndCollect is stable
  [fetchMetricsWithRetry, sendMessageAndCollect],
);
|
|
1004
|
+
|
|
1005
|
+
/**
 * Sends the currently staged user message to one arm.
 *
 * Reads the latest arm/queue/message state from refs so the callback stays
 * stable. Does nothing when the arm is already sending or streaming, has no
 * session, or there is no staged message left. A synchronous ref lock is
 * taken before any state update so two rapid invocations cannot both send.
 *
 * @param arm - Which arm to send to.
 */
const sendStagedToArm = useCallback(
  async (arm: "control" | "variant") => {
    const isControl = arm === "control";
    const state = isControl ? controlStateRef.current : variantStateRef.current;
    const setState = isControl ? setControlState : setVariantState;
    const sendingRef = isControl ? controlSendingRef : variantSendingRef;
    const queue = queueStateRef.current;
    const messages = userMessagesRef.current;

    // Check the synchronous ref first to prevent duplicate sends
    if (sendingRef.current) return;
    if (!state.sessionId || state.isStreaming) return;
    if (queue.stagedIndex >= messages.length) return;

    const message = messages[queue.stagedIndex];
    if (!message) return;

    // Set sending lock immediately (synchronously) to prevent race conditions
    sendingRef.current = true;
    setState((prev) => ({ ...prev, isSending: true }));

    // The variant falls back to the control model, then to the default.
    const model = isControl
      ? config?.controlModel || "claude-sonnet-4-5-20250929"
      : config?.variantModel ||
        config?.controlModel ||
        "claude-sonnet-4-5-20250929";

    try {
      await sendMessageToArm(
        state.sessionId,
        message,
        queue.stagedIndex,
        model,
        arm,
        Date.now(),
      );
    } finally {
      // Clear sending lock after completion
      sendingRef.current = false;
      setState((prev) => ({ ...prev, isSending: false }));
    }
  },
  [config, sendMessageToArm],
);
|
|
1052
|
+
|
|
1053
|
+
/**
 * Sends the staged user message to both arms in parallel. Does nothing unless
 * both arms have a session and neither is currently streaming or sending.
 */
const sendStagedToBoth = useCallback(async () => {
  const control = controlStateRef.current;
  const variant = variantStateRef.current;

  const bothHaveSessions = Boolean(control.sessionId && variant.sessionId);
  const anyStreaming = control.isStreaming || variant.isStreaming;
  const anySending = controlSendingRef.current || variantSendingRef.current;
  if (!bothHaveSessions || anyStreaming || anySending) return;

  await Promise.all([sendStagedToArm("control"), sendStagedToArm("variant")]);
}, [sendStagedToArm]);
|
|
1064
|
+
|
|
1065
|
+
// Advance the replay queue once both arms have completed the same turn.
// When the last user message has been answered by both arms, the queue is
// marked completed and the final state is persisted to the server.
useEffect(() => {
  // Both arms must be idle (not streaming, not mid-send) and the queue running.
  const anyBusy =
    controlState.isStreaming ||
    variantState.isStreaming ||
    controlState.isSending ||
    variantState.isSending;
  if (anyBusy || queueState.status !== "running") return;
  if (controlState.turnIndex !== variantState.turnIndex) return;

  const completedIndex = controlState.turnIndex;
  // Nothing new finished since the queue was last advanced.
  if (completedIndex <= queueState.currentIndex) return;

  const nextIndex = completedIndex + 1;

  if (nextIndex < userMessages.length) {
    // Stage next message
    setQueueState((prev) => ({
      ...prev,
      currentIndex: completedIndex,
      stagedIndex: nextIndex,
    }));
    return;
  }

  // All messages completed
  setQueueState((prev) => ({
    ...prev,
    currentIndex: completedIndex,
    status: "completed",
  }));
  setIsRunning(false);

  // Persist final state
  if (!run || !controlState.sessionId || !variantState.sessionId) return;

  // Last response is the trailing entry only if it is an assistant message.
  const trailingAssistantText = (items: ConversationItem[]) => {
    const last = items[items.length - 1];
    return last?.type === "assistant" ? last.content : "";
  };

  fetch(`/api/comparison-run/${runId}/update`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      status: "completed",
      controlMetrics: controlState.metrics,
      variantMetrics: variantState.metrics,
      controlResponse: trailingAssistantText(controlState.messages),
      variantResponse: trailingAssistantText(variantState.messages),
    }),
  }).catch((err: unknown) => {
    // Fire-and-forget: log instead of leaving an unhandled rejection.
    console.error("Failed to persist comparison run:", err);
  });
}, [
  controlState.isStreaming,
  controlState.isSending,
  controlState.turnIndex,
  controlState.messages,
  controlState.metrics,
  controlState.sessionId,
  variantState.isStreaming,
  variantState.isSending,
  variantState.turnIndex,
  variantState.messages,
  variantState.metrics,
  variantState.sessionId,
  queueState.status,
  queueState.currentIndex,
  userMessages.length,
  run,
  runId,
]);
|
|
1147
|
+
|
|
1148
|
+
// Auto-send staged message when conditions are met.
//
// For each arm with auto-run enabled, the staged message is sent as soon as
// that arm is idle (not streaming, not sending), has a session, and its last
// completed turn matches the queue's current index. The arms are evaluated
// independently, so one arm can auto-advance while the other waits for a
// manual send.
useEffect(() => {
  if (queueState.status !== "running") return;
  if (queueState.stagedIndex >= userMessages.length) return;

  const message = userMessages[queueState.stagedIndex];
  if (!message) return;

  // Shared readiness check for both arms.
  const shouldAutoSend = (armState: typeof controlState): boolean =>
    armState.autoRun &&
    !armState.isStreaming &&
    !armState.isSending &&
    !!armState.sessionId &&
    armState.turnIndex === queueState.currentIndex;

  // sendStagedToArm is async and is deliberately not awaited here; a rejection
  // is logged so it cannot surface as an unhandled promise rejection.
  const autoSend = (arm: "control" | "variant"): void => {
    void sendStagedToArm(arm).catch((err: unknown) => {
      console.error(`Auto-send to ${arm} arm failed`, err);
    });
  };

  // Check if control should auto-send
  if (shouldAutoSend(controlState)) {
    autoSend("control");
  }

  // Check if variant should auto-send
  if (shouldAutoSend(variantState)) {
    autoSend("variant");
  }
}, [
  queueState.status,
  queueState.stagedIndex,
  queueState.currentIndex,
  userMessages,
  controlState.autoRun,
  controlState.isStreaming,
  controlState.isSending,
  controlState.sessionId,
  controlState.turnIndex,
  variantState.autoRun,
  variantState.isStreaming,
  variantState.isSending,
  variantState.sessionId,
  variantState.turnIndex,
  sendStagedToArm,
]);
|
|
1194
|
+
|
|
1195
|
+
// Flip the `autoRun` flag of one arm, leaving the rest of its state untouched.
// Uses functional state updates, so the callback needs no dependencies.
const toggleAutoRun = useCallback((arm: "control" | "variant") => {
  if (arm === "control") {
    setControlState((current) => ({ ...current, autoRun: !current.autoRun }));
  } else {
    setVariantState((current) => ({ ...current, autoRun: !current.autoRun }));
  }
}, []);
|
|
1200
|
+
|
|
1201
|
+
// Start the comparison (initialize sessions, first message sent by auto-send effect)
|
|
539
1202
|
const runComparison = useCallback(async () => {
|
|
540
|
-
if (!run || !config) return;
|
|
1203
|
+
if (!run || !config || userMessages.length === 0) return;
|
|
541
1204
|
|
|
542
1205
|
setIsRunning(true);
|
|
543
1206
|
setHasRun(true);
|
|
544
1207
|
|
|
545
|
-
|
|
1208
|
+
// Reset sending refs
|
|
1209
|
+
controlSendingRef.current = false;
|
|
1210
|
+
variantSendingRef.current = false;
|
|
546
1211
|
|
|
547
|
-
// Reset states
|
|
1212
|
+
// Reset states with initial autoRun setting
|
|
548
1213
|
setControlState({
|
|
549
1214
|
sessionId: null,
|
|
550
|
-
messages: [
|
|
551
|
-
isStreaming:
|
|
1215
|
+
messages: [],
|
|
1216
|
+
isStreaming: false,
|
|
1217
|
+
isSending: false,
|
|
552
1218
|
metrics: null,
|
|
553
1219
|
error: null,
|
|
1220
|
+
autoRun: initialAutoRun,
|
|
1221
|
+
turnIndex: -1,
|
|
554
1222
|
});
|
|
555
1223
|
setVariantState({
|
|
556
1224
|
sessionId: null,
|
|
557
|
-
messages: [
|
|
558
|
-
isStreaming:
|
|
1225
|
+
messages: [],
|
|
1226
|
+
isStreaming: false,
|
|
1227
|
+
isSending: false,
|
|
559
1228
|
metrics: null,
|
|
560
1229
|
error: null,
|
|
1230
|
+
autoRun: initialAutoRun,
|
|
1231
|
+
turnIndex: -1,
|
|
1232
|
+
});
|
|
1233
|
+
setQueueState({
|
|
1234
|
+
currentIndex: -1,
|
|
1235
|
+
stagedIndex: 0,
|
|
1236
|
+
status: "running",
|
|
561
1237
|
});
|
|
562
1238
|
|
|
563
1239
|
try {
|
|
@@ -594,189 +1270,30 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
594
1270
|
}),
|
|
595
1271
|
});
|
|
596
1272
|
|
|
597
|
-
//
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
// Track final responses and metrics
|
|
601
|
-
let finalControlMetrics: SessionMetrics = {
|
|
602
|
-
durationMs: 0,
|
|
603
|
-
inputTokens: 0,
|
|
604
|
-
outputTokens: 0,
|
|
605
|
-
totalTokens: 0,
|
|
606
|
-
estimatedCost: 0,
|
|
607
|
-
toolCallCount: 0,
|
|
608
|
-
};
|
|
609
|
-
let finalVariantMetrics: SessionMetrics = {
|
|
610
|
-
durationMs: 0,
|
|
611
|
-
inputTokens: 0,
|
|
612
|
-
outputTokens: 0,
|
|
613
|
-
totalTokens: 0,
|
|
614
|
-
estimatedCost: 0,
|
|
615
|
-
toolCallCount: 0,
|
|
616
|
-
};
|
|
617
|
-
|
|
618
|
-
// Helper to run a session and fetch metrics
|
|
619
|
-
const runSession = async (
|
|
620
|
-
sessionId: string,
|
|
621
|
-
model: string,
|
|
622
|
-
setState: typeof setControlState,
|
|
623
|
-
onContentUpdate: (content: string) => void,
|
|
624
|
-
): Promise<{ response: string; metrics: SessionMetrics }> => {
|
|
625
|
-
try {
|
|
626
|
-
const response = await sendMessageAndCollect(
|
|
627
|
-
sessionId,
|
|
628
|
-
firstMessage,
|
|
629
|
-
onContentUpdate,
|
|
630
|
-
);
|
|
631
|
-
|
|
632
|
-
const duration = Date.now() - startTime;
|
|
633
|
-
|
|
634
|
-
// Poll metrics until they stabilize or we hit a max wait window.
|
|
635
|
-
const fetchMetricsWithRetry = async (): Promise<SessionMetrics> => {
|
|
636
|
-
const maxWaitMs = 60_000;
|
|
637
|
-
const pollIntervalMs = 2_000;
|
|
638
|
-
let elapsed = 0;
|
|
639
|
-
let previousTokens = -1;
|
|
640
|
-
let previousTools = -1;
|
|
641
|
-
let lastMetrics: SessionMetrics | null = null;
|
|
642
|
-
|
|
643
|
-
while (elapsed <= maxWaitMs) {
|
|
644
|
-
try {
|
|
645
|
-
const metricsRes = await fetch(
|
|
646
|
-
`/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
|
|
647
|
-
);
|
|
648
|
-
const metrics = await metricsRes.json();
|
|
649
|
-
lastMetrics = { ...metrics, durationMs: duration };
|
|
650
|
-
|
|
651
|
-
// If tokens/tool calls stopped changing and we have data, treat as final.
|
|
652
|
-
if (
|
|
653
|
-
metrics.totalTokens > 0 &&
|
|
654
|
-
metrics.totalTokens === previousTokens &&
|
|
655
|
-
metrics.toolCallCount === previousTools
|
|
656
|
-
) {
|
|
657
|
-
return lastMetrics!;
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
previousTokens = metrics.totalTokens ?? 0;
|
|
661
|
-
previousTools = metrics.toolCallCount ?? 0;
|
|
662
|
-
} catch {
|
|
663
|
-
// swallow and retry
|
|
664
|
-
}
|
|
665
|
-
|
|
666
|
-
await new Promise((r) => setTimeout(r, pollIntervalMs));
|
|
667
|
-
elapsed += pollIntervalMs;
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
// Return whatever we last saw (or zeros if nothing ever arrived)
|
|
671
|
-
return (
|
|
672
|
-
lastMetrics ?? {
|
|
673
|
-
durationMs: duration,
|
|
674
|
-
inputTokens: 0,
|
|
675
|
-
outputTokens: 0,
|
|
676
|
-
totalTokens: 0,
|
|
677
|
-
estimatedCost: 0,
|
|
678
|
-
toolCallCount: 0,
|
|
679
|
-
}
|
|
680
|
-
);
|
|
681
|
-
};
|
|
682
|
-
|
|
683
|
-
const metrics = await fetchMetricsWithRetry();
|
|
684
|
-
|
|
685
|
-
setState((prev) => ({
|
|
686
|
-
...prev,
|
|
687
|
-
isStreaming: false,
|
|
688
|
-
metrics,
|
|
689
|
-
}));
|
|
690
|
-
|
|
691
|
-
return { response, metrics };
|
|
692
|
-
} catch (err) {
|
|
693
|
-
setState((prev) => ({
|
|
694
|
-
...prev,
|
|
695
|
-
isStreaming: false,
|
|
696
|
-
error: err instanceof Error ? err.message : "Unknown error",
|
|
697
|
-
}));
|
|
698
|
-
return {
|
|
699
|
-
response: "",
|
|
700
|
-
metrics: {
|
|
701
|
-
durationMs: 0,
|
|
702
|
-
inputTokens: 0,
|
|
703
|
-
outputTokens: 0,
|
|
704
|
-
totalTokens: 0,
|
|
705
|
-
estimatedCost: 0,
|
|
706
|
-
toolCallCount: 0,
|
|
707
|
-
},
|
|
708
|
-
};
|
|
709
|
-
}
|
|
710
|
-
};
|
|
711
|
-
|
|
712
|
-
const controlModel = config.controlModel || "claude-sonnet-4-5-20250929";
|
|
713
|
-
const variantModel =
|
|
714
|
-
config.variantModel ||
|
|
715
|
-
config.controlModel ||
|
|
716
|
-
"claude-sonnet-4-5-20250929";
|
|
717
|
-
|
|
718
|
-
const [controlResult, variantResult] = await Promise.all([
|
|
719
|
-
runSession(
|
|
720
|
-
controlSessionId,
|
|
721
|
-
controlModel,
|
|
722
|
-
setControlState,
|
|
723
|
-
(content) => {
|
|
724
|
-
setControlState((prev) => ({
|
|
725
|
-
...prev,
|
|
726
|
-
messages: [
|
|
727
|
-
{ role: "user", content: firstMessage },
|
|
728
|
-
{ role: "assistant", content },
|
|
729
|
-
],
|
|
730
|
-
}));
|
|
731
|
-
},
|
|
732
|
-
),
|
|
733
|
-
runSession(
|
|
734
|
-
variantSessionId,
|
|
735
|
-
variantModel,
|
|
736
|
-
setVariantState,
|
|
737
|
-
(content) => {
|
|
738
|
-
setVariantState((prev) => ({
|
|
739
|
-
...prev,
|
|
740
|
-
messages: [
|
|
741
|
-
{ role: "user", content: firstMessage },
|
|
742
|
-
{ role: "assistant", content },
|
|
743
|
-
],
|
|
744
|
-
}));
|
|
745
|
-
},
|
|
746
|
-
),
|
|
747
|
-
]);
|
|
748
|
-
|
|
749
|
-
finalControlMetrics = controlResult.metrics;
|
|
750
|
-
finalVariantMetrics = variantResult.metrics;
|
|
751
|
-
|
|
752
|
-
// Update run status with responses and metrics
|
|
753
|
-
await fetch(`/api/comparison-run/${runId}/update`, {
|
|
754
|
-
method: "POST",
|
|
755
|
-
headers: { "Content-Type": "application/json" },
|
|
756
|
-
body: JSON.stringify({
|
|
757
|
-
status: "completed",
|
|
758
|
-
controlMetrics: finalControlMetrics,
|
|
759
|
-
variantMetrics: finalVariantMetrics,
|
|
760
|
-
controlResponse: controlResult.response,
|
|
761
|
-
variantResponse: variantResult.response,
|
|
762
|
-
}),
|
|
763
|
-
});
|
|
1273
|
+
// Don't send first message here - let the auto-send effect handle it
|
|
1274
|
+
// This ensures all messages go through the same code path and prevents duplicates
|
|
764
1275
|
} catch (err) {
|
|
765
1276
|
setError(err instanceof Error ? err.message : "Failed to run comparison");
|
|
766
|
-
} finally {
|
|
767
1277
|
setIsRunning(false);
|
|
768
1278
|
}
|
|
769
|
-
|
|
1279
|
+
// biome-ignore lint/correctness/useExhaustiveDependencies: stable refs
|
|
1280
|
+
}, [run, config, userMessages, initialAutoRun, runId, createSession]);
|
|
770
1281
|
|
|
771
1282
|
// Function to fetch existing or trigger new session analysis
|
|
772
1283
|
const triggerAnalysis = useCallback(
|
|
773
|
-
async (sessionId: string, type: "control" | "variant") => {
|
|
1284
|
+
async (sessionId: string, type: "original" | "control" | "variant") => {
|
|
774
1285
|
const setLoading =
|
|
775
|
-
type === "
|
|
776
|
-
?
|
|
777
|
-
:
|
|
1286
|
+
type === "original"
|
|
1287
|
+
? setOriginalAnalysisLoading
|
|
1288
|
+
: type === "control"
|
|
1289
|
+
? setControlAnalysisLoading
|
|
1290
|
+
: setVariantAnalysisLoading;
|
|
778
1291
|
const setAnalysis =
|
|
779
|
-
type === "
|
|
1292
|
+
type === "original"
|
|
1293
|
+
? setOriginalAnalysis
|
|
1294
|
+
: type === "control"
|
|
1295
|
+
? setControlAnalysis
|
|
1296
|
+
: setVariantAnalysis;
|
|
780
1297
|
|
|
781
1298
|
setLoading(true);
|
|
782
1299
|
try {
|
|
@@ -812,47 +1329,6 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
812
1329
|
[],
|
|
813
1330
|
);
|
|
814
1331
|
|
|
815
|
-
// Auto-trigger analysis when sessions complete
|
|
816
|
-
useEffect(() => {
|
|
817
|
-
// Control session completed
|
|
818
|
-
if (
|
|
819
|
-
controlState.sessionId &&
|
|
820
|
-
!controlState.isStreaming &&
|
|
821
|
-
controlState.metrics &&
|
|
822
|
-
!controlAnalysis &&
|
|
823
|
-
!controlAnalysisLoading
|
|
824
|
-
) {
|
|
825
|
-
triggerAnalysis(controlState.sessionId, "control");
|
|
826
|
-
}
|
|
827
|
-
}, [
|
|
828
|
-
controlState.sessionId,
|
|
829
|
-
controlState.isStreaming,
|
|
830
|
-
controlState.metrics,
|
|
831
|
-
controlAnalysis,
|
|
832
|
-
controlAnalysisLoading,
|
|
833
|
-
triggerAnalysis,
|
|
834
|
-
]);
|
|
835
|
-
|
|
836
|
-
useEffect(() => {
|
|
837
|
-
// Variant session completed
|
|
838
|
-
if (
|
|
839
|
-
variantState.sessionId &&
|
|
840
|
-
!variantState.isStreaming &&
|
|
841
|
-
variantState.metrics &&
|
|
842
|
-
!variantAnalysis &&
|
|
843
|
-
!variantAnalysisLoading
|
|
844
|
-
) {
|
|
845
|
-
triggerAnalysis(variantState.sessionId, "variant");
|
|
846
|
-
}
|
|
847
|
-
}, [
|
|
848
|
-
variantState.sessionId,
|
|
849
|
-
variantState.isStreaming,
|
|
850
|
-
variantState.metrics,
|
|
851
|
-
variantAnalysis,
|
|
852
|
-
variantAnalysisLoading,
|
|
853
|
-
triggerAnalysis,
|
|
854
|
-
]);
|
|
855
|
-
|
|
856
1332
|
if (loading) {
|
|
857
1333
|
return (
|
|
858
1334
|
<DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
|
|
@@ -924,38 +1400,216 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
924
1400
|
<div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
|
|
925
1401
|
{/* Header */}
|
|
926
1402
|
<div className="flex items-center justify-between mb-4">
|
|
927
|
-
<div>
|
|
928
|
-
<
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
1403
|
+
<div className="flex items-center gap-3">
|
|
1404
|
+
<div>
|
|
1405
|
+
<h2 className="text-lg font-semibold">A/B Comparison</h2>
|
|
1406
|
+
<p className="text-sm text-muted-foreground">
|
|
1407
|
+
Comparing: {getDimensionsSummary()}
|
|
1408
|
+
</p>
|
|
1409
|
+
</div>
|
|
1410
|
+
{/* Message count badge when running */}
|
|
1411
|
+
{hasRun && userMessages.length > 1 && (
|
|
1412
|
+
<div className="flex items-center gap-2 px-3 py-1.5 rounded-full bg-muted text-sm">
|
|
1413
|
+
<span className="font-medium">
|
|
1414
|
+
{queueState.currentIndex + 1}/{userMessages.length}
|
|
1415
|
+
</span>
|
|
1416
|
+
<span className="text-muted-foreground">messages</span>
|
|
1417
|
+
{queueState.status === "completed" && (
|
|
1418
|
+
<span className="text-green-600 dark:text-green-400 text-xs">
|
|
1419
|
+
Complete
|
|
1420
|
+
</span>
|
|
1421
|
+
)}
|
|
1422
|
+
</div>
|
|
1423
|
+
)}
|
|
1424
|
+
</div>
|
|
1425
|
+
<div className="flex items-center gap-2">
|
|
1426
|
+
{/* Comparison Analysis button - shown when comparison is complete */}
|
|
1427
|
+
{hasRun &&
|
|
1428
|
+
queueState.status === "completed" &&
|
|
1429
|
+
(hasComparisonAnalysis ? (
|
|
1430
|
+
<>
|
|
1431
|
+
<Button
|
|
1432
|
+
variant="outline"
|
|
1433
|
+
size="sm"
|
|
1434
|
+
onClick={showComparisonAnalysis}
|
|
1435
|
+
disabled={comparisonAnalysisLoading}
|
|
1436
|
+
>
|
|
1437
|
+
{comparisonAnalysisLoading ? (
|
|
1438
|
+
<Loader2 className="w-4 h-4 mr-2 animate-spin" />
|
|
1439
|
+
) : (
|
|
1440
|
+
<BarChart3 className="w-4 h-4 mr-2" />
|
|
1441
|
+
)}
|
|
1442
|
+
Show Analysis
|
|
1443
|
+
</Button>
|
|
1444
|
+
<Button
|
|
1445
|
+
variant="ghost"
|
|
1446
|
+
size="sm"
|
|
1447
|
+
onClick={runComparisonAnalysis}
|
|
1448
|
+
disabled={comparisonAnalysisLoading}
|
|
1449
|
+
>
|
|
1450
|
+
Re-analyze
|
|
1451
|
+
</Button>
|
|
1452
|
+
</>
|
|
1453
|
+
) : (
|
|
1454
|
+
<Button
|
|
1455
|
+
variant="outline"
|
|
1456
|
+
size="sm"
|
|
1457
|
+
onClick={runComparisonAnalysis}
|
|
1458
|
+
disabled={comparisonAnalysisLoading}
|
|
1459
|
+
>
|
|
1460
|
+
{comparisonAnalysisLoading ? (
|
|
1461
|
+
<Loader2 className="w-4 h-4 mr-2 animate-spin" />
|
|
1462
|
+
) : (
|
|
1463
|
+
<BarChart3 className="w-4 h-4 mr-2" />
|
|
1464
|
+
)}
|
|
1465
|
+
Analyze Comparison
|
|
1466
|
+
</Button>
|
|
1467
|
+
))}
|
|
1468
|
+
{!hasRun && (
|
|
1469
|
+
<Button
|
|
1470
|
+
onClick={runComparison}
|
|
1471
|
+
disabled={isRunning || userMessages.length === 0}
|
|
1472
|
+
>
|
|
1473
|
+
{isRunning ? "Running..." : "Start Comparison"}
|
|
1474
|
+
</Button>
|
|
1475
|
+
)}
|
|
932
1476
|
</div>
|
|
933
|
-
{!hasRun && (
|
|
934
|
-
<Button onClick={runComparison} disabled={isRunning}>
|
|
935
|
-
{isRunning ? "Running..." : "Run Comparison"}
|
|
936
|
-
</Button>
|
|
937
|
-
)}
|
|
938
1477
|
</div>
|
|
939
1478
|
|
|
1479
|
+
{/* Queue Banner - shown when there's a staged message waiting */}
|
|
1480
|
+
{hasRun &&
|
|
1481
|
+
queueState.status === "running" &&
|
|
1482
|
+
queueState.stagedIndex > queueState.currentIndex &&
|
|
1483
|
+
queueState.stagedIndex < userMessages.length &&
|
|
1484
|
+
!controlState.isStreaming &&
|
|
1485
|
+
!variantState.isStreaming && (
|
|
1486
|
+
<div className="mb-4 p-3 rounded-lg border bg-muted/50 flex items-center gap-4">
|
|
1487
|
+
<div className="flex-1">
|
|
1488
|
+
<div className="text-xs font-medium text-muted-foreground mb-1">
|
|
1489
|
+
Next message ready (#{queueState.stagedIndex + 1})
|
|
1490
|
+
</div>
|
|
1491
|
+
<div className="text-sm truncate">
|
|
1492
|
+
{userMessages[queueState.stagedIndex]?.slice(0, 100)}
|
|
1493
|
+
{(userMessages[queueState.stagedIndex]?.length ?? 0) > 100
|
|
1494
|
+
? "..."
|
|
1495
|
+
: ""}
|
|
1496
|
+
</div>
|
|
1497
|
+
</div>
|
|
1498
|
+
<div className="flex items-center gap-2 shrink-0">
|
|
1499
|
+
{/* Per-arm send buttons when that arm is not auto-running */}
|
|
1500
|
+
{!controlState.autoRun &&
|
|
1501
|
+
controlState.turnIndex === queueState.currentIndex && (
|
|
1502
|
+
<Button
|
|
1503
|
+
size="sm"
|
|
1504
|
+
variant="outline"
|
|
1505
|
+
onClick={() => sendStagedToArm("control")}
|
|
1506
|
+
className="text-blue-600 border-blue-300 hover:bg-blue-50 dark:text-blue-400 dark:border-blue-700 dark:hover:bg-blue-950"
|
|
1507
|
+
>
|
|
1508
|
+
<Play className="w-3 h-3 mr-1" />
|
|
1509
|
+
Control
|
|
1510
|
+
</Button>
|
|
1511
|
+
)}
|
|
1512
|
+
{!variantState.autoRun &&
|
|
1513
|
+
variantState.turnIndex === queueState.currentIndex && (
|
|
1514
|
+
<Button
|
|
1515
|
+
size="sm"
|
|
1516
|
+
variant="outline"
|
|
1517
|
+
onClick={() => sendStagedToArm("variant")}
|
|
1518
|
+
className="text-orange-600 border-orange-300 hover:bg-orange-50 dark:text-orange-400 dark:border-orange-700 dark:hover:bg-orange-950"
|
|
1519
|
+
>
|
|
1520
|
+
<Play className="w-3 h-3 mr-1" />
|
|
1521
|
+
Variant
|
|
1522
|
+
</Button>
|
|
1523
|
+
)}
|
|
1524
|
+
{/* Send to both button */}
|
|
1525
|
+
{!controlState.autoRun &&
|
|
1526
|
+
!variantState.autoRun &&
|
|
1527
|
+
controlState.turnIndex === queueState.currentIndex &&
|
|
1528
|
+
variantState.turnIndex === queueState.currentIndex && (
|
|
1529
|
+
<Button size="sm" onClick={sendStagedToBoth}>
|
|
1530
|
+
<Play className="w-3 h-3 mr-1" />
|
|
1531
|
+
Send to Both
|
|
1532
|
+
</Button>
|
|
1533
|
+
)}
|
|
1534
|
+
</div>
|
|
1535
|
+
</div>
|
|
1536
|
+
)}
|
|
1537
|
+
|
|
940
1538
|
{/* Pre-run state */}
|
|
941
1539
|
{!hasRun && (
|
|
942
1540
|
<div className="flex-1 flex items-center justify-center">
|
|
943
|
-
<Card className="max-w-
|
|
1541
|
+
<Card className="max-w-lg w-full">
|
|
944
1542
|
<CardHeader className="text-center">
|
|
945
1543
|
<CardTitle>Ready to Compare</CardTitle>
|
|
946
1544
|
<CardDescription>
|
|
947
|
-
This comparison will
|
|
948
|
-
|
|
1545
|
+
This comparison will replay {userMessages.length} user message
|
|
1546
|
+
{userMessages.length !== 1 ? "s" : ""} to both configurations
|
|
1547
|
+
and display the results side by side.
|
|
949
1548
|
</CardDescription>
|
|
950
1549
|
</CardHeader>
|
|
951
1550
|
<CardContent className="space-y-4">
|
|
952
|
-
|
|
1551
|
+
{/* User messages list */}
|
|
1552
|
+
<div className="bg-muted rounded-lg p-4 max-h-64 overflow-y-auto">
|
|
953
1553
|
<div className="text-xs font-medium uppercase text-muted-foreground mb-2">
|
|
954
|
-
|
|
1554
|
+
User Messages ({userMessages.length})
|
|
1555
|
+
</div>
|
|
1556
|
+
<div className="space-y-2">
|
|
1557
|
+
{userMessages.map((msg, idx) => (
|
|
1558
|
+
<details
|
|
1559
|
+
key={`user-msg-${msg.slice(0, 50)}-${idx}`}
|
|
1560
|
+
className="group"
|
|
1561
|
+
>
|
|
1562
|
+
<summary className="text-sm cursor-pointer flex items-center gap-2 hover:text-foreground">
|
|
1563
|
+
<span className="text-xs font-mono text-muted-foreground w-5">
|
|
1564
|
+
{idx + 1}.
|
|
1565
|
+
</span>
|
|
1566
|
+
<span className="truncate flex-1">
|
|
1567
|
+
{msg.slice(0, 80)}
|
|
1568
|
+
{msg.length > 80 ? "..." : ""}
|
|
1569
|
+
</span>
|
|
1570
|
+
<ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
|
|
1571
|
+
</summary>
|
|
1572
|
+
<div className="mt-2 ml-7 text-sm whitespace-pre-wrap bg-background/50 rounded p-2 text-muted-foreground">
|
|
1573
|
+
{msg}
|
|
1574
|
+
</div>
|
|
1575
|
+
</details>
|
|
1576
|
+
))}
|
|
955
1577
|
</div>
|
|
956
|
-
<div className="text-sm">{run?.firstUserMessage}</div>
|
|
957
1578
|
</div>
|
|
958
|
-
|
|
1579
|
+
|
|
1580
|
+
{/* Auto-run checkbox */}
|
|
1581
|
+
{userMessages.length > 1 && (
|
|
1582
|
+
<div className="flex items-center gap-3 p-3 rounded-lg border bg-background">
|
|
1583
|
+
<Checkbox
|
|
1584
|
+
id="auto-run"
|
|
1585
|
+
checked={initialAutoRun}
|
|
1586
|
+
onCheckedChange={(checked) => setInitialAutoRun(checked)}
|
|
1587
|
+
/>
|
|
1588
|
+
<div className="flex-1">
|
|
1589
|
+
<label
|
|
1590
|
+
htmlFor="auto-run"
|
|
1591
|
+
className="text-sm font-medium cursor-pointer"
|
|
1592
|
+
>
|
|
1593
|
+
Auto run all messages
|
|
1594
|
+
</label>
|
|
1595
|
+
<p className="text-xs text-muted-foreground">
|
|
1596
|
+
If off, next messages are enqueued after each turn.
|
|
1597
|
+
</p>
|
|
1598
|
+
</div>
|
|
1599
|
+
</div>
|
|
1600
|
+
)}
|
|
1601
|
+
|
|
1602
|
+
{/* Original vs Control vs Variant labels */}
|
|
1603
|
+
<div className="grid grid-cols-3 gap-4 text-sm">
|
|
1604
|
+
<div className="space-y-1">
|
|
1605
|
+
<div className="flex items-center gap-2">
|
|
1606
|
+
<span className="w-2 h-2 rounded-full bg-yellow-500" />
|
|
1607
|
+
<span className="font-medium">Original</span>
|
|
1608
|
+
</div>
|
|
1609
|
+
<div className="text-muted-foreground text-xs">
|
|
1610
|
+
Source session
|
|
1611
|
+
</div>
|
|
1612
|
+
</div>
|
|
959
1613
|
<div className="space-y-1">
|
|
960
1614
|
<div className="flex items-center gap-2">
|
|
961
1615
|
<span className="w-2 h-2 rounded-full bg-blue-500" />
|
|
@@ -980,16 +1634,165 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
980
1634
|
</div>
|
|
981
1635
|
)}
|
|
982
1636
|
|
|
983
|
-
{/* Side-by-side comparison */}
|
|
1637
|
+
{/* Side-by-side comparison - 3 panes: Original, Control, Variant */}
|
|
984
1638
|
{hasRun && (
|
|
985
|
-
<div className="grid grid-cols-
|
|
986
|
-
{/*
|
|
1639
|
+
<div className="grid grid-cols-3 gap-4 flex-1 min-h-0">
|
|
1640
|
+
{/* Original (Source Session - Read Only) */}
|
|
987
1641
|
<Card className="flex flex-col h-full min-h-0 overflow-hidden">
|
|
988
1642
|
<CardHeader className="py-3 border-b shrink-0">
|
|
989
1643
|
<CardTitle className="text-sm flex items-center gap-2">
|
|
990
|
-
<span className="w-2 h-2 rounded-full bg-
|
|
991
|
-
|
|
1644
|
+
<span className="w-2 h-2 rounded-full bg-yellow-500" />
|
|
1645
|
+
Original Session
|
|
992
1646
|
</CardTitle>
|
|
1647
|
+
<CardDescription className="text-xs">
|
|
1648
|
+
Source session (read-only)
|
|
1649
|
+
</CardDescription>
|
|
1650
|
+
</CardHeader>
|
|
1651
|
+
<CardContent className="flex-1 overflow-auto py-4">
|
|
1652
|
+
{originalMessages.map((msg, i) => (
|
|
1653
|
+
<div
|
|
1654
|
+
key={`original-${msg.type}-${i}`}
|
|
1655
|
+
className={`mb-4 ${
|
|
1656
|
+
msg.type === "user"
|
|
1657
|
+
? "text-yellow-600 dark:text-yellow-400"
|
|
1658
|
+
: msg.type === "tool_call"
|
|
1659
|
+
? ""
|
|
1660
|
+
: ""
|
|
1661
|
+
}`}
|
|
1662
|
+
>
|
|
1663
|
+
{msg.type === "tool_call" ? (
|
|
1664
|
+
<details className="rounded bg-muted/50 border text-xs group">
|
|
1665
|
+
<summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
|
|
1666
|
+
<span className="text-muted-foreground">🔧</span>
|
|
1667
|
+
<span className="font-medium flex-1">
|
|
1668
|
+
{msg.toolName || msg.content}
|
|
1669
|
+
</span>
|
|
1670
|
+
<ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
|
|
1671
|
+
</summary>
|
|
1672
|
+
<div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
|
|
1673
|
+
{msg.toolInput !== null &&
|
|
1674
|
+
msg.toolInput !== undefined && (
|
|
1675
|
+
<div>
|
|
1676
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
1677
|
+
Args
|
|
1678
|
+
</div>
|
|
1679
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
1680
|
+
{typeof msg.toolInput === "string"
|
|
1681
|
+
? msg.toolInput
|
|
1682
|
+
: JSON.stringify(msg.toolInput, null, 2)}
|
|
1683
|
+
</pre>
|
|
1684
|
+
</div>
|
|
1685
|
+
)}
|
|
1686
|
+
{msg.toolOutput !== null &&
|
|
1687
|
+
msg.toolOutput !== undefined && (
|
|
1688
|
+
<div>
|
|
1689
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
1690
|
+
Result
|
|
1691
|
+
</div>
|
|
1692
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
1693
|
+
{typeof msg.toolOutput === "string"
|
|
1694
|
+
? msg.toolOutput
|
|
1695
|
+
: JSON.stringify(msg.toolOutput, null, 2)}
|
|
1696
|
+
</pre>
|
|
1697
|
+
</div>
|
|
1698
|
+
)}
|
|
1699
|
+
{(msg.toolInput === null ||
|
|
1700
|
+
msg.toolInput === undefined) &&
|
|
1701
|
+
(msg.toolOutput === null ||
|
|
1702
|
+
msg.toolOutput === undefined) && (
|
|
1703
|
+
<div className="text-muted-foreground text-[11px]">
|
|
1704
|
+
No input/output data available
|
|
1705
|
+
</div>
|
|
1706
|
+
)}
|
|
1707
|
+
</div>
|
|
1708
|
+
</details>
|
|
1709
|
+
) : (
|
|
1710
|
+
<>
|
|
1711
|
+
<div className="text-xs font-medium uppercase mb-1">
|
|
1712
|
+
{msg.type === "user" ? "USER" : "ASSISTANT"}
|
|
1713
|
+
</div>
|
|
1714
|
+
<div className="text-sm whitespace-pre-wrap">
|
|
1715
|
+
{msg.content}
|
|
1716
|
+
</div>
|
|
1717
|
+
</>
|
|
1718
|
+
)}
|
|
1719
|
+
</div>
|
|
1720
|
+
))}
|
|
1721
|
+
{originalMessages.length === 0 && (
|
|
1722
|
+
<div className="text-sm text-muted-foreground">
|
|
1723
|
+
No messages in source session
|
|
1724
|
+
</div>
|
|
1725
|
+
)}
|
|
1726
|
+
</CardContent>
|
|
1727
|
+
{/* Session Analysis & Tool Calls for Original */}
|
|
1728
|
+
{originalMetrics && (
|
|
1729
|
+
<div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
|
|
1730
|
+
{/* Session Analysis */}
|
|
1731
|
+
<SessionAnalysisPanel
|
|
1732
|
+
analysis={originalAnalysis}
|
|
1733
|
+
isLoading={originalAnalysisLoading}
|
|
1734
|
+
isExpanded={analysisExpanded.original}
|
|
1735
|
+
onToggle={() =>
|
|
1736
|
+
setAnalysisExpanded((prev) => ({
|
|
1737
|
+
...prev,
|
|
1738
|
+
original: !prev.original,
|
|
1739
|
+
}))
|
|
1740
|
+
}
|
|
1741
|
+
onRunAnalysis={() =>
|
|
1742
|
+
run?.sourceSessionId &&
|
|
1743
|
+
triggerAnalysis(run.sourceSessionId, "original")
|
|
1744
|
+
}
|
|
1745
|
+
accentColor="yellow"
|
|
1746
|
+
/>
|
|
1747
|
+
{/* Tool Calls */}
|
|
1748
|
+
<ToolCallsPanel
|
|
1749
|
+
toolCalls={originalMetrics.toolCalls}
|
|
1750
|
+
isExpanded={toolCallsExpanded.original}
|
|
1751
|
+
onToggle={() =>
|
|
1752
|
+
setToolCallsExpanded((prev) => ({
|
|
1753
|
+
...prev,
|
|
1754
|
+
original: !prev.original,
|
|
1755
|
+
}))
|
|
1756
|
+
}
|
|
1757
|
+
accentColor="yellow"
|
|
1758
|
+
/>
|
|
1759
|
+
</div>
|
|
1760
|
+
)}
|
|
1761
|
+
</Card>
|
|
1762
|
+
|
|
1763
|
+
{/* Control */}
|
|
1764
|
+
<Card className="flex flex-col h-full min-h-0 overflow-hidden">
|
|
1765
|
+
<CardHeader className="py-3 border-b shrink-0">
|
|
1766
|
+
<div className="flex items-center justify-between">
|
|
1767
|
+
<CardTitle className="text-sm flex items-center gap-2">
|
|
1768
|
+
<span className="w-2 h-2 rounded-full bg-blue-500" />
|
|
1769
|
+
Control (Rerun)
|
|
1770
|
+
{controlState.isStreaming && (
|
|
1771
|
+
<Loader2 className="w-3 h-3 animate-spin text-blue-500" />
|
|
1772
|
+
)}
|
|
1773
|
+
</CardTitle>
|
|
1774
|
+
{/* Auto-run toggle for Control */}
|
|
1775
|
+
{userMessages.length > 1 &&
|
|
1776
|
+
queueState.status === "running" && (
|
|
1777
|
+
<button
|
|
1778
|
+
type="button"
|
|
1779
|
+
onClick={() => toggleAutoRun("control")}
|
|
1780
|
+
className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors"
|
|
1781
|
+
title={
|
|
1782
|
+
controlState.autoRun
|
|
1783
|
+
? "Disable auto-run"
|
|
1784
|
+
: "Enable auto-run"
|
|
1785
|
+
}
|
|
1786
|
+
>
|
|
1787
|
+
{controlState.autoRun ? (
|
|
1788
|
+
<ToggleRight className="w-4 h-4 text-blue-500" />
|
|
1789
|
+
) : (
|
|
1790
|
+
<ToggleLeft className="w-4 h-4" />
|
|
1791
|
+
)}
|
|
1792
|
+
<span>Auto</span>
|
|
1793
|
+
</button>
|
|
1794
|
+
)}
|
|
1795
|
+
</div>
|
|
993
1796
|
<CardDescription className="text-xs">
|
|
994
1797
|
{getControlDimensionLabel()}
|
|
995
1798
|
</CardDescription>
|
|
@@ -997,25 +1800,96 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
997
1800
|
<CardContent className="flex-1 overflow-auto py-4">
|
|
998
1801
|
{controlState.messages.map((msg, i) => (
|
|
999
1802
|
<div
|
|
1000
|
-
key={i}
|
|
1001
|
-
className={`mb-4 ${
|
|
1803
|
+
key={`control-${msg.type}-${i}`}
|
|
1804
|
+
className={`mb-4 ${
|
|
1805
|
+
msg.type === "user"
|
|
1806
|
+
? "text-blue-600 dark:text-blue-400"
|
|
1807
|
+
: msg.type === "tool_call"
|
|
1808
|
+
? ""
|
|
1809
|
+
: ""
|
|
1810
|
+
}`}
|
|
1002
1811
|
>
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1812
|
+
{msg.type === "tool_call" ? (
|
|
1813
|
+
<details className="rounded bg-muted/50 border text-xs group">
|
|
1814
|
+
<summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
|
|
1815
|
+
<span className="text-muted-foreground">🔧</span>
|
|
1816
|
+
<span className="font-medium flex-1">
|
|
1817
|
+
{msg.toolName || msg.content}
|
|
1818
|
+
</span>
|
|
1819
|
+
<ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
|
|
1820
|
+
</summary>
|
|
1821
|
+
<div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
|
|
1822
|
+
{msg.toolInput !== null &&
|
|
1823
|
+
msg.toolInput !== undefined && (
|
|
1824
|
+
<div>
|
|
1825
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
1826
|
+
Args
|
|
1827
|
+
</div>
|
|
1828
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
1829
|
+
{typeof msg.toolInput === "string"
|
|
1830
|
+
? msg.toolInput
|
|
1831
|
+
: JSON.stringify(msg.toolInput, null, 2)}
|
|
1832
|
+
</pre>
|
|
1833
|
+
</div>
|
|
1834
|
+
)}
|
|
1835
|
+
{msg.toolOutput !== null &&
|
|
1836
|
+
msg.toolOutput !== undefined && (
|
|
1837
|
+
<div>
|
|
1838
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
1839
|
+
Result
|
|
1840
|
+
</div>
|
|
1841
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
1842
|
+
{typeof msg.toolOutput === "string"
|
|
1843
|
+
? msg.toolOutput
|
|
1844
|
+
: JSON.stringify(msg.toolOutput, null, 2)}
|
|
1845
|
+
</pre>
|
|
1846
|
+
</div>
|
|
1847
|
+
)}
|
|
1848
|
+
{(msg.toolInput === null ||
|
|
1849
|
+
msg.toolInput === undefined) &&
|
|
1850
|
+
(msg.toolOutput === null ||
|
|
1851
|
+
msg.toolOutput === undefined) && (
|
|
1852
|
+
<div className="text-muted-foreground text-[11px]">
|
|
1853
|
+
No input/output data available
|
|
1854
|
+
</div>
|
|
1855
|
+
)}
|
|
1856
|
+
</div>
|
|
1857
|
+
</details>
|
|
1858
|
+
) : (
|
|
1859
|
+
<>
|
|
1860
|
+
<div className="text-xs font-medium uppercase mb-1">
|
|
1861
|
+
{msg.type === "user" ? "USER" : "ASSISTANT"}
|
|
1862
|
+
</div>
|
|
1863
|
+
<div className="text-sm whitespace-pre-wrap">
|
|
1864
|
+
{msg.content}
|
|
1865
|
+
{controlState.isStreaming &&
|
|
1866
|
+
msg.type === "assistant" &&
|
|
1867
|
+
i === controlState.messages.length - 1 && (
|
|
1868
|
+
<span className="animate-pulse">▊</span>
|
|
1869
|
+
)}
|
|
1870
|
+
</div>
|
|
1871
|
+
</>
|
|
1872
|
+
)}
|
|
1014
1873
|
</div>
|
|
1015
1874
|
))}
|
|
1016
1875
|
{controlState.error && (
|
|
1017
|
-
<div className="
|
|
1018
|
-
|
|
1876
|
+
<div className="p-3 rounded-lg border border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-950/30">
|
|
1877
|
+
<div className="text-red-600 dark:text-red-400 text-sm mb-2">
|
|
1878
|
+
Error: {controlState.error}
|
|
1879
|
+
</div>
|
|
1880
|
+
{queueState.status === "running" && (
|
|
1881
|
+
<Button
|
|
1882
|
+
size="sm"
|
|
1883
|
+
variant="outline"
|
|
1884
|
+
onClick={() => {
|
|
1885
|
+
setControlState((prev) => ({ ...prev, error: null }));
|
|
1886
|
+
sendStagedToArm("control");
|
|
1887
|
+
}}
|
|
1888
|
+
className="text-red-600 border-red-300 hover:bg-red-100 dark:text-red-400"
|
|
1889
|
+
>
|
|
1890
|
+
Retry
|
|
1891
|
+
</Button>
|
|
1892
|
+
)}
|
|
1019
1893
|
</div>
|
|
1020
1894
|
)}
|
|
1021
1895
|
</CardContent>
|
|
@@ -1033,6 +1907,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
1033
1907
|
control: !prev.control,
|
|
1034
1908
|
}))
|
|
1035
1909
|
}
|
|
1910
|
+
onRunAnalysis={() =>
|
|
1911
|
+
controlState.sessionId &&
|
|
1912
|
+
triggerAnalysis(controlState.sessionId, "control")
|
|
1913
|
+
}
|
|
1036
1914
|
accentColor="blue"
|
|
1037
1915
|
/>
|
|
1038
1916
|
{/* Tool Calls */}
|
|
@@ -1054,10 +1932,36 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
1054
1932
|
{/* Variant */}
|
|
1055
1933
|
<Card className="flex flex-col h-full min-h-0 overflow-hidden">
|
|
1056
1934
|
<CardHeader className="py-3 border-b shrink-0">
|
|
1057
|
-
<
|
|
1058
|
-
<
|
|
1059
|
-
|
|
1060
|
-
|
|
1935
|
+
<div className="flex items-center justify-between">
|
|
1936
|
+
<CardTitle className="text-sm flex items-center gap-2">
|
|
1937
|
+
<span className="w-2 h-2 rounded-full bg-orange-500" />
|
|
1938
|
+
Variant
|
|
1939
|
+
{variantState.isStreaming && (
|
|
1940
|
+
<Loader2 className="w-3 h-3 animate-spin text-orange-500" />
|
|
1941
|
+
)}
|
|
1942
|
+
</CardTitle>
|
|
1943
|
+
{/* Auto-run toggle for Variant */}
|
|
1944
|
+
{userMessages.length > 1 &&
|
|
1945
|
+
queueState.status === "running" && (
|
|
1946
|
+
<button
|
|
1947
|
+
type="button"
|
|
1948
|
+
onClick={() => toggleAutoRun("variant")}
|
|
1949
|
+
className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors"
|
|
1950
|
+
title={
|
|
1951
|
+
variantState.autoRun
|
|
1952
|
+
? "Disable auto-run"
|
|
1953
|
+
: "Enable auto-run"
|
|
1954
|
+
}
|
|
1955
|
+
>
|
|
1956
|
+
{variantState.autoRun ? (
|
|
1957
|
+
<ToggleRight className="w-4 h-4 text-orange-500" />
|
|
1958
|
+
) : (
|
|
1959
|
+
<ToggleLeft className="w-4 h-4" />
|
|
1960
|
+
)}
|
|
1961
|
+
<span>Auto</span>
|
|
1962
|
+
</button>
|
|
1963
|
+
)}
|
|
1964
|
+
</div>
|
|
1061
1965
|
<CardDescription className="text-xs">
|
|
1062
1966
|
{getDimensionLabel()}
|
|
1063
1967
|
</CardDescription>
|
|
@@ -1065,25 +1969,96 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
1065
1969
|
<CardContent className="flex-1 overflow-auto py-4">
|
|
1066
1970
|
{variantState.messages.map((msg, i) => (
|
|
1067
1971
|
<div
|
|
1068
|
-
key={i}
|
|
1069
|
-
className={`mb-4 ${
|
|
1972
|
+
key={`variant-${msg.type}-${i}`}
|
|
1973
|
+
className={`mb-4 ${
|
|
1974
|
+
msg.type === "user"
|
|
1975
|
+
? "text-orange-600 dark:text-orange-400"
|
|
1976
|
+
: msg.type === "tool_call"
|
|
1977
|
+
? ""
|
|
1978
|
+
: ""
|
|
1979
|
+
}`}
|
|
1070
1980
|
>
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1981
|
+
{msg.type === "tool_call" ? (
|
|
1982
|
+
<details className="rounded bg-muted/50 border text-xs group">
|
|
1983
|
+
<summary className="flex items-center gap-2 py-1.5 px-2 cursor-pointer list-none">
|
|
1984
|
+
<span className="text-muted-foreground">🔧</span>
|
|
1985
|
+
<span className="font-medium flex-1">
|
|
1986
|
+
{msg.toolName || msg.content}
|
|
1987
|
+
</span>
|
|
1988
|
+
<ChevronDown className="w-3 h-3 text-muted-foreground group-open:rotate-180 transition-transform" />
|
|
1989
|
+
</summary>
|
|
1990
|
+
<div className="px-2 pb-2 space-y-2 border-t mt-1 pt-2">
|
|
1991
|
+
{msg.toolInput !== null &&
|
|
1992
|
+
msg.toolInput !== undefined && (
|
|
1993
|
+
<div>
|
|
1994
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
1995
|
+
Args
|
|
1996
|
+
</div>
|
|
1997
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
1998
|
+
{typeof msg.toolInput === "string"
|
|
1999
|
+
? msg.toolInput
|
|
2000
|
+
: JSON.stringify(msg.toolInput, null, 2)}
|
|
2001
|
+
</pre>
|
|
2002
|
+
</div>
|
|
2003
|
+
)}
|
|
2004
|
+
{msg.toolOutput !== null &&
|
|
2005
|
+
msg.toolOutput !== undefined && (
|
|
2006
|
+
<div>
|
|
2007
|
+
<div className="text-[10px] font-semibold text-muted-foreground mb-1">
|
|
2008
|
+
Result
|
|
2009
|
+
</div>
|
|
2010
|
+
<pre className="text-[11px] bg-background/50 rounded p-1.5 overflow-x-auto max-h-32 whitespace-pre-wrap break-words">
|
|
2011
|
+
{typeof msg.toolOutput === "string"
|
|
2012
|
+
? msg.toolOutput
|
|
2013
|
+
: JSON.stringify(msg.toolOutput, null, 2)}
|
|
2014
|
+
</pre>
|
|
2015
|
+
</div>
|
|
2016
|
+
)}
|
|
2017
|
+
{(msg.toolInput === null ||
|
|
2018
|
+
msg.toolInput === undefined) &&
|
|
2019
|
+
(msg.toolOutput === null ||
|
|
2020
|
+
msg.toolOutput === undefined) && (
|
|
2021
|
+
<div className="text-muted-foreground text-[11px]">
|
|
2022
|
+
No input/output data available
|
|
2023
|
+
</div>
|
|
2024
|
+
)}
|
|
2025
|
+
</div>
|
|
2026
|
+
</details>
|
|
2027
|
+
) : (
|
|
2028
|
+
<>
|
|
2029
|
+
<div className="text-xs font-medium uppercase mb-1">
|
|
2030
|
+
{msg.type === "user" ? "USER" : "ASSISTANT"}
|
|
2031
|
+
</div>
|
|
2032
|
+
<div className="text-sm whitespace-pre-wrap">
|
|
2033
|
+
{msg.content}
|
|
2034
|
+
{variantState.isStreaming &&
|
|
2035
|
+
msg.type === "assistant" &&
|
|
2036
|
+
i === variantState.messages.length - 1 && (
|
|
2037
|
+
<span className="animate-pulse">▊</span>
|
|
2038
|
+
)}
|
|
2039
|
+
</div>
|
|
2040
|
+
</>
|
|
2041
|
+
)}
|
|
1082
2042
|
</div>
|
|
1083
2043
|
))}
|
|
1084
2044
|
{variantState.error && (
|
|
1085
|
-
<div className="
|
|
1086
|
-
|
|
2045
|
+
<div className="p-3 rounded-lg border border-red-200 bg-red-50 dark:border-red-800 dark:bg-red-950/30">
|
|
2046
|
+
<div className="text-red-600 dark:text-red-400 text-sm mb-2">
|
|
2047
|
+
Error: {variantState.error}
|
|
2048
|
+
</div>
|
|
2049
|
+
{queueState.status === "running" && (
|
|
2050
|
+
<Button
|
|
2051
|
+
size="sm"
|
|
2052
|
+
variant="outline"
|
|
2053
|
+
onClick={() => {
|
|
2054
|
+
setVariantState((prev) => ({ ...prev, error: null }));
|
|
2055
|
+
sendStagedToArm("variant");
|
|
2056
|
+
}}
|
|
2057
|
+
className="text-red-600 border-red-300 hover:bg-red-100 dark:text-red-400"
|
|
2058
|
+
>
|
|
2059
|
+
Retry
|
|
2060
|
+
</Button>
|
|
2061
|
+
)}
|
|
1087
2062
|
</div>
|
|
1088
2063
|
)}
|
|
1089
2064
|
</CardContent>
|
|
@@ -1101,6 +2076,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
1101
2076
|
variant: !prev.variant,
|
|
1102
2077
|
}))
|
|
1103
2078
|
}
|
|
2079
|
+
onRunAnalysis={() =>
|
|
2080
|
+
variantState.sessionId &&
|
|
2081
|
+
triggerAnalysis(variantState.sessionId, "variant")
|
|
2082
|
+
}
|
|
1104
2083
|
accentColor="orange"
|
|
1105
2084
|
/>
|
|
1106
2085
|
{/* Tool Calls */}
|
|
@@ -1121,6 +2100,15 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
|
|
|
1121
2100
|
</div>
|
|
1122
2101
|
)}
|
|
1123
2102
|
</div>
|
|
2103
|
+
|
|
2104
|
+
{/* Comparison Analysis Dialog */}
|
|
2105
|
+
{comparisonAnalysis && (
|
|
2106
|
+
<ComparisonAnalysisDialog
|
|
2107
|
+
open={comparisonAnalysisDialogOpen}
|
|
2108
|
+
onClose={() => setComparisonAnalysisDialogOpen(false)}
|
|
2109
|
+
analysis={comparisonAnalysis}
|
|
2110
|
+
/>
|
|
2111
|
+
)}
|
|
1124
2112
|
</DebuggerLayout>
|
|
1125
2113
|
);
|
|
1126
2114
|
}
|