@townco/debugger 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import { ChevronDown, ChevronUp, Loader2 } from "lucide-react";
1
2
  import { useCallback, useEffect, useState } from "react";
2
3
  import { Button } from "@/components/ui/button";
3
4
  import {
@@ -7,6 +8,7 @@ import {
7
8
  CardHeader,
8
9
  CardTitle,
9
10
  } from "@/components/ui/card";
11
+ import type { SessionAnalysis } from "../analysis/types";
10
12
  import { DebuggerLayout } from "../components/DebuggerLayout";
11
13
  import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
12
14
  import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
@@ -33,6 +35,259 @@ const AGENT_SERVER_URL =
33
35
  ? window.location.origin.replace(":4000", ":3100")
34
36
  : "http://localhost:3100";
35
37
 
38
+ // Expandable Session Analysis Panel
39
/**
 * Collapsible card presenting the analysis of one comparison session.
 *
 * - While `isLoading` is true, only a spinner row is rendered.
 * - When there is no analysis, nothing is rendered.
 * - Otherwise a header button (title, outcome status badge, chevron) is
 *   rendered; clicking it calls `onToggle`. The detail sections are rendered
 *   only while `isExpanded` is true.
 *
 * `accentColor` selects the border/background/hover palette of the card.
 */
function SessionAnalysisPanel({
  analysis,
  isLoading,
  isExpanded,
  onToggle,
  accentColor,
}: {
  analysis: SessionAnalysis | null;
  isLoading: boolean;
  isExpanded: boolean;
  onToggle: () => void;
  accentColor: "blue" | "orange";
}) {
  // Any accent other than "blue" uses the orange palette.
  const palette =
    accentColor === "blue"
      ? {
          frame:
            "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30",
          headerHover: "hover:bg-blue-100/50 dark:hover:bg-blue-900/30",
        }
      : {
          frame:
            "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30",
          headerHover: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30",
        };

  if (isLoading) {
    return (
      <div className={`border rounded-md p-3 ${palette.frame}`}>
        <div className="flex items-center gap-2 text-xs text-muted-foreground">
          <Loader2 className="w-3 h-3 animate-spin" />
          Loading analysis...
        </div>
      </div>
    );
  }

  if (!analysis) {
    return null;
  }

  const { task, trajectory, outcome, metrics } = analysis;

  // Green for success, red for failure, yellow for every other status.
  let statusBadgeClasses: string;
  switch (outcome.status) {
    case "SUCCESS":
      statusBadgeClasses =
        "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300";
      break;
    case "FAILURE":
      statusBadgeClasses =
        "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300";
      break;
    default:
      statusBadgeClasses =
        "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/50 dark:text-yellow-300";
  }

  const ToggleIcon = isExpanded ? ChevronUp : ChevronDown;

  // Builds the five-column metrics row; only invoked while expanded.
  const renderMetricsGrid = (m: NonNullable<typeof metrics>) => {
    const cells = [
      {
        label: "Duration",
        value: formatDuration(m.durationMs),
        valueClass: "font-medium",
      },
      {
        label: "Input",
        value: formatTokens(m.inputTokens),
        valueClass: "font-medium",
      },
      {
        label: "Output",
        value: formatTokens(m.outputTokens),
        valueClass: "font-medium",
      },
      {
        label: "Total",
        value: formatTokens(m.totalTokens),
        valueClass: "font-medium",
      },
      {
        label: "Cost",
        value: formatCost(m.estimatedCost),
        valueClass: "font-medium text-green-600 dark:text-green-400",
      },
    ];

    return (
      <div className="grid grid-cols-5 gap-2 pt-2 border-t border-border/50">
        {cells.map((cell) => (
          <div key={cell.label}>
            <div className="text-[10px] text-muted-foreground">
              {cell.label}
            </div>
            <div className={cell.valueClass}>{cell.value}</div>
          </div>
        ))}
      </div>
    );
  };

  return (
    <div className={`border rounded-md overflow-hidden ${palette.frame}`}>
      <button
        type="button"
        onClick={onToggle}
        className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${palette.headerHover}`}
      >
        <div className="flex items-center gap-2">
          <span className="text-xs font-semibold">Session Analysis</span>
          <span
            className={`text-[10px] px-1.5 py-0.5 rounded ${statusBadgeClasses}`}
          >
            {outcome.status}
          </span>
        </div>
        <ToggleIcon className="w-4 h-4 text-muted-foreground" />
      </button>

      {isExpanded ? (
        <div className="px-3 pb-3 space-y-3 text-xs">
          {/* What the session was asked to do */}
          <div>
            <div className="font-semibold text-muted-foreground mb-1">
              Task Summary
            </div>
            <div className="text-foreground">{task.task_summary}</div>
          </div>

          {/* Classified intent */}
          <div className="flex items-center gap-2">
            <span className="font-semibold text-muted-foreground">Intent:</span>
            <span className="px-2 py-0.5 bg-primary/10 text-primary rounded text-[11px] font-medium">
              {task.intent_type}
            </span>
          </div>

          {/* Plan extracted from the trajectory */}
          <div>
            <div className="font-semibold text-muted-foreground mb-1">
              High Level Plan
            </div>
            <div className="text-foreground text-[11px] leading-relaxed">
              {trajectory.high_level_plan}
            </div>
          </div>

          {/* Outcome assessment */}
          <div>
            <div className="font-semibold text-muted-foreground mb-1">
              Assessment
            </div>
            <div className="text-foreground text-[11px] leading-relaxed">
              {outcome.assessment}
            </div>
          </div>

          {/* Kind of answer produced */}
          <div className="flex items-center gap-2">
            <span className="font-semibold text-muted-foreground">
              Answer Type:
            </span>
            <span className="px-2 py-0.5 bg-secondary text-secondary-foreground rounded text-[11px] font-medium">
              {outcome.answer_type}
            </span>
          </div>

          {/* Metrics row, present only when the analysis carries metrics */}
          {metrics && renderMetricsGrid(metrics)}
        </div>
      ) : null}
    </div>
  );
}
195
+
196
+ // Collapsible Tool Calls Panel
197
/**
 * Collapsible card listing the tool calls recorded for one session.
 *
 * The header button always shows the number of calls and calls `onToggle`
 * when clicked. While `isExpanded` is true the body shows either a
 * "No tool calls" placeholder or one `<details>` entry per call with its
 * name, optional start time, and JSON-formatted input and output.
 *
 * `accentColor` selects the border/background/hover palette of the card.
 */
function ToolCallsPanel({
  toolCalls,
  isExpanded,
  onToggle,
  accentColor,
}: {
  toolCalls: SessionMetrics["toolCalls"];
  isExpanded: boolean;
  onToggle: () => void;
  accentColor: "blue" | "orange";
}) {
  // Any accent other than "blue" uses the orange palette.
  const palette =
    accentColor === "blue"
      ? {
          frame:
            "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30",
          headerHover: "hover:bg-blue-100/50 dark:hover:bg-blue-900/30",
        }
      : {
          frame:
            "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30",
          headerHover: "hover:bg-orange-100/50 dark:hover:bg-orange-900/30",
        };

  // A missing list is treated exactly like an empty one.
  const calls = toolCalls ?? [];
  const ToggleIcon = isExpanded ? ChevronUp : ChevronDown;

  // One labelled, pretty-printed JSON payload (used for both args and result).
  const renderPayload = (label: string, payload: unknown) => (
    <div>
      <span className="font-semibold">{label}</span>{" "}
      <pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
        {JSON.stringify(payload, null, 2)}
      </pre>
    </div>
  );

  return (
    <div className={`border rounded-md overflow-hidden ${palette.frame}`}>
      <button
        type="button"
        onClick={onToggle}
        className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${palette.headerHover}`}
      >
        <div className="flex items-center gap-2">
          <span className="text-xs font-semibold">Tool Calls</span>
          <span className="text-[10px] px-1.5 py-0.5 rounded bg-secondary text-secondary-foreground">
            {calls.length}
          </span>
        </div>
        <ToggleIcon className="w-4 h-4 text-muted-foreground" />
      </button>

      {isExpanded ? (
        <div className="px-3 pb-3">
          {calls.length === 0 ? (
            <div className="text-xs text-muted-foreground">No tool calls</div>
          ) : (
            <div className="space-y-2">
              {calls.map((call, idx) => {
                // Start time is stored in nanoseconds; Date expects milliseconds.
                const startedAt = call.startTimeUnixNano ? (
                  <span className="text-muted-foreground">
                    @{" "}
                    {new Date(
                      call.startTimeUnixNano / 1_000_000,
                    ).toLocaleTimeString()}
                  </span>
                ) : null;

                return (
                  <details
                    key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
                    className="rounded-md border px-3 py-2 bg-background/50"
                  >
                    <summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
                      <span>
                        {call.name} {startedAt}
                      </span>
                      <span className="text-muted-foreground text-[11px]">
                        view
                      </span>
                    </summary>
                    <div className="mt-2 text-[11px] space-y-1 break-words">
                      {renderPayload("Args:", call.input)}
                      {renderPayload("Result:", call.output)}
                    </div>
                  </details>
                );
              })}
            </div>
          )}
        </div>
      ) : null}
    </div>
  );
}
290
+
36
291
  export function ComparisonView({ runId }: ComparisonViewProps) {
37
292
  const [run, setRun] = useState<ComparisonRun | null>(null);
38
293
  const [config, setConfig] = useState<ComparisonConfig | null>(null);
@@ -58,6 +313,28 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
58
313
  const [isRunning, setIsRunning] = useState(false);
59
314
  const [hasRun, setHasRun] = useState(false);
60
315
 
316
// --- Session analysis state -------------------------------------------
// Expanded/collapsed flag tracked separately for the control and variant side.
type PerSideFlags = { control: boolean; variant: boolean };
// Both panels start collapsed; updates always create a fresh object.
const allCollapsed: PerSideFlags = { control: false, variant: false };

// Analysis result per side (null until one has been loaded).
const [controlAnalysis, setControlAnalysis] = useState<SessionAnalysis | null>(
  null,
);
const [variantAnalysis, setVariantAnalysis] = useState<SessionAnalysis | null>(
  null,
);

// Whether an analysis request is currently in flight for each side.
const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);

// Open/closed state of the "Session Analysis" panels.
const [analysisExpanded, setAnalysisExpanded] =
  useState<PerSideFlags>(allCollapsed);

// Open/closed state of the "Tool Calls" panels.
const [toolCallsExpanded, setToolCallsExpanded] =
  useState<PerSideFlags>(allCollapsed);
337
+
61
338
  // Fetch comparison run details and restore saved messages
62
339
  useEffect(() => {
63
340
  Promise.all([
@@ -99,8 +376,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
99
376
  }
100
377
  }
101
378
 
102
- // Fetch the config
103
- return fetch(`/api/comparison-config`).then((res) => res.json());
379
+ // Fetch the config by the run's configId (not the latest config!)
380
+ return fetch(`/api/comparison-config/${runData.configId}`).then((res) =>
381
+ res.json(),
382
+ );
104
383
  })
105
384
  .then((configData) => {
106
385
  setConfig(configData);
@@ -112,6 +391,14 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
112
391
  });
113
392
  }, [runId]);
114
393
 
394
/**
 * Build a request id of the form `<prefix>-<random>` or, when a session id
 * is supplied, `<prefix>-<sessionId>-<random>`.
 *
 * The random part is a UUID when `crypto.randomUUID` is available; otherwise
 * it falls back to a hex random number joined with the hex timestamp.
 */
const generateRequestId = (prefix: string, sessionId?: string) => {
  let uniqueSuffix: string;
  if (typeof crypto !== "undefined" && "randomUUID" in crypto) {
    uniqueSuffix = crypto.randomUUID();
  } else {
    const randomHex = Math.random().toString(16).slice(2);
    const timeHex = Date.now().toString(16);
    uniqueSuffix = `${randomHex}-${timeHex}`;
  }

  // An empty or missing session id contributes no segment at all.
  const segments = sessionId
    ? [prefix, sessionId, uniqueSuffix]
    : [prefix, uniqueSuffix];
  return segments.join("-");
};
401
+
115
402
  // Create a new session with the agent server
116
403
  const createSession = async (
117
404
  configOverrides?: Record<string, unknown>,
@@ -121,7 +408,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
121
408
  headers: { "Content-Type": "application/json" },
122
409
  body: JSON.stringify({
123
410
  jsonrpc: "2.0",
124
- id: `init-${Date.now()}`,
411
+ id: generateRequestId("init"),
125
412
  method: "initialize",
126
413
  params: {
127
414
  protocolVersion: 1,
@@ -136,7 +423,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
136
423
  headers: { "Content-Type": "application/json" },
137
424
  body: JSON.stringify({
138
425
  jsonrpc: "2.0",
139
- id: `session-${Date.now()}`,
426
+ id: generateRequestId("session"),
140
427
  method: "session/new",
141
428
  params: {
142
429
  cwd: "/",
@@ -154,7 +441,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
154
441
  sessionId: string,
155
442
  message: string,
156
443
  onUpdate: (content: string) => void,
157
- ): Promise<void> => {
444
+ ): Promise<string> => {
158
445
  let accumulatedContent = "";
159
446
  let abortController: AbortController | null = new AbortController();
160
447
 
@@ -228,7 +515,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
228
515
  headers: { "Content-Type": "application/json" },
229
516
  body: JSON.stringify({
230
517
  jsonrpc: "2.0",
231
- id: `prompt-${Date.now()}`,
518
+ id: generateRequestId("prompt", sessionId),
232
519
  method: "session/prompt",
233
520
  params: {
234
521
  sessionId,
@@ -243,6 +530,9 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
243
530
  // Abort the SSE connection since we're done
244
531
  abortController.abort();
245
532
  abortController = null;
533
+
534
+ // Return the accumulated content
535
+ return accumulatedContent;
246
536
  };
247
537
 
248
538
  // Run the comparison
@@ -271,15 +561,16 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
271
561
  });
272
562
 
273
563
  try {
274
- // Build config overrides based on dimension
564
+ // Build config overrides based on all selected dimensions
275
565
  const variantOverrides: Record<string, unknown> = {};
276
- if (config.dimension === "model" && config.variantModel) {
566
+ const dimensions = config.dimensions || [];
567
+ if (dimensions.includes("model") && config.variantModel) {
277
568
  variantOverrides.model = config.variantModel;
278
569
  }
279
- if (config.dimension === "system_prompt" && config.variantSystemPrompt) {
570
+ if (dimensions.includes("system_prompt") && config.variantSystemPrompt) {
280
571
  variantOverrides.systemPrompt = config.variantSystemPrompt;
281
572
  }
282
- if (config.dimension === "tools" && config.variantTools) {
573
+ if (dimensions.includes("tools") && config.variantTools) {
283
574
  variantOverrides.tools = config.variantTools;
284
575
  }
285
576
 
@@ -307,123 +598,157 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
307
598
  const startTime = Date.now();
308
599
 
309
600
  // Track final responses and metrics
310
- let finalControlResponse = "";
311
- let finalVariantResponse = "";
312
- let finalControlMetrics: SessionMetrics | null = null;
313
- let finalVariantMetrics: SessionMetrics | null = null;
314
-
315
- await Promise.all([
316
- // Control session
317
- sendMessageAndCollect(controlSessionId, firstMessage, (content) => {
318
- finalControlResponse = content;
319
- setControlState((prev) => ({
320
- ...prev,
321
- messages: [
322
- { role: "user", content: firstMessage },
323
- { role: "assistant", content },
324
- ],
325
- }));
326
- })
327
- .then(async () => {
328
- const duration = Date.now() - startTime;
329
- // Wait for telemetry data to be written to the database
330
- await new Promise((r) => setTimeout(r, 2000));
331
- // Fetch metrics - use control model for cost calculation
332
- const controlModel =
333
- config.controlModel || "claude-sonnet-4-5-20250929";
334
- try {
335
- const metricsRes = await fetch(
336
- `/api/session-metrics/${controlSessionId}?model=${encodeURIComponent(controlModel)}`,
337
- );
338
- const metrics = await metricsRes.json();
339
- metrics.durationMs = duration;
340
- finalControlMetrics = metrics;
341
- setControlState((prev) => ({
342
- ...prev,
343
- isStreaming: false,
344
- metrics,
345
- }));
346
- } catch {
347
- finalControlMetrics = {
601
+ let finalControlMetrics: SessionMetrics = {
602
+ durationMs: 0,
603
+ inputTokens: 0,
604
+ outputTokens: 0,
605
+ totalTokens: 0,
606
+ estimatedCost: 0,
607
+ toolCallCount: 0,
608
+ };
609
+ let finalVariantMetrics: SessionMetrics = {
610
+ durationMs: 0,
611
+ inputTokens: 0,
612
+ outputTokens: 0,
613
+ totalTokens: 0,
614
+ estimatedCost: 0,
615
+ toolCallCount: 0,
616
+ };
617
+
618
+ // Helper to run a session and fetch metrics
619
+ const runSession = async (
620
+ sessionId: string,
621
+ model: string,
622
+ setState: typeof setControlState,
623
+ onContentUpdate: (content: string) => void,
624
+ ): Promise<{ response: string; metrics: SessionMetrics }> => {
625
+ try {
626
+ const response = await sendMessageAndCollect(
627
+ sessionId,
628
+ firstMessage,
629
+ onContentUpdate,
630
+ );
631
+
632
+ const duration = Date.now() - startTime;
633
+
634
+ // Poll metrics until they stabilize or we hit a max wait window.
635
+ const fetchMetricsWithRetry = async (): Promise<SessionMetrics> => {
636
+ const maxWaitMs = 60_000;
637
+ const pollIntervalMs = 2_000;
638
+ let elapsed = 0;
639
+ let previousTokens = -1;
640
+ let previousTools = -1;
641
+ let lastMetrics: SessionMetrics | null = null;
642
+
643
+ while (elapsed <= maxWaitMs) {
644
+ try {
645
+ const metricsRes = await fetch(
646
+ `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
647
+ );
648
+ const metrics = await metricsRes.json();
649
+ lastMetrics = { ...metrics, durationMs: duration };
650
+
651
+ // If tokens/tool calls stopped changing and we have data, treat as final.
652
+ if (
653
+ metrics.totalTokens > 0 &&
654
+ metrics.totalTokens === previousTokens &&
655
+ metrics.toolCallCount === previousTools
656
+ ) {
657
+ return lastMetrics!;
658
+ }
659
+
660
+ previousTokens = metrics.totalTokens ?? 0;
661
+ previousTools = metrics.toolCallCount ?? 0;
662
+ } catch {
663
+ // swallow and retry
664
+ }
665
+
666
+ await new Promise((r) => setTimeout(r, pollIntervalMs));
667
+ elapsed += pollIntervalMs;
668
+ }
669
+
670
+ // Return whatever we last saw (or zeros if nothing ever arrived)
671
+ return (
672
+ lastMetrics ?? {
348
673
  durationMs: duration,
349
674
  inputTokens: 0,
350
675
  outputTokens: 0,
351
676
  totalTokens: 0,
352
677
  estimatedCost: 0,
353
678
  toolCallCount: 0,
354
- };
355
- setControlState((prev) => ({
356
- ...prev,
357
- isStreaming: false,
358
- metrics: finalControlMetrics,
359
- }));
360
- }
361
- })
362
- .catch((err) => {
363
- setControlState((prev) => ({
364
- ...prev,
365
- isStreaming: false,
366
- error: err.message,
367
- }));
368
- }),
679
+ }
680
+ );
681
+ };
682
+
683
+ const metrics = await fetchMetricsWithRetry();
369
684
 
370
- // Variant session
371
- sendMessageAndCollect(variantSessionId, firstMessage, (content) => {
372
- finalVariantResponse = content;
373
- setVariantState((prev) => ({
685
+ setState((prev) => ({
374
686
  ...prev,
375
- messages: [
376
- { role: "user", content: firstMessage },
377
- { role: "assistant", content },
378
- ],
687
+ isStreaming: false,
688
+ metrics,
379
689
  }));
380
- })
381
- .then(async () => {
382
- const duration = Date.now() - startTime;
383
- // Wait for telemetry data to be written to the database
384
- await new Promise((r) => setTimeout(r, 2000));
385
- // Fetch metrics - use variant model for cost calculation
386
- const variantModel =
387
- config.variantModel ||
388
- config.controlModel ||
389
- "claude-sonnet-4-5-20250929";
390
- try {
391
- const metricsRes = await fetch(
392
- `/api/session-metrics/${variantSessionId}?model=${encodeURIComponent(variantModel)}`,
393
- );
394
- const metrics = await metricsRes.json();
395
- metrics.durationMs = duration;
396
- finalVariantMetrics = metrics;
397
- setVariantState((prev) => ({
398
- ...prev,
399
- isStreaming: false,
400
- metrics,
401
- }));
402
- } catch {
403
- finalVariantMetrics = {
404
- durationMs: duration,
405
- inputTokens: 0,
406
- outputTokens: 0,
407
- totalTokens: 0,
408
- estimatedCost: 0,
409
- toolCallCount: 0,
410
- };
411
- setVariantState((prev) => ({
412
- ...prev,
413
- isStreaming: false,
414
- metrics: finalVariantMetrics,
415
- }));
416
- }
417
- })
418
- .catch((err) => {
690
+
691
+ return { response, metrics };
692
+ } catch (err) {
693
+ setState((prev) => ({
694
+ ...prev,
695
+ isStreaming: false,
696
+ error: err instanceof Error ? err.message : "Unknown error",
697
+ }));
698
+ return {
699
+ response: "",
700
+ metrics: {
701
+ durationMs: 0,
702
+ inputTokens: 0,
703
+ outputTokens: 0,
704
+ totalTokens: 0,
705
+ estimatedCost: 0,
706
+ toolCallCount: 0,
707
+ },
708
+ };
709
+ }
710
+ };
711
+
712
+ const controlModel = config.controlModel || "claude-sonnet-4-5-20250929";
713
+ const variantModel =
714
+ config.variantModel ||
715
+ config.controlModel ||
716
+ "claude-sonnet-4-5-20250929";
717
+
718
+ const [controlResult, variantResult] = await Promise.all([
719
+ runSession(
720
+ controlSessionId,
721
+ controlModel,
722
+ setControlState,
723
+ (content) => {
724
+ setControlState((prev) => ({
725
+ ...prev,
726
+ messages: [
727
+ { role: "user", content: firstMessage },
728
+ { role: "assistant", content },
729
+ ],
730
+ }));
731
+ },
732
+ ),
733
+ runSession(
734
+ variantSessionId,
735
+ variantModel,
736
+ setVariantState,
737
+ (content) => {
419
738
  setVariantState((prev) => ({
420
739
  ...prev,
421
- isStreaming: false,
422
- error: err.message,
740
+ messages: [
741
+ { role: "user", content: firstMessage },
742
+ { role: "assistant", content },
743
+ ],
423
744
  }));
424
- }),
745
+ },
746
+ ),
425
747
  ]);
426
748
 
749
+ finalControlMetrics = controlResult.metrics;
750
+ finalVariantMetrics = variantResult.metrics;
751
+
427
752
  // Update run status with responses and metrics
428
753
  await fetch(`/api/comparison-run/${runId}/update`, {
429
754
  method: "POST",
@@ -432,8 +757,8 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
432
757
  status: "completed",
433
758
  controlMetrics: finalControlMetrics,
434
759
  variantMetrics: finalVariantMetrics,
435
- controlResponse: finalControlResponse,
436
- variantResponse: finalVariantResponse,
760
+ controlResponse: controlResult.response,
761
+ variantResponse: variantResult.response,
437
762
  }),
438
763
  });
439
764
  } catch (err) {
@@ -443,6 +768,91 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
443
768
  }
444
769
  }, [run, config, runId]);
445
770
 
771
// Session id for which an analysis load has already been attempted, per side.
// Without this, a failed request (analysis stays null, loading returns to
// false) would satisfy the auto-trigger conditions again and re-fire forever.
const [analysisAttempted, setAnalysisAttempted] = useState<{
  control: string | null;
  variant: string | null;
}>({ control: null, variant: null });

/**
 * Load the analysis for a session and store it for the given side.
 *
 * First asks `/api/session-analyses` for an existing analysis; if none is
 * returned (non-OK response, empty body, or a body with an `error` field),
 * it POSTs to `/api/analyze-session/<sessionId>` to trigger a new one.
 * On success the matching analysis panel is auto-expanded. Failures are
 * logged and leave the analysis unset; they are not retried automatically.
 *
 * @param sessionId - Session whose analysis should be loaded.
 * @param type - Which side of the comparison the session belongs to.
 */
const triggerAnalysis = useCallback(
  async (sessionId: string, type: "control" | "variant") => {
    const setLoading =
      type === "control"
        ? setControlAnalysisLoading
        : setVariantAnalysisLoading;
    const setAnalysis =
      type === "control" ? setControlAnalysis : setVariantAnalysis;

    // Record the attempt up front so the auto-trigger effects fire at most
    // once per session id, regardless of how this call ends.
    setAnalysisAttempted((prev) => ({ ...prev, [type]: sessionId }));
    setLoading(true);
    try {
      // First try to fetch an existing analysis from the cache
      const existingRes = await fetch(
        `/api/session-analyses?sessionId=${encodeURIComponent(sessionId)}`,
      );
      if (existingRes.ok) {
        const existingAnalysis = await existingRes.json();
        if (existingAnalysis && !existingAnalysis.error) {
          setAnalysis(existingAnalysis);
          setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
          return;
        }
      }

      // No existing analysis, trigger a new one
      const res = await fetch(`/api/analyze-session/${sessionId}`, {
        method: "POST",
      });
      if (!res.ok) {
        console.error(
          `Failed to analyze ${type} session: HTTP ${res.status}`,
        );
        return;
      }

      const analysis = await res.json();
      setAnalysis(analysis);
      // Auto-expand when analysis completes
      setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
    } catch (err) {
      console.error(`Failed to analyze ${type} session:`, err);
    } finally {
      setLoading(false);
    }
  },
  [],
);

// Auto-trigger analysis once the control session has finished streaming and
// has metrics, unless an analysis exists, is loading, or was already
// attempted for this session id.
useEffect(() => {
  if (
    controlState.sessionId &&
    !controlState.isStreaming &&
    controlState.metrics &&
    !controlAnalysis &&
    !controlAnalysisLoading &&
    analysisAttempted.control !== controlState.sessionId
  ) {
    triggerAnalysis(controlState.sessionId, "control");
  }
}, [
  controlState.sessionId,
  controlState.isStreaming,
  controlState.metrics,
  controlAnalysis,
  controlAnalysisLoading,
  analysisAttempted.control,
  triggerAnalysis,
]);

// Same auto-trigger for the variant session.
useEffect(() => {
  if (
    variantState.sessionId &&
    !variantState.isStreaming &&
    variantState.metrics &&
    !variantAnalysis &&
    !variantAnalysisLoading &&
    analysisAttempted.variant !== variantState.sessionId
  ) {
    triggerAnalysis(variantState.sessionId, "variant");
  }
}, [
  variantState.sessionId,
  variantState.isStreaming,
  variantState.metrics,
  variantAnalysis,
  variantAnalysisLoading,
  analysisAttempted.variant,
  triggerAnalysis,
]);
855
+
446
856
  if (loading) {
447
857
  return (
448
858
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
@@ -464,31 +874,49 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
464
874
  }
465
875
 
466
876
  const getControlDimensionLabel = () => {
467
- if (!config) return "";
468
- switch (config.dimension) {
469
- case "model":
470
- return `Model: ${config.controlModel || "unknown"}`;
471
- case "system_prompt":
472
- return "System Prompt (original)";
473
- case "tools":
474
- return "Tools (original)";
475
- default:
476
- return "";
877
+ if (!config || !config.dimensions || config.dimensions.length === 0)
878
+ return "";
879
+ const labels: string[] = [];
880
+ for (const dim of config.dimensions) {
881
+ switch (dim) {
882
+ case "model":
883
+ labels.push(`Model: ${config.controlModel || "original"}`);
884
+ break;
885
+ case "system_prompt":
886
+ labels.push("System Prompt: original");
887
+ break;
888
+ case "tools":
889
+ labels.push("Tools: original");
890
+ break;
891
+ }
477
892
  }
893
+ return labels.join(" | ");
478
894
  };
479
895
 
480
896
  const getDimensionLabel = () => {
481
- if (!config) return "";
482
- switch (config.dimension) {
483
- case "model":
484
- return `Model: ${config.variantModel}`;
485
- case "system_prompt":
486
- return "System Prompt (modified)";
487
- case "tools":
488
- return `Tools: ${config.variantTools?.join(", ")}`;
489
- default:
490
- return "";
897
+ if (!config || !config.dimensions || config.dimensions.length === 0)
898
+ return "";
899
+ const labels: string[] = [];
900
+ for (const dim of config.dimensions) {
901
+ switch (dim) {
902
+ case "model":
903
+ labels.push(`Model: ${config.variantModel}`);
904
+ break;
905
+ case "system_prompt":
906
+ labels.push("System Prompt: modified");
907
+ break;
908
+ case "tools":
909
+ labels.push(`Tools: ${config.variantTools?.join(", ")}`);
910
+ break;
911
+ }
491
912
  }
913
+ return labels.join(" | ");
914
+ };
915
+
916
/**
 * Comma-separated, human-readable list of the dimensions being compared
 * (underscores shown as spaces). Returns "" when there is no config or no
 * dimensions are selected.
 */
const getDimensionsSummary = () => {
  const dimensions = config?.dimensions;
  if (!dimensions || dimensions.length === 0) return "";
  // Global regex: a string pattern would only replace the first underscore.
  return dimensions.map((d) => d.replace(/_/g, " ")).join(", ");
};
493
921
 
494
922
  return (
@@ -499,8 +927,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
499
927
  <div>
500
928
  <h2 className="text-lg font-semibold">A/B Comparison</h2>
501
929
  <p className="text-sm text-muted-foreground">
502
- Comparing: {config?.dimension?.replace("_", " ")} -{" "}
503
- {getDimensionLabel()}
930
+ Comparing: {getDimensionsSummary()}
504
931
  </p>
505
932
  </div>
506
933
  {!hasRun && (
@@ -592,27 +1019,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
592
1019
  </div>
593
1020
  )}
594
1021
  </CardContent>
595
- {/* Metrics */}
1022
+ {/* Session Analysis & Tool Calls */}
596
1023
  {controlState.metrics && (
597
- <div className="border-t p-3 shrink-0 bg-muted/50">
598
- <div className="grid grid-cols-4 gap-2 text-xs">
599
- <div>
600
- <span className="text-muted-foreground">Duration:</span>{" "}
601
- {formatDuration(controlState.metrics.durationMs)}
602
- </div>
603
- <div>
604
- <span className="text-muted-foreground">Tokens:</span>{" "}
605
- {formatTokens(controlState.metrics.totalTokens)}
606
- </div>
607
- <div>
608
- <span className="text-muted-foreground">Cost:</span>{" "}
609
- {formatCost(controlState.metrics.estimatedCost)}
610
- </div>
611
- <div>
612
- <span className="text-muted-foreground">Tools:</span>{" "}
613
- {controlState.metrics.toolCallCount}
614
- </div>
615
- </div>
1024
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1025
+ {/* Session Analysis */}
1026
+ <SessionAnalysisPanel
1027
+ analysis={controlAnalysis}
1028
+ isLoading={controlAnalysisLoading}
1029
+ isExpanded={analysisExpanded.control}
1030
+ onToggle={() =>
1031
+ setAnalysisExpanded((prev) => ({
1032
+ ...prev,
1033
+ control: !prev.control,
1034
+ }))
1035
+ }
1036
+ accentColor="blue"
1037
+ />
1038
+ {/* Tool Calls */}
1039
+ <ToolCallsPanel
1040
+ toolCalls={controlState.metrics.toolCalls}
1041
+ isExpanded={toolCallsExpanded.control}
1042
+ onToggle={() =>
1043
+ setToolCallsExpanded((prev) => ({
1044
+ ...prev,
1045
+ control: !prev.control,
1046
+ }))
1047
+ }
1048
+ accentColor="blue"
1049
+ />
616
1050
  </div>
617
1051
  )}
618
1052
  </Card>
@@ -653,27 +1087,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
653
1087
  </div>
654
1088
  )}
655
1089
  </CardContent>
656
- {/* Metrics */}
1090
+ {/* Session Analysis & Tool Calls */}
657
1091
  {variantState.metrics && (
658
- <div className="border-t p-3 shrink-0 bg-muted/50">
659
- <div className="grid grid-cols-4 gap-2 text-xs">
660
- <div>
661
- <span className="text-muted-foreground">Duration:</span>{" "}
662
- {formatDuration(variantState.metrics.durationMs)}
663
- </div>
664
- <div>
665
- <span className="text-muted-foreground">Tokens:</span>{" "}
666
- {formatTokens(variantState.metrics.totalTokens)}
667
- </div>
668
- <div>
669
- <span className="text-muted-foreground">Cost:</span>{" "}
670
- {formatCost(variantState.metrics.estimatedCost)}
671
- </div>
672
- <div>
673
- <span className="text-muted-foreground">Tools:</span>{" "}
674
- {variantState.metrics.toolCallCount}
675
- </div>
676
- </div>
1092
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1093
+ {/* Session Analysis */}
1094
+ <SessionAnalysisPanel
1095
+ analysis={variantAnalysis}
1096
+ isLoading={variantAnalysisLoading}
1097
+ isExpanded={analysisExpanded.variant}
1098
+ onToggle={() =>
1099
+ setAnalysisExpanded((prev) => ({
1100
+ ...prev,
1101
+ variant: !prev.variant,
1102
+ }))
1103
+ }
1104
+ accentColor="orange"
1105
+ />
1106
+ {/* Tool Calls */}
1107
+ <ToolCallsPanel
1108
+ toolCalls={variantState.metrics.toolCalls}
1109
+ isExpanded={toolCallsExpanded.variant}
1110
+ onToggle={() =>
1111
+ setToolCallsExpanded((prev) => ({
1112
+ ...prev,
1113
+ variant: !prev.variant,
1114
+ }))
1115
+ }
1116
+ accentColor="orange"
1117
+ />
677
1118
  </div>
678
1119
  )}
679
1120
  </Card>