@townco/debugger 0.1.29 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@townco/debugger",
3
- "version": "0.1.29",
3
+ "version": "0.1.30",
4
4
  "type": "module",
5
5
  "engines": {
6
6
  "bun": ">=1.3.0"
@@ -22,8 +22,8 @@
22
22
  "@radix-ui/react-select": "^2.2.6",
23
23
  "@radix-ui/react-slot": "^1.2.3",
24
24
  "@radix-ui/react-tabs": "^1.1.0",
25
- "@townco/otlp-server": "0.1.29",
26
- "@townco/ui": "0.1.74",
25
+ "@townco/otlp-server": "0.1.30",
26
+ "@townco/ui": "0.1.75",
27
27
  "bun-plugin-tailwind": "^0.1.2",
28
28
  "class-variance-authority": "^0.7.1",
29
29
  "clsx": "^2.1.1",
@@ -35,7 +35,7 @@
35
35
  "zod": "^4.1.13"
36
36
  },
37
37
  "devDependencies": {
38
- "@townco/tsconfig": "0.1.71",
38
+ "@townco/tsconfig": "0.1.72",
39
39
  "@types/bun": "latest",
40
40
  "@types/react": "^19",
41
41
  "@types/react-dom": "^19",
@@ -5,6 +5,8 @@
5
5
  import Anthropic from "@anthropic-ai/sdk";
6
6
  import { LLMAnalysisOutputSchema, SessionAnalysisSchema } from "./schema";
7
7
  import type {
8
+ AnalysisMetrics,
9
+ DetailedToolCall,
8
10
  LLMAnalysisOutput,
9
11
  PreComputedFields,
10
12
  SessionAnalysis,
@@ -168,12 +170,23 @@ function extractJSON(text: string): string {
168
170
  return text.trim();
169
171
  }
170
172
 
173
+ /**
174
+ * Options for session analysis
175
+ */
176
+ export interface AnalyzeSessionOptions {
177
+ session: StoredSession;
178
+ metrics?: AnalysisMetrics;
179
+ toolCalls?: DetailedToolCall[];
180
+ }
181
+
171
182
  /**
172
183
  * Analyze a session using Claude
173
184
  */
174
185
  export async function analyzeSession(
175
- session: StoredSession,
186
+ options: AnalyzeSessionOptions,
176
187
  ): Promise<SessionAnalysis> {
188
+ const { session, metrics, toolCalls } = options;
189
+
177
190
  // 1. Pre-compute extractable fields
178
191
  const preComputed = extractPreComputedFields(session);
179
192
 
@@ -206,6 +219,19 @@ export async function analyzeSession(
206
219
  const parsed = JSON.parse(jsonText);
207
220
  const llmOutput = LLMAnalysisOutputSchema.parse(parsed);
208
221
 
222
+ // Use provided metrics or create defaults
223
+ const sessionDurationMs = metrics?.durationMs ?? calculateDurationMs(session);
224
+ const analysisMetrics: AnalysisMetrics = metrics ?? {
225
+ inputTokens: 0,
226
+ outputTokens: 0,
227
+ totalTokens: 0,
228
+ estimatedCost: 0,
229
+ durationMs: sessionDurationMs,
230
+ };
231
+
232
+ // Use provided tool calls or empty array
233
+ const detailedToolCalls: DetailedToolCall[] = toolCalls ?? [];
234
+
209
235
  // 6. Combine pre-computed and LLM data
210
236
  const analysis: SessionAnalysis = {
211
237
  session_id: session.sessionId,
@@ -222,14 +248,25 @@ export async function analyzeSession(
222
248
  tools_used: preComputed.toolsUsed,
223
249
  num_steps: preComputed.numSteps,
224
250
  num_tool_calls: preComputed.numToolCalls,
251
+ tool_calls: detailedToolCalls,
225
252
  },
226
253
  outcome: {
227
254
  status: llmOutput.status,
228
255
  answer_type: llmOutput.answer_type,
229
256
  assessment: llmOutput.assessment,
230
257
  },
258
+ metrics: analysisMetrics,
231
259
  };
232
260
 
233
261
  // 7. Validate final schema
234
262
  return SessionAnalysisSchema.parse(analysis);
235
263
  }
264
+
265
+ /**
266
+ * Calculate duration from session timestamps
267
+ */
268
+ function calculateDurationMs(session: StoredSession): number {
269
+ const startTime = new Date(session.metadata.createdAt).getTime();
270
+ const endTime = new Date(session.metadata.updatedAt).getTime();
271
+ return endTime - startTime;
272
+ }
@@ -40,6 +40,28 @@ export const LLMAnalysisOutputSchema = z.object({
40
40
  .describe("Explanation of why the status and answer_type were chosen"),
41
41
  });
42
42
 
43
+ /**
44
+ * Detailed tool call schema
45
+ */
46
+ export const DetailedToolCallSchema = z.object({
47
+ name: z.string(),
48
+ input: z.unknown(),
49
+ output: z.unknown(),
50
+ startTimeUnixNano: z.number().optional(),
51
+ endTimeUnixNano: z.number().optional(),
52
+ });
53
+
54
+ /**
55
+ * Metrics schema
56
+ */
57
+ export const AnalysisMetricsSchema = z.object({
58
+ inputTokens: z.number(),
59
+ outputTokens: z.number(),
60
+ totalTokens: z.number(),
61
+ estimatedCost: z.number(),
62
+ durationMs: z.number(),
63
+ });
64
+
43
65
  /**
44
66
  * Complete session analysis schema
45
67
  */
@@ -58,10 +80,12 @@ export const SessionAnalysisSchema = z.object({
58
80
  tools_used: z.array(z.string()),
59
81
  num_steps: z.number(),
60
82
  num_tool_calls: z.number(),
83
+ tool_calls: z.array(DetailedToolCallSchema),
61
84
  }),
62
85
  outcome: z.object({
63
86
  status: OutcomeStatusSchema,
64
87
  answer_type: AnswerTypeSchema,
65
88
  assessment: z.string(),
66
89
  }),
90
+ metrics: AnalysisMetricsSchema,
67
91
  });
@@ -80,6 +80,28 @@ export enum OutcomeStatus {
80
80
  PARTIAL_SUCCESS = "PARTIAL_SUCCESS", // Some goals achieved
81
81
  }
82
82
 
83
+ /**
84
+ * Detailed tool call with input/output
85
+ */
86
+ export interface DetailedToolCall {
87
+ name: string;
88
+ input: unknown;
89
+ output: unknown;
90
+ startTimeUnixNano?: number | undefined;
91
+ endTimeUnixNano?: number | undefined;
92
+ }
93
+
94
+ /**
95
+ * Session metrics (tokens, cost, etc.)
96
+ */
97
+ export interface AnalysisMetrics {
98
+ inputTokens: number;
99
+ outputTokens: number;
100
+ totalTokens: number;
101
+ estimatedCost: number;
102
+ durationMs: number;
103
+ }
104
+
83
105
  /**
84
106
  * Complete session analysis result
85
107
  */
@@ -100,6 +122,7 @@ export interface SessionAnalysis {
100
122
  tools_used: string[]; // Pre-computed from tool calls
101
123
  num_steps: number; // Pre-computed (count assistant messages)
102
124
  num_tool_calls: number; // Pre-computed
125
+ tool_calls: DetailedToolCall[]; // Detailed tool call info with args/results
103
126
  };
104
127
 
105
128
  outcome: {
@@ -107,6 +130,8 @@ export interface SessionAnalysis {
107
130
  answer_type: AnswerType; // LLM-selected from enum
108
131
  assessment: string; // LLM-generated explanation of status and answer_type
109
132
  };
133
+
134
+ metrics: AnalysisMetrics; // Token counts, cost, duration
110
135
  }
111
136
 
112
137
  /**
@@ -1,4 +1,5 @@
1
- import type { SessionAnalysis } from "../analysis/types";
1
+ import type { DetailedToolCall, SessionAnalysis } from "../analysis/types";
2
+ import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
2
3
  import { Dialog, DialogContent, DialogHeader, DialogTitle } from "./ui/dialog";
3
4
 
4
5
  interface Props {
@@ -11,22 +12,9 @@ function formatDate(isoString: string): string {
11
12
  return new Date(isoString).toLocaleString();
12
13
  }
13
14
 
14
- function calculateDuration(start: string, end: string): string {
15
- const startTime = new Date(start).getTime();
16
- const endTime = new Date(end).getTime();
17
- const durationMs = endTime - startTime;
18
-
19
- const seconds = Math.floor(durationMs / 1000);
20
- const minutes = Math.floor(seconds / 60);
21
- const hours = Math.floor(minutes / 60);
22
-
23
- if (hours > 0) {
24
- return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
25
- }
26
- if (minutes > 0) {
27
- return `${minutes}m ${seconds % 60}s`;
28
- }
29
- return `${seconds}s`;
15
+ function formatToolTime(ns?: number): string {
16
+ if (!ns) return "";
17
+ return new Date(ns / 1_000_000).toLocaleTimeString();
30
18
  }
31
19
 
32
20
  function Section({
@@ -75,6 +63,49 @@ function Metric({ label, value }: { label: string; value: number }) {
75
63
  );
76
64
  }
77
65
 
66
+ function ToolCallDetails({ toolCalls }: { toolCalls: DetailedToolCall[] }) {
67
+ if (!toolCalls || toolCalls.length === 0) {
68
+ return <div className="text-xs text-muted-foreground">No tool calls</div>;
69
+ }
70
+
71
+ return (
72
+ <div className="space-y-2">
73
+ {toolCalls.map((call, idx) => (
74
+ <details
75
+ key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
76
+ className="rounded-md border px-3 py-2 bg-muted/50"
77
+ >
78
+ <summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
79
+ <span>
80
+ {call.name}{" "}
81
+ {call.startTimeUnixNano ? (
82
+ <span className="text-muted-foreground">
83
+ @ {formatToolTime(call.startTimeUnixNano)}
84
+ </span>
85
+ ) : null}
86
+ </span>
87
+ <span className="text-muted-foreground text-[11px]">view</span>
88
+ </summary>
89
+ <div className="mt-2 text-[11px] space-y-1 break-words">
90
+ <div>
91
+ <span className="font-semibold">Args:</span>{" "}
92
+ <pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
93
+ {JSON.stringify(call.input, null, 2)}
94
+ </pre>
95
+ </div>
96
+ <div>
97
+ <span className="font-semibold">Result:</span>{" "}
98
+ <pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
99
+ {JSON.stringify(call.output, null, 2)}
100
+ </pre>
101
+ </div>
102
+ </div>
103
+ </details>
104
+ ))}
105
+ </div>
106
+ );
107
+ }
108
+
78
109
  export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
79
110
  return (
80
111
  <Dialog open={open} onOpenChange={onClose}>
@@ -121,6 +152,16 @@ export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
121
152
  </div>
122
153
  </div>
123
154
  )}
155
+ {/* Detailed Tool Calls */}
156
+ {analysis.trajectory.tool_calls &&
157
+ analysis.trajectory.tool_calls.length > 0 && (
158
+ <div className="space-y-2 pt-2">
159
+ <div className="text-xs font-medium text-muted-foreground">
160
+ Tool Call Details
161
+ </div>
162
+ <ToolCallDetails toolCalls={analysis.trajectory.tool_calls} />
163
+ </div>
164
+ )}
124
165
  </Section>
125
166
 
126
167
  {/* Outcome Section */}
@@ -132,16 +173,60 @@ export function SessionAnalysisDialog({ open, onClose, analysis }: Props) {
132
173
  <Field label="Assessment" value={analysis.outcome.assessment} />
133
174
  </Section>
134
175
 
176
+ {/* Metrics Section */}
177
+ {analysis.metrics && (
178
+ <Section title="Metrics">
179
+ <div className="grid grid-cols-5 gap-4">
180
+ <div className="space-y-1">
181
+ <div className="text-xs font-medium text-muted-foreground">
182
+ Duration
183
+ </div>
184
+ <div className="text-lg font-semibold">
185
+ {formatDuration(analysis.metrics.durationMs)}
186
+ </div>
187
+ </div>
188
+ <div className="space-y-1">
189
+ <div className="text-xs font-medium text-muted-foreground">
190
+ Input Tokens
191
+ </div>
192
+ <div className="text-lg font-semibold">
193
+ {formatTokens(analysis.metrics.inputTokens)}
194
+ </div>
195
+ </div>
196
+ <div className="space-y-1">
197
+ <div className="text-xs font-medium text-muted-foreground">
198
+ Output Tokens
199
+ </div>
200
+ <div className="text-lg font-semibold">
201
+ {formatTokens(analysis.metrics.outputTokens)}
202
+ </div>
203
+ </div>
204
+ <div className="space-y-1">
205
+ <div className="text-xs font-medium text-muted-foreground">
206
+ Total Tokens
207
+ </div>
208
+ <div className="text-lg font-semibold">
209
+ {formatTokens(analysis.metrics.totalTokens)}
210
+ </div>
211
+ </div>
212
+ <div className="space-y-1">
213
+ <div className="text-xs font-medium text-muted-foreground">
214
+ Estimated Cost
215
+ </div>
216
+ <div className="text-lg font-semibold text-green-600 dark:text-green-400">
217
+ {formatCost(analysis.metrics.estimatedCost)}
218
+ </div>
219
+ </div>
220
+ </div>
221
+ </Section>
222
+ )}
223
+
135
224
  {/* Metadata Section */}
136
225
  <Section title="Metadata">
137
226
  <div className="grid grid-cols-2 gap-4">
138
227
  <Field label="Started" value={formatDate(analysis.started_at)} />
139
228
  <Field label="Ended" value={formatDate(analysis.ended_at)} />
140
229
  </div>
141
- <Field
142
- label="Duration"
143
- value={calculateDuration(analysis.started_at, analysis.ended_at)}
144
- />
145
230
  <Field label="Agent" value={analysis.agent_name} />
146
231
  <Field
147
232
  label="Session ID"
@@ -136,10 +136,15 @@ export function extractMetricsFromSpans(
136
136
  const totalTokens = inputTokens + outputTokens;
137
137
  const estimatedCost = calculateCost(model, inputTokens, outputTokens);
138
138
  // Dedupe tool calls using name + start time to avoid double counting when captured in multiple places
139
+ // Prefer entries with actual output over entries with null output
139
140
  const deduped = new Map<string, ToolCall>();
140
141
  for (const call of toolCalls) {
141
142
  const key = `${call.name}-${call.startTimeUnixNano ?? ""}`;
142
- if (!deduped.has(key)) {
143
+ const existing = deduped.get(key);
144
+ if (!existing) {
145
+ deduped.set(key, call);
146
+ } else if (existing.output == null && call.output != null) {
147
+ // Replace null-output entry with one that has actual output
143
148
  deduped.set(key, call);
144
149
  }
145
150
  }
@@ -163,8 +168,10 @@ export function extractSessionMetrics(
163
168
  spans: Span[],
164
169
  model: string,
165
170
  ): SessionMetrics {
166
- // Calculate total duration from traces
167
- let minStartTime = Number.MAX_SAFE_INTEGER;
171
+ // Calculate total duration from traces first
172
+ // Note: Using Infinity instead of Number.MAX_SAFE_INTEGER because nanosecond
173
+ // timestamps exceed MAX_SAFE_INTEGER and JS number comparison doesn't work correctly
174
+ let minStartTime = Infinity;
168
175
  let maxEndTime = 0;
169
176
 
170
177
  for (const trace of traces) {
@@ -176,10 +183,27 @@ export function extractSessionMetrics(
176
183
  }
177
184
  }
178
185
 
179
- const durationMs =
180
- minStartTime < Number.MAX_SAFE_INTEGER
181
- ? (maxEndTime - minStartTime) / 1_000_000
182
- : 0;
186
+ let durationMs =
187
+ minStartTime < Infinity ? (maxEndTime - minStartTime) / 1_000_000 : 0;
188
+
189
+ // If traces didn't give us duration, calculate from spans as fallback
190
+ if (durationMs === 0 && spans.length > 0) {
191
+ let spanMinStart = Infinity;
192
+ let spanMaxEnd = 0;
193
+
194
+ for (const span of spans) {
195
+ if (span.start_time_unix_nano < spanMinStart) {
196
+ spanMinStart = span.start_time_unix_nano;
197
+ }
198
+ if (span.end_time_unix_nano > spanMaxEnd) {
199
+ spanMaxEnd = span.end_time_unix_nano;
200
+ }
201
+ }
202
+
203
+ if (spanMinStart < Infinity) {
204
+ durationMs = (spanMaxEnd - spanMinStart) / 1_000_000;
205
+ }
206
+ }
183
207
 
184
208
  // Extract token metrics from spans
185
209
  const tokenMetrics = extractMetricsFromSpans(spans, model);
@@ -1,3 +1,4 @@
1
+ import { ChevronDown, ChevronUp, Loader2 } from "lucide-react";
1
2
  import { useCallback, useEffect, useState } from "react";
2
3
  import { Button } from "@/components/ui/button";
3
4
  import {
@@ -7,6 +8,7 @@ import {
7
8
  CardHeader,
8
9
  CardTitle,
9
10
  } from "@/components/ui/card";
11
+ import type { SessionAnalysis } from "../analysis/types";
10
12
  import { DebuggerLayout } from "../components/DebuggerLayout";
11
13
  import { formatCost, formatDuration, formatTokens } from "../lib/metrics";
12
14
  import type { ComparisonConfig, ComparisonRun, SessionMetrics } from "../types";
@@ -33,6 +35,259 @@ const AGENT_SERVER_URL =
33
35
  ? window.location.origin.replace(":4000", ":3100")
34
36
  : "http://localhost:3100";
35
37
 
38
+ // Expandable Session Analysis Panel
39
+ function SessionAnalysisPanel({
40
+ analysis,
41
+ isLoading,
42
+ isExpanded,
43
+ onToggle,
44
+ accentColor,
45
+ }: {
46
+ analysis: SessionAnalysis | null;
47
+ isLoading: boolean;
48
+ isExpanded: boolean;
49
+ onToggle: () => void;
50
+ accentColor: "blue" | "orange";
51
+ }) {
52
+ const colorClasses =
53
+ accentColor === "blue"
54
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
55
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
56
+
57
+ const headerColorClasses =
58
+ accentColor === "blue"
59
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
60
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
61
+
62
+ if (isLoading) {
63
+ return (
64
+ <div className={`border rounded-md p-3 ${colorClasses}`}>
65
+ <div className="flex items-center gap-2 text-xs text-muted-foreground">
66
+ <Loader2 className="w-3 h-3 animate-spin" />
67
+ Loading analysis...
68
+ </div>
69
+ </div>
70
+ );
71
+ }
72
+
73
+ if (!analysis) {
74
+ return null;
75
+ }
76
+
77
+ return (
78
+ <div className={`border rounded-md overflow-hidden ${colorClasses}`}>
79
+ <button
80
+ type="button"
81
+ onClick={onToggle}
82
+ className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${headerColorClasses}`}
83
+ >
84
+ <div className="flex items-center gap-2">
85
+ <span className="text-xs font-semibold">Session Analysis</span>
86
+ <span
87
+ className={`text-[10px] px-1.5 py-0.5 rounded ${
88
+ analysis.outcome.status === "SUCCESS"
89
+ ? "bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300"
90
+ : analysis.outcome.status === "FAILURE"
91
+ ? "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-300"
92
+ : "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/50 dark:text-yellow-300"
93
+ }`}
94
+ >
95
+ {analysis.outcome.status}
96
+ </span>
97
+ </div>
98
+ {isExpanded ? (
99
+ <ChevronUp className="w-4 h-4 text-muted-foreground" />
100
+ ) : (
101
+ <ChevronDown className="w-4 h-4 text-muted-foreground" />
102
+ )}
103
+ </button>
104
+
105
+ {isExpanded && (
106
+ <div className="px-3 pb-3 space-y-3 text-xs">
107
+ {/* Task */}
108
+ <div>
109
+ <div className="font-semibold text-muted-foreground mb-1">
110
+ Task Summary
111
+ </div>
112
+ <div className="text-foreground">{analysis.task.task_summary}</div>
113
+ </div>
114
+
115
+ {/* Intent */}
116
+ <div className="flex items-center gap-2">
117
+ <span className="font-semibold text-muted-foreground">Intent:</span>
118
+ <span className="px-2 py-0.5 bg-primary/10 text-primary rounded text-[11px] font-medium">
119
+ {analysis.task.intent_type}
120
+ </span>
121
+ </div>
122
+
123
+ {/* Trajectory */}
124
+ <div>
125
+ <div className="font-semibold text-muted-foreground mb-1">
126
+ High Level Plan
127
+ </div>
128
+ <div className="text-foreground text-[11px] leading-relaxed">
129
+ {analysis.trajectory.high_level_plan}
130
+ </div>
131
+ </div>
132
+
133
+ {/* Outcome */}
134
+ <div>
135
+ <div className="font-semibold text-muted-foreground mb-1">
136
+ Assessment
137
+ </div>
138
+ <div className="text-foreground text-[11px] leading-relaxed">
139
+ {analysis.outcome.assessment}
140
+ </div>
141
+ </div>
142
+
143
+ {/* Answer Type */}
144
+ <div className="flex items-center gap-2">
145
+ <span className="font-semibold text-muted-foreground">
146
+ Answer Type:
147
+ </span>
148
+ <span className="px-2 py-0.5 bg-secondary text-secondary-foreground rounded text-[11px] font-medium">
149
+ {analysis.outcome.answer_type}
150
+ </span>
151
+ </div>
152
+
153
+ {/* Metrics Summary */}
154
+ {analysis.metrics && (
155
+ <div className="grid grid-cols-5 gap-2 pt-2 border-t border-border/50">
156
+ <div>
157
+ <div className="text-[10px] text-muted-foreground">
158
+ Duration
159
+ </div>
160
+ <div className="font-medium">
161
+ {formatDuration(analysis.metrics.durationMs)}
162
+ </div>
163
+ </div>
164
+ <div>
165
+ <div className="text-[10px] text-muted-foreground">Input</div>
166
+ <div className="font-medium">
167
+ {formatTokens(analysis.metrics.inputTokens)}
168
+ </div>
169
+ </div>
170
+ <div>
171
+ <div className="text-[10px] text-muted-foreground">Output</div>
172
+ <div className="font-medium">
173
+ {formatTokens(analysis.metrics.outputTokens)}
174
+ </div>
175
+ </div>
176
+ <div>
177
+ <div className="text-[10px] text-muted-foreground">Total</div>
178
+ <div className="font-medium">
179
+ {formatTokens(analysis.metrics.totalTokens)}
180
+ </div>
181
+ </div>
182
+ <div>
183
+ <div className="text-[10px] text-muted-foreground">Cost</div>
184
+ <div className="font-medium text-green-600 dark:text-green-400">
185
+ {formatCost(analysis.metrics.estimatedCost)}
186
+ </div>
187
+ </div>
188
+ </div>
189
+ )}
190
+ </div>
191
+ )}
192
+ </div>
193
+ );
194
+ }
195
+
196
+ // Collapsible Tool Calls Panel
197
+ function ToolCallsPanel({
198
+ toolCalls,
199
+ isExpanded,
200
+ onToggle,
201
+ accentColor,
202
+ }: {
203
+ toolCalls: SessionMetrics["toolCalls"];
204
+ isExpanded: boolean;
205
+ onToggle: () => void;
206
+ accentColor: "blue" | "orange";
207
+ }) {
208
+ const colorClasses =
209
+ accentColor === "blue"
210
+ ? "border-blue-200 dark:border-blue-800 bg-blue-50/50 dark:bg-blue-950/30"
211
+ : "border-orange-200 dark:border-orange-800 bg-orange-50/50 dark:bg-orange-950/30";
212
+
213
+ const headerColorClasses =
214
+ accentColor === "blue"
215
+ ? "hover:bg-blue-100/50 dark:hover:bg-blue-900/30"
216
+ : "hover:bg-orange-100/50 dark:hover:bg-orange-900/30";
217
+
218
+ const toolCallCount = toolCalls?.length ?? 0;
219
+
220
+ return (
221
+ <div className={`border rounded-md overflow-hidden ${colorClasses}`}>
222
+ <button
223
+ type="button"
224
+ onClick={onToggle}
225
+ className={`w-full px-3 py-2 flex items-center justify-between text-left transition-colors ${headerColorClasses}`}
226
+ >
227
+ <div className="flex items-center gap-2">
228
+ <span className="text-xs font-semibold">Tool Calls</span>
229
+ <span className="text-[10px] px-1.5 py-0.5 rounded bg-secondary text-secondary-foreground">
230
+ {toolCallCount}
231
+ </span>
232
+ </div>
233
+ {isExpanded ? (
234
+ <ChevronUp className="w-4 h-4 text-muted-foreground" />
235
+ ) : (
236
+ <ChevronDown className="w-4 h-4 text-muted-foreground" />
237
+ )}
238
+ </button>
239
+
240
+ {isExpanded && (
241
+ <div className="px-3 pb-3">
242
+ {!toolCalls || toolCalls.length === 0 ? (
243
+ <div className="text-xs text-muted-foreground">No tool calls</div>
244
+ ) : (
245
+ <div className="space-y-2">
246
+ {toolCalls.map((call, idx) => (
247
+ <details
248
+ key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
249
+ className="rounded-md border px-3 py-2 bg-background/50"
250
+ >
251
+ <summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
252
+ <span>
253
+ {call.name}{" "}
254
+ {call.startTimeUnixNano ? (
255
+ <span className="text-muted-foreground">
256
+ @{" "}
257
+ {new Date(
258
+ call.startTimeUnixNano / 1_000_000,
259
+ ).toLocaleTimeString()}
260
+ </span>
261
+ ) : null}
262
+ </span>
263
+ <span className="text-muted-foreground text-[11px]">
264
+ view
265
+ </span>
266
+ </summary>
267
+ <div className="mt-2 text-[11px] space-y-1 break-words">
268
+ <div>
269
+ <span className="font-semibold">Args:</span>{" "}
270
+ <pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
271
+ {JSON.stringify(call.input, null, 2)}
272
+ </pre>
273
+ </div>
274
+ <div>
275
+ <span className="font-semibold">Result:</span>{" "}
276
+ <pre className="break-words whitespace-pre-wrap bg-muted rounded p-2 mt-1 overflow-x-auto max-h-40">
277
+ {JSON.stringify(call.output, null, 2)}
278
+ </pre>
279
+ </div>
280
+ </div>
281
+ </details>
282
+ ))}
283
+ </div>
284
+ )}
285
+ </div>
286
+ )}
287
+ </div>
288
+ );
289
+ }
290
+
36
291
  export function ComparisonView({ runId }: ComparisonViewProps) {
37
292
  const [run, setRun] = useState<ComparisonRun | null>(null);
38
293
  const [config, setConfig] = useState<ComparisonConfig | null>(null);
@@ -58,6 +313,28 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
58
313
  const [isRunning, setIsRunning] = useState(false);
59
314
  const [hasRun, setHasRun] = useState(false);
60
315
 
316
+ // Session analysis state
317
+ const [controlAnalysis, setControlAnalysis] =
318
+ useState<SessionAnalysis | null>(null);
319
+ const [variantAnalysis, setVariantAnalysis] =
320
+ useState<SessionAnalysis | null>(null);
321
+ const [controlAnalysisLoading, setControlAnalysisLoading] = useState(false);
322
+ const [variantAnalysisLoading, setVariantAnalysisLoading] = useState(false);
323
+ const [analysisExpanded, setAnalysisExpanded] = useState<{
324
+ control: boolean;
325
+ variant: boolean;
326
+ }>({
327
+ control: false,
328
+ variant: false,
329
+ });
330
+ const [toolCallsExpanded, setToolCallsExpanded] = useState<{
331
+ control: boolean;
332
+ variant: boolean;
333
+ }>({
334
+ control: false,
335
+ variant: false,
336
+ });
337
+
61
338
  // Fetch comparison run details and restore saved messages
62
339
  useEffect(() => {
63
340
  Promise.all([
@@ -99,8 +376,10 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
99
376
  }
100
377
  }
101
378
 
102
- // Fetch the config
103
- return fetch(`/api/comparison-config`).then((res) => res.json());
379
+ // Fetch the config by the run's configId (not the latest config!)
380
+ return fetch(`/api/comparison-config/${runData.configId}`).then((res) =>
381
+ res.json(),
382
+ );
104
383
  })
105
384
  .then((configData) => {
106
385
  setConfig(configData);
@@ -489,6 +768,91 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
489
768
  }
490
769
  }, [run, config, runId]);
491
770
 
771
+ // Function to fetch existing or trigger new session analysis
772
+ const triggerAnalysis = useCallback(
773
+ async (sessionId: string, type: "control" | "variant") => {
774
+ const setLoading =
775
+ type === "control"
776
+ ? setControlAnalysisLoading
777
+ : setVariantAnalysisLoading;
778
+ const setAnalysis =
779
+ type === "control" ? setControlAnalysis : setVariantAnalysis;
780
+
781
+ setLoading(true);
782
+ try {
783
+ // First try to fetch existing analysis from cache
784
+ const existingRes = await fetch(
785
+ `/api/session-analyses?sessionId=${sessionId}`,
786
+ );
787
+ if (existingRes.ok) {
788
+ const existingAnalysis = await existingRes.json();
789
+ if (existingAnalysis && !existingAnalysis.error) {
790
+ setAnalysis(existingAnalysis);
791
+ setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
792
+ return;
793
+ }
794
+ }
795
+
796
+ // No existing analysis, trigger new one
797
+ const res = await fetch(`/api/analyze-session/${sessionId}`, {
798
+ method: "POST",
799
+ });
800
+ if (res.ok) {
801
+ const analysis = await res.json();
802
+ setAnalysis(analysis);
803
+ // Auto-expand when analysis completes
804
+ setAnalysisExpanded((prev) => ({ ...prev, [type]: true }));
805
+ }
806
+ } catch (err) {
807
+ console.error(`Failed to analyze ${type} session:`, err);
808
+ } finally {
809
+ setLoading(false);
810
+ }
811
+ },
812
+ [],
813
+ );
814
+
815
+ // Auto-trigger analysis when sessions complete
816
+ useEffect(() => {
817
+ // Control session completed
818
+ if (
819
+ controlState.sessionId &&
820
+ !controlState.isStreaming &&
821
+ controlState.metrics &&
822
+ !controlAnalysis &&
823
+ !controlAnalysisLoading
824
+ ) {
825
+ triggerAnalysis(controlState.sessionId, "control");
826
+ }
827
+ }, [
828
+ controlState.sessionId,
829
+ controlState.isStreaming,
830
+ controlState.metrics,
831
+ controlAnalysis,
832
+ controlAnalysisLoading,
833
+ triggerAnalysis,
834
+ ]);
835
+
836
+ useEffect(() => {
837
+ // Variant session completed
838
+ if (
839
+ variantState.sessionId &&
840
+ !variantState.isStreaming &&
841
+ variantState.metrics &&
842
+ !variantAnalysis &&
843
+ !variantAnalysisLoading
844
+ ) {
845
+ triggerAnalysis(variantState.sessionId, "variant");
846
+ }
847
+ }, [
848
+ variantState.sessionId,
849
+ variantState.isStreaming,
850
+ variantState.metrics,
851
+ variantAnalysis,
852
+ variantAnalysisLoading,
853
+ triggerAnalysis,
854
+ ]);
855
+
492
856
  if (loading) {
493
857
  return (
494
858
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
@@ -555,54 +919,6 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
555
919
  return config.dimensions.map((d) => d.replace("_", " ")).join(", ");
556
920
  };
557
921
 
558
- const formatToolTime = (ns?: number) => {
559
- if (!ns) return "";
560
- return new Date(ns / 1_000_000).toLocaleTimeString();
561
- };
562
-
563
- const renderToolCalls = (toolCalls?: SessionMetrics["toolCalls"]) => {
564
- if (!toolCalls || toolCalls.length === 0) {
565
- return <div className="text-xs text-muted-foreground">No tool calls</div>;
566
- }
567
-
568
- return (
569
- <div className="space-y-2">
570
- {toolCalls.map((call, idx) => (
571
- <details
572
- key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
573
- className="rounded-md border px-3 py-2 bg-muted/50"
574
- >
575
- <summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
576
- <span>
577
- {call.name}{" "}
578
- {call.startTimeUnixNano ? (
579
- <span className="text-muted-foreground">
580
- @ {formatToolTime(call.startTimeUnixNano)}
581
- </span>
582
- ) : null}
583
- </span>
584
- <span className="text-muted-foreground text-[11px]">view</span>
585
- </summary>
586
- <div className="mt-2 text-[11px] space-y-1 break-words">
587
- <div>
588
- <span className="font-semibold">Args:</span>{" "}
589
- <code className="break-words">
590
- {JSON.stringify(call.input, null, 2)}
591
- </code>
592
- </div>
593
- <div>
594
- <span className="font-semibold">Result:</span>{" "}
595
- <code className="break-words">
596
- {JSON.stringify(call.output, null, 2)}
597
- </code>
598
- </div>
599
- </div>
600
- </details>
601
- ))}
602
- </div>
603
- );
604
- };
605
-
606
922
  return (
607
923
  <DebuggerLayout title="Comparison" showBackButton backHref="/town-hall">
608
924
  <div className="container mx-auto p-4 h-[calc(100vh-4rem)] flex flex-col overflow-hidden">
@@ -703,33 +1019,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
703
1019
  </div>
704
1020
  )}
705
1021
  </CardContent>
706
- {/* Metrics */}
1022
+ {/* Session Analysis & Tool Calls */}
707
1023
  {controlState.metrics && (
708
- <div className="border-t p-3 shrink-0 bg-muted/50">
709
- <div className="grid grid-cols-4 gap-2 text-xs">
710
- <div>
711
- <span className="text-muted-foreground">Duration:</span>{" "}
712
- {formatDuration(controlState.metrics.durationMs)}
713
- </div>
714
- <div>
715
- <span className="text-muted-foreground">Tokens:</span>{" "}
716
- {formatTokens(controlState.metrics.totalTokens)}
717
- </div>
718
- <div>
719
- <span className="text-muted-foreground">Cost:</span>{" "}
720
- {formatCost(controlState.metrics.estimatedCost)}
721
- </div>
722
- <div>
723
- <span className="text-muted-foreground">Tools:</span>{" "}
724
- {controlState.metrics.toolCallCount}
725
- </div>
726
- </div>
727
- <div className="mt-3">
728
- <div className="text-[11px] font-semibold mb-1">
729
- Tool calls
730
- </div>
731
- {renderToolCalls(controlState.metrics.toolCalls)}
732
- </div>
1024
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1025
+ {/* Session Analysis */}
1026
+ <SessionAnalysisPanel
1027
+ analysis={controlAnalysis}
1028
+ isLoading={controlAnalysisLoading}
1029
+ isExpanded={analysisExpanded.control}
1030
+ onToggle={() =>
1031
+ setAnalysisExpanded((prev) => ({
1032
+ ...prev,
1033
+ control: !prev.control,
1034
+ }))
1035
+ }
1036
+ accentColor="blue"
1037
+ />
1038
+ {/* Tool Calls */}
1039
+ <ToolCallsPanel
1040
+ toolCalls={controlState.metrics.toolCalls}
1041
+ isExpanded={toolCallsExpanded.control}
1042
+ onToggle={() =>
1043
+ setToolCallsExpanded((prev) => ({
1044
+ ...prev,
1045
+ control: !prev.control,
1046
+ }))
1047
+ }
1048
+ accentColor="blue"
1049
+ />
733
1050
  </div>
734
1051
  )}
735
1052
  </Card>
@@ -770,33 +1087,34 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
770
1087
  </div>
771
1088
  )}
772
1089
  </CardContent>
773
- {/* Metrics */}
1090
+ {/* Session Analysis & Tool Calls */}
774
1091
  {variantState.metrics && (
775
- <div className="border-t p-3 shrink-0 bg-muted/50">
776
- <div className="grid grid-cols-4 gap-2 text-xs">
777
- <div>
778
- <span className="text-muted-foreground">Duration:</span>{" "}
779
- {formatDuration(variantState.metrics.durationMs)}
780
- </div>
781
- <div>
782
- <span className="text-muted-foreground">Tokens:</span>{" "}
783
- {formatTokens(variantState.metrics.totalTokens)}
784
- </div>
785
- <div>
786
- <span className="text-muted-foreground">Cost:</span>{" "}
787
- {formatCost(variantState.metrics.estimatedCost)}
788
- </div>
789
- <div>
790
- <span className="text-muted-foreground">Tools:</span>{" "}
791
- {variantState.metrics.toolCallCount}
792
- </div>
793
- </div>
794
- <div className="mt-3">
795
- <div className="text-[11px] font-semibold mb-1">
796
- Tool calls
797
- </div>
798
- {renderToolCalls(variantState.metrics.toolCalls)}
799
- </div>
1092
+ <div className="border-t p-3 shrink-0 bg-muted/50 space-y-3">
1093
+ {/* Session Analysis */}
1094
+ <SessionAnalysisPanel
1095
+ analysis={variantAnalysis}
1096
+ isLoading={variantAnalysisLoading}
1097
+ isExpanded={analysisExpanded.variant}
1098
+ onToggle={() =>
1099
+ setAnalysisExpanded((prev) => ({
1100
+ ...prev,
1101
+ variant: !prev.variant,
1102
+ }))
1103
+ }
1104
+ accentColor="orange"
1105
+ />
1106
+ {/* Tool Calls */}
1107
+ <ToolCallsPanel
1108
+ toolCalls={variantState.metrics.toolCalls}
1109
+ isExpanded={toolCallsExpanded.variant}
1110
+ onToggle={() =>
1111
+ setToolCallsExpanded((prev) => ({
1112
+ ...prev,
1113
+ variant: !prev.variant,
1114
+ }))
1115
+ }
1116
+ accentColor="orange"
1117
+ />
800
1118
  </div>
801
1119
  )}
802
1120
  </Card>
@@ -21,6 +21,7 @@ import {
21
21
  interface SimilarSession {
22
22
  session_id: string;
23
23
  distance: number;
24
+ analysis?: SessionAnalysis;
24
25
  }
25
26
 
26
27
  export function FindSessions() {
@@ -65,7 +66,26 @@ export function FindSessions() {
65
66
  }
66
67
 
67
68
  const data = await response.json();
68
- setSimilarSessions(data.similar);
69
+
70
+ // Fetch full analysis data for each similar session
71
+ const similarWithAnalysis = await Promise.all(
72
+ data.similar.map(async (similar: SimilarSession) => {
73
+ try {
74
+ const analysisRes = await fetch(
75
+ `/api/session-analyses?sessionId=${similar.session_id}`,
76
+ );
77
+ if (analysisRes.ok) {
78
+ const analysis = await analysisRes.json();
79
+ return { ...similar, analysis };
80
+ }
81
+ } catch (err) {
82
+ console.error(`Failed to fetch analysis for ${similar.session_id}:`, err);
83
+ }
84
+ return similar;
85
+ }),
86
+ );
87
+
88
+ setSimilarSessions(similarWithAnalysis);
69
89
  } catch (err) {
70
90
  setError(err instanceof Error ? err.message : "Unknown error");
71
91
  } finally {
@@ -179,10 +199,7 @@ export function FindSessions() {
179
199
  <CardContent>
180
200
  <div className="space-y-3">
181
201
  {similarSessions.map((similar) => {
182
- const session = sessions.find(
183
- (s) => s.session_id === similar.session_id,
184
- );
185
- if (!session) return null;
202
+ if (!similar.analysis) return null;
186
203
 
187
204
  return (
188
205
  <a
@@ -197,14 +214,14 @@ export function FindSessions() {
197
214
  {similar.session_id}
198
215
  </span>
199
216
  <span className="text-xs px-2 py-0.5 bg-primary/10 text-primary rounded">
200
- {session.task.intent_type}
217
+ {similar.analysis.task.intent_type}
201
218
  </span>
202
219
  </div>
203
220
  <p className="text-sm line-clamp-2">
204
- {session.task.user_query}
221
+ {similar.analysis.task.user_query}
205
222
  </p>
206
223
  <p className="text-xs text-muted-foreground line-clamp-1">
207
- {session.task.task_summary}
224
+ {similar.analysis.task.task_summary}
208
225
  </p>
209
226
  </div>
210
227
  <div className="flex flex-col items-end gap-1">
package/src/server.ts CHANGED
@@ -285,6 +285,20 @@ export function startDebuggerServer(
285
285
  },
286
286
  },
287
287
 
288
+ "/api/comparison-config/:configId": {
289
+ GET(req) {
290
+ const configId = req.params.configId;
291
+ const config = comparisonDb.getConfig(configId);
292
+ if (!config) {
293
+ return Response.json(
294
+ { error: "Comparison config not found" },
295
+ { status: 404 },
296
+ );
297
+ }
298
+ return Response.json(config);
299
+ },
300
+ },
301
+
288
302
  "/api/comparison-session-ids": {
289
303
  GET() {
290
304
  const sessionIds = comparisonDb.getComparisonSessionIds();
@@ -530,8 +544,43 @@ export function startDebuggerServer(
530
544
 
531
545
  const sessionData = await sessionResponse.json();
532
546
 
547
+ // Fetch agent config to get model for cost calculation
548
+ const agentConfig = await fetchAgentConfig();
549
+ const model = agentConfig?.model || "unknown";
550
+
551
+ // Fetch metrics from OTLP spans
552
+ const allSpans = db.getSpansBySessionAttribute(sessionId);
553
+ const traces = db.listTraces(100, 0, sessionId);
554
+ const sessionMetrics = extractSessionMetrics(
555
+ traces,
556
+ allSpans,
557
+ model,
558
+ );
559
+
560
+ // Convert to AnalysisMetrics format
561
+ const metrics = {
562
+ inputTokens: sessionMetrics.inputTokens,
563
+ outputTokens: sessionMetrics.outputTokens,
564
+ totalTokens: sessionMetrics.totalTokens,
565
+ estimatedCost: sessionMetrics.estimatedCost,
566
+ durationMs: sessionMetrics.durationMs,
567
+ };
568
+
569
+ // Convert tool calls to DetailedToolCall format
570
+ const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
571
+ name: tc.name,
572
+ input: tc.input,
573
+ output: tc.output,
574
+ startTimeUnixNano: tc.startTimeUnixNano,
575
+ endTimeUnixNano: tc.endTimeUnixNano,
576
+ }));
577
+
533
578
  // Analyze with LLM
534
- const analysis = await analyzeSession(sessionData);
579
+ const analysis = await analyzeSession({
580
+ session: sessionData,
581
+ metrics,
582
+ toolCalls,
583
+ });
535
584
 
536
585
  // Persist to database
537
586
  analysisDb.saveAnalysis(analysis);
@@ -581,6 +630,10 @@ export function startDebuggerServer(
581
630
  // Import analyzer dynamically
582
631
  const { analyzeSession } = await import("./analysis/analyzer.js");
583
632
 
633
+ // Fetch agent config once for all sessions
634
+ const agentConfig = await fetchAgentConfig();
635
+ const model = agentConfig?.model || "unknown";
636
+
584
637
  // Process in batches of 25
585
638
  const BATCH_SIZE = 25;
586
639
  const results: Array<{
@@ -616,8 +669,41 @@ export function startDebuggerServer(
616
669
 
617
670
  const sessionData = await sessionResponse.json();
618
671
 
672
+ // Fetch metrics from OTLP spans
673
+ const allSpans = db.getSpansBySessionAttribute(sessionId);
674
+ const traces = db.listTraces(100, 0, sessionId);
675
+ const sessionMetrics = extractSessionMetrics(
676
+ traces,
677
+ allSpans,
678
+ model,
679
+ );
680
+
681
+ // Convert to AnalysisMetrics format
682
+ const metrics = {
683
+ inputTokens: sessionMetrics.inputTokens,
684
+ outputTokens: sessionMetrics.outputTokens,
685
+ totalTokens: sessionMetrics.totalTokens,
686
+ estimatedCost: sessionMetrics.estimatedCost,
687
+ durationMs: sessionMetrics.durationMs,
688
+ };
689
+
690
+ // Convert tool calls to DetailedToolCall format
691
+ const toolCalls = (sessionMetrics.toolCalls || []).map(
692
+ (tc) => ({
693
+ name: tc.name,
694
+ input: tc.input,
695
+ output: tc.output,
696
+ startTimeUnixNano: tc.startTimeUnixNano,
697
+ endTimeUnixNano: tc.endTimeUnixNano,
698
+ }),
699
+ );
700
+
619
701
  // Analyze
620
- const analysis = await analyzeSession(sessionData);
702
+ const analysis = await analyzeSession({
703
+ session: sessionData,
704
+ metrics,
705
+ toolCalls,
706
+ });
621
707
 
622
708
  // Persist
623
709
  analysisDb.saveAnalysis(analysis);