@townco/debugger 0.1.38 → 0.1.48

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +3 -3
  2. package/src/server.ts +883 -864
package/src/server.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { resetDb } from "@townco/otlp-server/db";
2
2
  import { createOtlpServer } from "@townco/otlp-server/http";
3
+ import type { Serve } from "bun";
3
4
  import { serve } from "bun";
4
5
  import { AnalysisDb } from "./analysis-db";
5
6
  import { ComparisonDb } from "./comparison-db";
@@ -17,287 +18,323 @@ import type {
17
18
  export const DEFAULT_DEBUGGER_PORT = 4000;
18
19
  export const DEFAULT_OTLP_PORT = 4318;
19
20
 
20
- export interface DebuggerServerOptions {
21
- port?: number;
22
- otlpPort?: number;
21
+ /**
22
+ * Creates the debugger API routes. Can be used standalone without starting a
23
+ * server.
24
+ */
25
+ export function createDebuggerRoutes(options: {
23
26
  dbPath: string;
24
- agentName?: string;
25
- agentServerUrl?: string;
26
- }
27
-
28
- export interface DebuggerServerResult {
29
- server: ReturnType<typeof serve>;
30
- otlpServer: ReturnType<typeof serve>;
31
- stop: () => void;
32
- }
33
-
34
- export function startDebuggerServer(
35
- options: DebuggerServerOptions,
36
- ): DebuggerServerResult {
37
- const {
38
- port = DEFAULT_DEBUGGER_PORT,
39
- otlpPort = DEFAULT_OTLP_PORT,
40
- dbPath,
41
- agentName = "Agent",
42
- agentServerUrl = "http://localhost:3100",
43
- } = options;
44
-
45
- // Start OTLP server (initializes database internally)
46
- const otlpApp = createOtlpServer({ dbPath });
47
- const otlpServer = serve({
48
- fetch: otlpApp.fetch,
49
- hostname: Bun.env.BIND_HOST || "localhost",
50
- port: otlpPort,
51
- });
27
+ agentName: string;
28
+ agentServerUrl: string;
29
+ }) {
30
+ const { dbPath, agentName, agentServerUrl } = options;
52
31
 
53
- // Create debugger database connection for reading
32
+ // Initialize databases
54
33
  const db = new DebuggerDb(dbPath);
55
-
56
- // Create comparison database for Town Hall feature
57
34
  const comparisonDbPath = dbPath.replace(/\.db$/, "-comparison.db");
58
35
  const comparisonDb = new ComparisonDb(comparisonDbPath);
59
-
60
- // Create analysis database - uses main debugger database
61
36
  const analysisDb = new AnalysisDb(dbPath);
62
37
 
63
- // Helper to fetch agent config from agent server
64
- async function fetchAgentConfig(): Promise<AgentConfig | null> {
65
- try {
66
- // Call agent's initialize RPC to get config
67
- const response = await fetch(`${agentServerUrl}/rpc`, {
68
- method: "POST",
69
- headers: { "Content-Type": "application/json" },
70
- body: JSON.stringify({
71
- jsonrpc: "2.0",
72
- id: "debugger-config",
73
- method: "initialize",
74
- params: {
75
- protocolVersion: 1, // ACP protocol version as number
76
- clientCapabilities: {},
77
- },
78
- }),
79
- });
80
-
81
- if (!response.ok) {
82
- console.error("Failed to fetch agent config:", response.statusText);
83
- return null;
84
- }
85
-
86
- const data = await response.json();
87
-
88
- // Check for JSON-RPC error
89
- if (data.error) {
90
- console.error("Agent RPC error:", data.error);
91
- return null;
92
- }
93
-
94
- const result = data.result;
95
- if (!result) {
96
- console.error("No result in agent response");
97
- return null;
98
- }
99
-
100
- // Extract config from initialize response
101
- return {
102
- model: result._meta?.model || "unknown",
103
- systemPrompt: result._meta?.systemPrompt || null,
104
- tools: result._meta?.tools || [],
105
- };
106
- } catch (error) {
107
- console.error("Error fetching agent config:", error);
108
- return null;
109
- }
110
- }
38
+ const fetchAgentConfig = () => fetchAgentConfigFromServer(agentServerUrl);
111
39
 
112
- // Start debugger UI server
113
- const server = serve({
114
- port,
115
- idleTimeout: 120, // 2 minutes for long-running LLM analysis requests
116
- routes: {
117
- "/api/config": {
118
- GET() {
119
- return Response.json({ agentName });
120
- },
40
+ return defineRoutes({
41
+ "/api/config": {
42
+ GET() {
43
+ return Response.json({ agentName });
121
44
  },
45
+ },
122
46
 
123
- "/api/reset-database": {
124
- POST() {
125
- try {
126
- resetDb();
127
- return new Response("Database reset successfully", { status: 200 });
128
- } catch (error) {
129
- console.error("Error resetting database:", error);
130
- return new Response(
131
- `Failed to reset database: ${error instanceof Error ? error.message : String(error)}`,
132
- { status: 500 },
133
- );
134
- }
135
- },
47
+ "/api/reset-database": {
48
+ POST() {
49
+ try {
50
+ resetDb();
51
+ return new Response("Database reset successfully", { status: 200 });
52
+ } catch (error) {
53
+ console.error("Error resetting database:", error);
54
+ return new Response(
55
+ `Failed to reset database: ${error instanceof Error ? error.message : String(error)}`,
56
+ { status: 500 },
57
+ );
58
+ }
136
59
  },
60
+ },
137
61
 
138
- "/api/sessions": {
139
- GET(req) {
140
- const url = new URL(req.url);
141
- const limit = Number.parseInt(
142
- url.searchParams.get("limit") || "1000",
143
- 10,
144
- );
145
- const offset = Number.parseInt(
146
- url.searchParams.get("offset") || "0",
147
- 10,
148
- );
149
- const sessions = db.listSessions(limit, offset);
150
- return Response.json(sessions);
151
- },
62
+ "/api/sessions": {
63
+ GET(req) {
64
+ const url = new URL(req.url);
65
+ const limit = Number.parseInt(
66
+ url.searchParams.get("limit") || "1000",
67
+ 10,
68
+ );
69
+ const offset = Number.parseInt(
70
+ url.searchParams.get("offset") || "0",
71
+ 10,
72
+ );
73
+ const sessions = db.listSessions(limit, offset);
74
+ return Response.json(sessions);
152
75
  },
76
+ },
153
77
 
154
- "/api/traces": {
155
- GET(req) {
156
- const url = new URL(req.url);
157
- const limit = Number.parseInt(
158
- url.searchParams.get("limit") || "50",
159
- 10,
160
- );
161
- const offset = Number.parseInt(
162
- url.searchParams.get("offset") || "0",
163
- 10,
164
- );
165
- const sessionId = url.searchParams.get("sessionId") || undefined;
166
- const traces = db.listTraces(limit, offset, sessionId);
167
- return Response.json(traces);
168
- },
78
+ "/api/traces": {
79
+ GET(req) {
80
+ const url = new URL(req.url);
81
+ const limit = Number.parseInt(
82
+ url.searchParams.get("limit") || "50",
83
+ 10,
84
+ );
85
+ const offset = Number.parseInt(
86
+ url.searchParams.get("offset") || "0",
87
+ 10,
88
+ );
89
+ const sessionId = url.searchParams.get("sessionId") || undefined;
90
+ const traces = db.listTraces(limit, offset, sessionId);
91
+ return Response.json(traces);
169
92
  },
93
+ },
170
94
 
171
- "/api/traces/:traceId": {
172
- GET(req) {
173
- const traceId = req.params.traceId;
174
- const data = db.getTraceById(traceId);
175
- if (!data.trace) {
176
- return Response.json({ error: "Trace not found" }, { status: 404 });
177
- }
178
- // Extract messages on the server side
95
+ "/api/traces/:traceId": {
96
+ GET(req) {
97
+ const traceId = req.params.traceId;
98
+ const data = db.getTraceById(traceId);
99
+ if (!data.trace) {
100
+ return Response.json({ error: "Trace not found" }, { status: 404 });
101
+ }
102
+ // Extract messages on the server side
103
+ const messages = extractTurnMessages(data.spans, data.logs);
104
+ return Response.json({ ...data, messages });
105
+ },
106
+ },
107
+
108
+ "/api/session-conversation": {
109
+ GET(req) {
110
+ const url = new URL(req.url);
111
+ const sessionId = url.searchParams.get("sessionId");
112
+ if (!sessionId) {
113
+ return Response.json(
114
+ { error: "sessionId parameter is required" },
115
+ { status: 400 },
116
+ );
117
+ }
118
+
119
+ // Query traces by session attribute to avoid race conditions
120
+ const traceIds = db.getTraceIdsBySessionAttribute(sessionId);
121
+
122
+ // Extract messages for each trace
123
+ const conversation: ConversationTrace[] = traceIds.map((traceInfo) => {
124
+ const data = db.getTraceById(traceInfo.trace_id);
179
125
  const messages = extractTurnMessages(data.spans, data.logs);
180
- return Response.json({ ...data, messages });
181
- },
126
+ return {
127
+ trace_id: traceInfo.trace_id,
128
+ start_time_unix_nano: traceInfo.start_time_unix_nano,
129
+ userInput: messages.userInput,
130
+ llmOutput: messages.llmOutput,
131
+ agentMessages: messages.agentMessages,
132
+ };
133
+ });
134
+
135
+ return Response.json(conversation);
182
136
  },
137
+ },
183
138
 
184
- "/api/session-conversation": {
185
- GET(req) {
186
- const url = new URL(req.url);
187
- const sessionId = url.searchParams.get("sessionId");
188
- if (!sessionId) {
189
- return Response.json(
190
- { error: "sessionId parameter is required" },
191
- { status: 400 },
192
- );
193
- }
139
+ // Town Hall API endpoints
194
140
 
195
- // Query traces by session attribute to avoid race conditions
196
- const traceIds = db.getTraceIdsBySessionAttribute(sessionId);
197
-
198
- // Extract messages for each trace
199
- const conversation: ConversationTrace[] = traceIds.map(
200
- (traceInfo) => {
201
- const data = db.getTraceById(traceInfo.trace_id);
202
- const messages = extractTurnMessages(data.spans, data.logs);
203
- return {
204
- trace_id: traceInfo.trace_id,
205
- start_time_unix_nano: traceInfo.start_time_unix_nano,
206
- userInput: messages.userInput,
207
- llmOutput: messages.llmOutput,
208
- agentMessages: messages.agentMessages,
209
- };
210
- },
141
+ "/api/agent-config": {
142
+ async GET() {
143
+ const config = await fetchAgentConfig();
144
+ if (!config) {
145
+ return Response.json(
146
+ { error: "Failed to fetch agent config" },
147
+ { status: 503 },
211
148
  );
149
+ }
150
+ return Response.json(config);
151
+ },
152
+ },
212
153
 
213
- return Response.json(conversation);
214
- },
154
+ "/api/available-models": {
155
+ GET() {
156
+ // List of supported models for comparison
157
+ const models = [
158
+ // Anthropic models
159
+ "claude-sonnet-4-5-20250929",
160
+ "claude-3-5-haiku-20241022",
161
+ "claude-opus-4-5-20251101",
162
+ // Google Gemini models
163
+ "gemini-2.0-flash",
164
+ "gemini-1.5-pro",
165
+ "gemini-1.5-flash",
166
+ ];
167
+ return Response.json({ models });
215
168
  },
169
+ },
216
170
 
217
- // Town Hall API endpoints
171
+ "/api/session-first-message/:sessionId": {
172
+ GET(req) {
173
+ const sessionId = req.params.sessionId;
218
174
 
219
- "/api/agent-config": {
220
- async GET() {
221
- const config = await fetchAgentConfig();
222
- if (!config) {
223
- return Response.json(
224
- { error: "Failed to fetch agent config" },
225
- { status: 503 },
226
- );
227
- }
228
- return Response.json(config);
229
- },
175
+ // Query logs directly by session attribute to avoid race conditions
176
+ // with trace.session_id association during concurrent sessions
177
+ const message = db.getFirstUserMessageBySession(sessionId);
178
+
179
+ if (!message) {
180
+ return Response.json(
181
+ { error: "Session not found or has no user message" },
182
+ { status: 404 },
183
+ );
184
+ }
185
+
186
+ return Response.json({ message });
230
187
  },
188
+ },
231
189
 
232
- "/api/available-models": {
233
- GET() {
234
- // List of supported models for comparison
235
- const models = [
236
- // Anthropic models
237
- "claude-sonnet-4-5-20250929",
238
- "claude-3-5-haiku-20241022",
239
- "claude-opus-4-5-20251101",
240
- // Google Gemini models
241
- "gemini-2.0-flash",
242
- "gemini-1.5-pro",
243
- "gemini-1.5-flash",
244
- ];
245
- return Response.json({ models });
246
- },
190
+ "/api/comparison-config": {
191
+ GET() {
192
+ const config = comparisonDb.getLatestConfig();
193
+ return Response.json(config);
247
194
  },
195
+ async POST(req) {
196
+ try {
197
+ const body = await req.json();
198
+ const config: ComparisonConfig = {
199
+ id: body.id || crypto.randomUUID(),
200
+ dimensions: body.dimensions || [],
201
+ controlModel: body.controlModel,
202
+ variantModel: body.variantModel,
203
+ variantSystemPrompt: body.variantSystemPrompt,
204
+ variantTools: body.variantTools,
205
+ createdAt: body.createdAt || new Date().toISOString(),
206
+ updatedAt: new Date().toISOString(),
207
+ };
208
+ comparisonDb.saveConfig(config);
209
+ return Response.json({ id: config.id });
210
+ } catch (error) {
211
+ console.error("Error saving comparison config:", error);
212
+ return Response.json(
213
+ { error: "Invalid request body" },
214
+ { status: 400 },
215
+ );
216
+ }
217
+ },
218
+ },
248
219
 
249
- "/api/session-first-message/:sessionId": {
250
- GET(req) {
251
- const sessionId = req.params.sessionId;
220
+ "/api/comparison-config/:configId": {
221
+ GET(req) {
222
+ const configId = req.params.configId;
223
+ const config = comparisonDb.getConfig(configId);
224
+ if (!config) {
225
+ return Response.json(
226
+ { error: "Comparison config not found" },
227
+ { status: 404 },
228
+ );
229
+ }
230
+ return Response.json(config);
231
+ },
232
+ },
252
233
 
253
- // Query logs directly by session attribute to avoid race conditions
254
- // with trace.session_id association during concurrent sessions
255
- const message = db.getFirstUserMessageBySession(sessionId);
234
+ "/api/comparison-session-ids": {
235
+ GET() {
236
+ const sessionIds = comparisonDb.getComparisonSessionIds();
237
+ return Response.json({ sessionIds });
238
+ },
239
+ },
256
240
 
257
- if (!message) {
258
- return Response.json(
259
- { error: "Session not found or has no user message" },
260
- { status: 404 },
261
- );
262
- }
241
+ "/api/comparison-runs": {
242
+ GET(req) {
243
+ const url = new URL(req.url);
244
+ const limit = Number.parseInt(
245
+ url.searchParams.get("limit") || "50",
246
+ 10,
247
+ );
248
+ const offset = Number.parseInt(
249
+ url.searchParams.get("offset") || "0",
250
+ 10,
251
+ );
252
+ const sourceSessionId = url.searchParams.get("sourceSessionId");
253
+
254
+ if (sourceSessionId) {
255
+ const runs = comparisonDb.listRunsBySourceSession(sourceSessionId);
256
+ return Response.json(runs);
257
+ }
263
258
 
264
- return Response.json({ message });
265
- },
259
+ const runs = comparisonDb.listRuns(limit, offset);
260
+ return Response.json(runs);
266
261
  },
262
+ },
267
263
 
268
- "/api/comparison-config": {
269
- GET() {
270
- const config = comparisonDb.getLatestConfig();
271
- return Response.json(config);
272
- },
273
- async POST(req) {
274
- try {
275
- const body = await req.json();
276
- const config: ComparisonConfig = {
277
- id: body.id || crypto.randomUUID(),
278
- dimensions: body.dimensions || [],
279
- controlModel: body.controlModel,
280
- variantModel: body.variantModel,
281
- variantSystemPrompt: body.variantSystemPrompt,
282
- variantTools: body.variantTools,
283
- createdAt: body.createdAt || new Date().toISOString(),
284
- updatedAt: new Date().toISOString(),
285
- };
286
- comparisonDb.saveConfig(config);
287
- return Response.json({ id: config.id });
288
- } catch (error) {
289
- console.error("Error saving comparison config:", error);
264
+ "/api/comparison-run/:runId": {
265
+ GET(req) {
266
+ const runId = req.params.runId;
267
+ const run = comparisonDb.getRun(runId);
268
+ if (!run) {
269
+ return Response.json(
270
+ { error: "Comparison run not found" },
271
+ { status: 404 },
272
+ );
273
+ }
274
+
275
+ const config = comparisonDb.getConfig(run.configId);
276
+ const controlModel =
277
+ config?.controlModel ??
278
+ config?.variantModel ??
279
+ "claude-sonnet-4-5-20250929";
280
+ const variantModel =
281
+ config?.variantModel ??
282
+ config?.controlModel ??
283
+ "claude-sonnet-4-5-20250929";
284
+
285
+ const maybeRefreshMetrics = (
286
+ sessionId: string | null,
287
+ cached: SessionMetrics | null,
288
+ model: string,
289
+ ): SessionMetrics | null => {
290
+ if (!sessionId) return cached;
291
+ const needsRefresh =
292
+ !cached ||
293
+ cached.totalTokens === 0 ||
294
+ cached.toolCallCount === 0 ||
295
+ !cached.toolCalls ||
296
+ cached.toolCalls.length === 0;
297
+ if (!needsRefresh) return cached;
298
+
299
+ const spans = db.getSpansBySessionAttribute(sessionId);
300
+ if (spans.length === 0) return cached;
301
+ const traces = db.listTraces(100, 0, sessionId);
302
+ return extractSessionMetrics(traces, spans, model);
303
+ };
304
+
305
+ const controlMetrics = maybeRefreshMetrics(
306
+ run.controlSessionId,
307
+ run.controlMetrics,
308
+ controlModel,
309
+ );
310
+ const variantMetrics = maybeRefreshMetrics(
311
+ run.variantSessionId,
312
+ run.variantMetrics,
313
+ variantModel,
314
+ );
315
+
316
+ return Response.json({
317
+ ...run,
318
+ controlMetrics,
319
+ variantMetrics,
320
+ });
321
+ },
322
+ },
323
+
324
+ "/api/run-comparison": {
325
+ async POST(req) {
326
+ try {
327
+ const body = await req.json();
328
+ const { sessionId, configId } = body;
329
+
330
+ if (!sessionId || !configId) {
290
331
  return Response.json(
291
- { error: "Invalid request body" },
332
+ { error: "sessionId and configId are required" },
292
333
  { status: 400 },
293
334
  );
294
335
  }
295
- },
296
- },
297
336
 
298
- "/api/comparison-config/:configId": {
299
- GET(req) {
300
- const configId = req.params.configId;
337
+ // Get the comparison config
301
338
  const config = comparisonDb.getConfig(configId);
302
339
  if (!config) {
303
340
  return Response.json(
@@ -305,733 +342,664 @@ export function startDebuggerServer(
305
342
  { status: 404 },
306
343
  );
307
344
  }
308
- return Response.json(config);
309
- },
310
- },
311
345
 
312
- "/api/comparison-session-ids": {
313
- GET() {
314
- const sessionIds = comparisonDb.getComparisonSessionIds();
315
- return Response.json({ sessionIds });
316
- },
317
- },
318
-
319
- "/api/comparison-runs": {
320
- GET(req) {
321
- const url = new URL(req.url);
322
- const limit = Number.parseInt(
323
- url.searchParams.get("limit") || "50",
324
- 10,
325
- );
326
- const offset = Number.parseInt(
327
- url.searchParams.get("offset") || "0",
328
- 10,
329
- );
330
- const sourceSessionId = url.searchParams.get("sourceSessionId");
331
-
332
- if (sourceSessionId) {
333
- const runs = comparisonDb.listRunsBySourceSession(sourceSessionId);
334
- return Response.json(runs);
346
+ // Get the first user message from the source session
347
+ const traces = db.listTraces(1, 0, sessionId);
348
+ if (traces.length === 0) {
349
+ return Response.json(
350
+ { error: "Source session not found" },
351
+ { status: 404 },
352
+ );
335
353
  }
336
354
 
337
- const runs = comparisonDb.listRuns(limit, offset);
338
- return Response.json(runs);
339
- },
340
- },
341
-
342
- "/api/comparison-run/:runId": {
343
- GET(req) {
344
- const runId = req.params.runId;
345
- const run = comparisonDb.getRun(runId);
346
- if (!run) {
355
+ const trace = traces[0];
356
+ if (!trace) {
347
357
  return Response.json(
348
- { error: "Comparison run not found" },
358
+ { error: "Source session not found" },
349
359
  { status: 404 },
350
360
  );
351
361
  }
352
362
 
353
- const config = comparisonDb.getConfig(run.configId);
354
- const controlModel =
355
- config?.controlModel ??
356
- config?.variantModel ??
357
- "claude-sonnet-4-5-20250929";
358
- const variantModel =
359
- config?.variantModel ??
360
- config?.controlModel ??
361
- "claude-sonnet-4-5-20250929";
362
-
363
- const maybeRefreshMetrics = (
364
- sessionId: string | null,
365
- cached: SessionMetrics | null,
366
- model: string,
367
- ): SessionMetrics | null => {
368
- if (!sessionId) return cached;
369
- const needsRefresh =
370
- !cached ||
371
- cached.totalTokens === 0 ||
372
- cached.toolCallCount === 0 ||
373
- !cached.toolCalls ||
374
- cached.toolCalls.length === 0;
375
- if (!needsRefresh) return cached;
363
+ const data = db.getTraceById(trace.trace_id);
364
+ const messages = extractTurnMessages(data.spans, data.logs);
376
365
 
377
- const spans = db.getSpansBySessionAttribute(sessionId);
378
- if (spans.length === 0) return cached;
379
- const traces = db.listTraces(100, 0, sessionId);
380
- return extractSessionMetrics(traces, spans, model);
381
- };
366
+ if (!messages.userInput) {
367
+ return Response.json(
368
+ { error: "No user message found in source session" },
369
+ { status: 400 },
370
+ );
371
+ }
382
372
 
383
- const controlMetrics = maybeRefreshMetrics(
384
- run.controlSessionId,
385
- run.controlMetrics,
386
- controlModel,
387
- );
388
- const variantMetrics = maybeRefreshMetrics(
389
- run.variantSessionId,
390
- run.variantMetrics,
391
- variantModel,
373
+ // Create the comparison run
374
+ const run = comparisonDb.createRun(
375
+ configId,
376
+ sessionId,
377
+ messages.userInput,
392
378
  );
393
379
 
380
+ // Return the run info - actual execution will be handled by the frontend
381
+ // which will create two ACP sessions and run them in parallel
394
382
  return Response.json({
395
- ...run,
383
+ runId: run.id,
384
+ firstUserMessage: run.firstUserMessage,
385
+ config,
386
+ });
387
+ } catch (error) {
388
+ console.error("Error starting comparison:", error);
389
+ return Response.json(
390
+ { error: "Failed to start comparison" },
391
+ { status: 500 },
392
+ );
393
+ }
394
+ },
395
+ },
396
+
397
+ "/api/comparison-run/:runId/update": {
398
+ async POST(req) {
399
+ try {
400
+ const runId = req.params.runId;
401
+ const body = await req.json();
402
+ const {
403
+ status,
404
+ controlSessionId,
405
+ variantSessionId,
396
406
  controlMetrics,
397
407
  variantMetrics,
408
+ controlResponse,
409
+ variantResponse,
410
+ } = body;
411
+
412
+ comparisonDb.updateRunStatus(runId, status, {
413
+ controlSessionId,
414
+ variantSessionId,
415
+ controlMetrics,
416
+ variantMetrics,
417
+ controlResponse,
418
+ variantResponse,
398
419
  });
399
- },
420
+
421
+ return Response.json({ success: true });
422
+ } catch (_error) {
423
+ return Response.json(
424
+ { error: "Failed to update comparison run" },
425
+ { status: 500 },
426
+ );
427
+ }
400
428
  },
429
+ },
401
430
 
402
- "/api/run-comparison": {
403
- async POST(req) {
404
- try {
405
- const body = await req.json();
406
- const { sessionId, configId } = body;
431
+ "/api/session-metrics/:sessionId": {
432
+ async GET(req) {
433
+ const sessionId = req.params.sessionId;
434
+ const url = new URL(req.url);
435
+ const model = url.searchParams.get("model") || "unknown";
436
+
437
+ // Query spans by their agent.session_id attribute directly
438
+ // This is more reliable than trace-based lookup because concurrent
439
+ // sessions can cause race conditions in trace association
440
+ const allSpans = db.getSpansBySessionAttribute(sessionId);
441
+
442
+ if (allSpans.length === 0) {
443
+ return Response.json(
444
+ { error: "Session not found or has no traces" },
445
+ { status: 404 },
446
+ );
447
+ }
407
448
 
408
- if (!sessionId || !configId) {
409
- return Response.json(
410
- { error: "sessionId and configId are required" },
411
- { status: 400 },
412
- );
413
- }
449
+ // Get traces for duration calculation (use empty array if not found)
450
+ const traces = db.listTraces(100, 0, sessionId);
414
451
 
415
- // Get the comparison config
416
- const config = comparisonDb.getConfig(configId);
417
- if (!config) {
418
- return Response.json(
419
- { error: "Comparison config not found" },
420
- { status: 404 },
421
- );
422
- }
452
+ // Extract metrics
453
+ const metrics = extractSessionMetrics(traces, allSpans, model);
454
+ return Response.json(metrics);
455
+ },
456
+ },
423
457
 
424
- // Get the first user message from the source session
425
- const traces = db.listTraces(1, 0, sessionId);
426
- if (traces.length === 0) {
427
- return Response.json(
428
- { error: "Source session not found" },
429
- { status: 404 },
430
- );
431
- }
458
+ "/api/analyze-session/:sessionId": {
459
+ async POST(req) {
460
+ const sessionId = req.params.sessionId;
432
461
 
433
- const trace = traces[0];
434
- if (!trace) {
435
- return Response.json(
436
- { error: "Source session not found" },
437
- { status: 404 },
438
- );
439
- }
462
+ try {
463
+ // Import analyzer dynamically to avoid loading at startup
464
+ const { analyzeSession } = await import("./analysis/analyzer.js");
440
465
 
441
- const data = db.getTraceById(trace.trace_id);
442
- const messages = extractTurnMessages(data.spans, data.logs);
466
+ // Fetch session from agent server via ACP HTTP API
467
+ const sessionResponse = await fetch(
468
+ `${agentServerUrl}/sessions/${sessionId}`,
469
+ );
443
470
 
444
- if (!messages.userInput) {
471
+ if (!sessionResponse.ok) {
472
+ if (sessionResponse.status === 404) {
445
473
  return Response.json(
446
- { error: "No user message found in source session" },
447
- { status: 400 },
474
+ { error: "Session not found" },
475
+ { status: 404 },
448
476
  );
449
477
  }
450
-
451
- // Create the comparison run
452
- const run = comparisonDb.createRun(
453
- configId,
454
- sessionId,
455
- messages.userInput,
456
- );
457
-
458
- // Return the run info - actual execution will be handled by the frontend
459
- // which will create two ACP sessions and run them in parallel
460
- return Response.json({
461
- runId: run.id,
462
- firstUserMessage: run.firstUserMessage,
463
- config,
464
- });
465
- } catch (error) {
466
- console.error("Error starting comparison:", error);
467
- return Response.json(
468
- { error: "Failed to start comparison" },
469
- { status: 500 },
478
+ throw new Error(
479
+ `Failed to fetch session: ${sessionResponse.statusText}`,
470
480
  );
471
481
  }
472
- },
473
- },
474
482
 
475
- "/api/comparison-run/:runId/update": {
476
- async POST(req) {
477
- try {
478
- const runId = req.params.runId;
479
- const body = await req.json();
480
- const {
481
- status,
482
- controlSessionId,
483
- variantSessionId,
484
- controlMetrics,
485
- variantMetrics,
486
- controlResponse,
487
- variantResponse,
488
- } = body;
489
-
490
- comparisonDb.updateRunStatus(runId, status, {
491
- controlSessionId,
492
- variantSessionId,
493
- controlMetrics,
494
- variantMetrics,
495
- controlResponse,
496
- variantResponse,
497
- });
498
-
499
- return Response.json({ success: true });
500
- } catch (_error) {
501
- return Response.json(
502
- { error: "Failed to update comparison run" },
503
- { status: 500 },
504
- );
505
- }
506
- },
507
- },
483
+ const sessionData = await sessionResponse.json();
508
484
 
509
- "/api/session-metrics/:sessionId": {
510
- async GET(req) {
511
- const sessionId = req.params.sessionId;
512
- const url = new URL(req.url);
513
- const model = url.searchParams.get("model") || "unknown";
485
+ // Fetch agent config to get model for cost calculation
486
+ const agentConfig = await fetchAgentConfig();
487
+ const model = agentConfig?.model || "unknown";
514
488
 
515
- // Query spans by their agent.session_id attribute directly
516
- // This is more reliable than trace-based lookup because concurrent
517
- // sessions can cause race conditions in trace association
489
+ // Fetch metrics from OTLP spans
518
490
  const allSpans = db.getSpansBySessionAttribute(sessionId);
519
-
520
- if (allSpans.length === 0) {
521
- return Response.json(
522
- { error: "Session not found or has no traces" },
523
- { status: 404 },
524
- );
525
- }
526
-
527
- // Get traces for duration calculation (use empty array if not found)
528
491
  const traces = db.listTraces(100, 0, sessionId);
492
+ const sessionMetrics = extractSessionMetrics(traces, allSpans, model);
493
+
494
+ // Convert to AnalysisMetrics format
495
+ const metrics = {
496
+ inputTokens: sessionMetrics.inputTokens,
497
+ outputTokens: sessionMetrics.outputTokens,
498
+ totalTokens: sessionMetrics.totalTokens,
499
+ estimatedCost: sessionMetrics.estimatedCost,
500
+ durationMs: sessionMetrics.durationMs,
501
+ };
529
502
 
530
- // Extract metrics
531
- const metrics = extractSessionMetrics(traces, allSpans, model);
532
- return Response.json(metrics);
533
- },
534
- },
503
+ // Convert tool calls to DetailedToolCall format
504
+ const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
505
+ name: tc.name,
506
+ input: tc.input,
507
+ output: tc.output,
508
+ startTimeUnixNano: tc.startTimeUnixNano,
509
+ endTimeUnixNano: tc.endTimeUnixNano,
510
+ }));
511
+
512
+ // Analyze with LLM
513
+ const analysis = await analyzeSession({
514
+ session: sessionData,
515
+ metrics,
516
+ toolCalls,
517
+ });
535
518
 
536
- "/api/analyze-session/:sessionId": {
537
- async POST(req) {
538
- const sessionId = req.params.sessionId;
519
+ // Persist to database
520
+ analysisDb.saveAnalysis(analysis);
539
521
 
522
+ // Generate and save embedding
540
523
  try {
541
- // Import analyzer dynamically to avoid loading at startup
542
- const { analyzeSession } = await import("./analysis/analyzer.js");
543
-
544
- // Fetch session from agent server via ACP HTTP API
545
- const sessionResponse = await fetch(
546
- `${agentServerUrl}/sessions/${sessionId}`,
524
+ const { embedAnalysis } = await import("./analysis/embeddings.js");
525
+ const embedding = await embedAnalysis(analysis);
526
+ await analysisDb.saveEmbedding(analysis.session_id, embedding);
527
+ } catch (error) {
528
+ console.error(
529
+ `Failed to generate embedding for ${sessionId}:`,
530
+ error,
547
531
  );
532
+ // Continue - don't fail entire analysis
533
+ }
548
534
 
549
- if (!sessionResponse.ok) {
550
- if (sessionResponse.status === 404) {
551
- return Response.json(
552
- { error: "Session not found" },
553
- { status: 404 },
554
- );
555
- }
556
- throw new Error(
557
- `Failed to fetch session: ${sessionResponse.statusText}`,
558
- );
559
- }
560
-
561
- const sessionData = await sessionResponse.json();
562
-
563
- // Fetch agent config to get model for cost calculation
564
- const agentConfig = await fetchAgentConfig();
565
- const model = agentConfig?.model || "unknown";
566
-
567
- // Fetch metrics from OTLP spans
568
- const allSpans = db.getSpansBySessionAttribute(sessionId);
569
- const traces = db.listTraces(100, 0, sessionId);
570
- const sessionMetrics = extractSessionMetrics(
571
- traces,
572
- allSpans,
573
- model,
574
- );
535
+ return Response.json(analysis);
536
+ } catch (error) {
537
+ console.error("Session analysis error:", error);
538
+ return Response.json(
539
+ {
540
+ error: error instanceof Error ? error.message : "Analysis failed",
541
+ },
542
+ { status: 500 },
543
+ );
544
+ }
545
+ },
546
+ },
575
547
 
576
- // Convert to AnalysisMetrics format
577
- const metrics = {
578
- inputTokens: sessionMetrics.inputTokens,
579
- outputTokens: sessionMetrics.outputTokens,
580
- totalTokens: sessionMetrics.totalTokens,
581
- estimatedCost: sessionMetrics.estimatedCost,
582
- durationMs: sessionMetrics.durationMs,
583
- };
584
-
585
- // Convert tool calls to DetailedToolCall format
586
- const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
587
- name: tc.name,
588
- input: tc.input,
589
- output: tc.output,
590
- startTimeUnixNano: tc.startTimeUnixNano,
591
- endTimeUnixNano: tc.endTimeUnixNano,
592
- }));
593
-
594
- // Analyze with LLM
595
- const analysis = await analyzeSession({
596
- session: sessionData,
597
- metrics,
598
- toolCalls,
599
- });
600
-
601
- // Persist to database
602
- analysisDb.saveAnalysis(analysis);
603
-
604
- // Generate and save embedding
605
- try {
606
- const { embedAnalysis } = await import(
607
- "./analysis/embeddings.js"
608
- );
609
- const embedding = await embedAnalysis(analysis);
610
- await analysisDb.saveEmbedding(analysis.session_id, embedding);
611
- } catch (error) {
612
- console.error(
613
- `Failed to generate embedding for ${sessionId}:`,
614
- error,
615
- );
616
- // Continue - don't fail entire analysis
617
- }
548
+ "/api/analyze-all-sessions": {
549
+ async POST(req) {
550
+ try {
551
+ const body = await req.json();
552
+ const { sessionIds } = body as { sessionIds: string[] };
618
553
 
619
- return Response.json(analysis);
620
- } catch (error) {
621
- console.error("Session analysis error:", error);
554
+ if (!Array.isArray(sessionIds)) {
622
555
  return Response.json(
623
- {
624
- error:
625
- error instanceof Error ? error.message : "Analysis failed",
626
- },
627
- { status: 500 },
556
+ { error: "sessionIds must be an array" },
557
+ { status: 400 },
628
558
  );
629
559
  }
630
- },
631
- },
632
560
 
633
- "/api/analyze-all-sessions": {
634
- async POST(req) {
635
- try {
636
- const body = await req.json();
637
- const { sessionIds } = body as { sessionIds: string[] };
561
+ // Import analyzer dynamically
562
+ const { analyzeSession } = await import("./analysis/analyzer.js");
638
563
 
639
- if (!Array.isArray(sessionIds)) {
640
- return Response.json(
641
- { error: "sessionIds must be an array" },
642
- { status: 400 },
643
- );
644
- }
564
+ // Fetch agent config once for all sessions
565
+ const agentConfig = await fetchAgentConfig();
566
+ const model = agentConfig?.model || "unknown";
645
567
 
646
- // Import analyzer dynamically
647
- const { analyzeSession } = await import("./analysis/analyzer.js");
568
+ // Process in batches of 25
569
+ const BATCH_SIZE = 25;
570
+ const results: Array<{
571
+ session_id: string;
572
+ success: boolean;
573
+ error?: string;
574
+ }> = [];
648
575
 
649
- // Fetch agent config once for all sessions
650
- const agentConfig = await fetchAgentConfig();
651
- const model = agentConfig?.model || "unknown";
576
+ const totalBatches = Math.ceil(sessionIds.length / BATCH_SIZE);
577
+ console.log(
578
+ `✨ Starting batch analysis of ${sessionIds.length} sessions (${totalBatches} batches)...`,
579
+ );
652
580
 
653
- // Process in batches of 25
654
- const BATCH_SIZE = 25;
655
- const results: Array<{
656
- session_id: string;
657
- success: boolean;
658
- error?: string;
659
- }> = [];
581
+ for (let i = 0; i < sessionIds.length; i += BATCH_SIZE) {
582
+ const batch = sessionIds.slice(i, i + BATCH_SIZE);
583
+ const batchNum = Math.floor(i / BATCH_SIZE) + 1;
660
584
 
661
- const totalBatches = Math.ceil(sessionIds.length / BATCH_SIZE);
662
585
  console.log(
663
- `✨ Starting batch analysis of ${sessionIds.length} sessions (${totalBatches} batches)...`,
586
+ `📊 Processing batch ${batchNum}/${totalBatches} (${batch.length} sessions)...`,
664
587
  );
665
588
 
666
- for (let i = 0; i < sessionIds.length; i += BATCH_SIZE) {
667
- const batch = sessionIds.slice(i, i + BATCH_SIZE);
668
- const batchNum = Math.floor(i / BATCH_SIZE) + 1;
589
+ // Run batch in parallel
590
+ const batchResults = await Promise.allSettled(
591
+ batch.map(async (sessionId) => {
592
+ // Fetch session data
593
+ const sessionResponse = await fetch(
594
+ `${agentServerUrl}/sessions/${sessionId}`,
595
+ );
669
596
 
670
- console.log(
671
- `📊 Processing batch ${batchNum}/${totalBatches} (${batch.length} sessions)...`,
672
- );
597
+ if (!sessionResponse.ok) {
598
+ throw new Error(`Failed to fetch session ${sessionId}`);
599
+ }
673
600
 
674
- // Run batch in parallel
675
- const batchResults = await Promise.allSettled(
676
- batch.map(async (sessionId) => {
677
- // Fetch session data
678
- const sessionResponse = await fetch(
679
- `${agentServerUrl}/sessions/${sessionId}`,
680
- );
601
+ const sessionData = await sessionResponse.json();
681
602
 
682
- if (!sessionResponse.ok) {
683
- throw new Error(`Failed to fetch session ${sessionId}`);
684
- }
603
+ // Fetch metrics from OTLP spans
604
+ const allSpans = db.getSpansBySessionAttribute(sessionId);
605
+ const traces = db.listTraces(100, 0, sessionId);
606
+ const sessionMetrics = extractSessionMetrics(
607
+ traces,
608
+ allSpans,
609
+ model,
610
+ );
685
611
 
686
- const sessionData = await sessionResponse.json();
612
+ // Convert to AnalysisMetrics format
613
+ const metrics = {
614
+ inputTokens: sessionMetrics.inputTokens,
615
+ outputTokens: sessionMetrics.outputTokens,
616
+ totalTokens: sessionMetrics.totalTokens,
617
+ estimatedCost: sessionMetrics.estimatedCost,
618
+ durationMs: sessionMetrics.durationMs,
619
+ };
620
+
621
+ // Convert tool calls to DetailedToolCall format
622
+ const toolCalls = (sessionMetrics.toolCalls || []).map(
623
+ (tc) => ({
624
+ name: tc.name,
625
+ input: tc.input,
626
+ output: tc.output,
627
+ startTimeUnixNano: tc.startTimeUnixNano,
628
+ endTimeUnixNano: tc.endTimeUnixNano,
629
+ }),
630
+ );
687
631
 
688
- // Fetch metrics from OTLP spans
689
- const allSpans = db.getSpansBySessionAttribute(sessionId);
690
- const traces = db.listTraces(100, 0, sessionId);
691
- const sessionMetrics = extractSessionMetrics(
692
- traces,
693
- allSpans,
694
- model,
695
- );
632
+ // Analyze
633
+ const analysis = await analyzeSession({
634
+ session: sessionData,
635
+ metrics,
636
+ toolCalls,
637
+ });
696
638
 
697
- // Convert to AnalysisMetrics format
698
- const metrics = {
699
- inputTokens: sessionMetrics.inputTokens,
700
- outputTokens: sessionMetrics.outputTokens,
701
- totalTokens: sessionMetrics.totalTokens,
702
- estimatedCost: sessionMetrics.estimatedCost,
703
- durationMs: sessionMetrics.durationMs,
704
- };
705
-
706
- // Convert tool calls to DetailedToolCall format
707
- const toolCalls = (sessionMetrics.toolCalls || []).map(
708
- (tc) => ({
709
- name: tc.name,
710
- input: tc.input,
711
- output: tc.output,
712
- startTimeUnixNano: tc.startTimeUnixNano,
713
- endTimeUnixNano: tc.endTimeUnixNano,
714
- }),
639
+ // Persist
640
+ analysisDb.saveAnalysis(analysis);
641
+
642
+ // Generate and save embedding
643
+ try {
644
+ const { embedAnalysis } = await import(
645
+ "./analysis/embeddings.js"
646
+ );
647
+ const embedding = await embedAnalysis(analysis);
648
+ await analysisDb.saveEmbedding(sessionId, embedding);
649
+ } catch (error) {
650
+ console.error(
651
+ `Failed to generate embedding for ${sessionId}:`,
652
+ error,
715
653
  );
654
+ // Continue - batch processing continues
655
+ }
716
656
 
717
- // Analyze
718
- const analysis = await analyzeSession({
719
- session: sessionData,
720
- metrics,
721
- toolCalls,
722
- });
723
-
724
- // Persist
725
- analysisDb.saveAnalysis(analysis);
726
-
727
- // Generate and save embedding
728
- try {
729
- const { embedAnalysis } = await import(
730
- "./analysis/embeddings.js"
731
- );
732
- const embedding = await embedAnalysis(analysis);
733
- await analysisDb.saveEmbedding(sessionId, embedding);
734
- } catch (error) {
735
- console.error(
736
- `Failed to generate embedding for ${sessionId}:`,
737
- error,
738
- );
739
- // Continue - batch processing continues
740
- }
741
-
742
- return { session_id: sessionId, success: true };
743
- }),
744
- );
657
+ return { session_id: sessionId, success: true };
658
+ }),
659
+ );
745
660
 
746
- // Collect results
747
- for (let j = 0; j < batchResults.length; j++) {
748
- const result = batchResults[j];
749
- const sessionId = batch[j];
750
- if (!sessionId) continue;
751
-
752
- if (result && result.status === "fulfilled") {
753
- results.push(result.value);
754
- } else if (result && result.status === "rejected") {
755
- results.push({
756
- session_id: sessionId,
757
- success: false,
758
- error:
759
- result.reason instanceof Error
760
- ? result.reason.message
761
- : String(result.reason || "Unknown error"),
762
- });
763
- }
661
+ // Collect results
662
+ for (let j = 0; j < batchResults.length; j++) {
663
+ const result = batchResults[j];
664
+ const sessionId = batch[j];
665
+ if (!sessionId) continue;
666
+
667
+ if (result && result.status === "fulfilled") {
668
+ results.push(result.value);
669
+ } else if (result && result.status === "rejected") {
670
+ results.push({
671
+ session_id: sessionId,
672
+ success: false,
673
+ error:
674
+ result.reason instanceof Error
675
+ ? result.reason.message
676
+ : String(result.reason || "Unknown error"),
677
+ });
764
678
  }
765
-
766
- const batchSuccesses = batchResults.filter(
767
- (r) => r.status === "fulfilled",
768
- ).length;
769
- const batchErrors = batchResults.filter(
770
- (r) => r.status === "rejected",
771
- ).length;
772
- console.log(
773
- `✅ Batch ${batchNum}/${totalBatches} complete: ${batchSuccesses} successful, ${batchErrors} failed`,
774
- );
775
679
  }
776
680
 
777
- const totalSuccesses = results.filter((r) => r.success).length;
778
- const totalErrors = results.filter((r) => !r.success).length;
681
+ const batchSuccesses = batchResults.filter(
682
+ (r) => r.status === "fulfilled",
683
+ ).length;
684
+ const batchErrors = batchResults.filter(
685
+ (r) => r.status === "rejected",
686
+ ).length;
779
687
  console.log(
780
- `🎉 Batch analysis complete: ${totalSuccesses} successful, ${totalErrors} failed`,
781
- );
782
-
783
- return Response.json({ results });
784
- } catch (error) {
785
- console.error("Batch analysis error:", error);
786
- return Response.json(
787
- {
788
- error:
789
- error instanceof Error ? error.message : "Analysis failed",
790
- },
791
- { status: 500 },
688
+ `✅ Batch ${batchNum}/${totalBatches} complete: ${batchSuccesses} successful, ${batchErrors} failed`,
792
689
  );
793
690
  }
794
- },
795
- },
796
691
 
797
- "/api/session-analyses": {
798
- async GET(req) {
799
- try {
800
- const url = new URL(req.url);
801
- const sessionId = url.searchParams.get("sessionId");
802
-
803
- if (sessionId) {
804
- // Get single analysis
805
- const analysis = analysisDb.getAnalysis(sessionId);
806
- if (!analysis) {
807
- return Response.json(
808
- { error: "Analysis not found" },
809
- { status: 404 },
810
- );
811
- }
812
- return Response.json(analysis);
813
- }
814
-
815
- // List all analyses
816
- const limit = Number.parseInt(
817
- url.searchParams.get("limit") || "50",
818
- 10,
819
- );
820
- const offset = Number.parseInt(
821
- url.searchParams.get("offset") || "0",
822
- 10,
823
- );
692
+ const totalSuccesses = results.filter((r) => r.success).length;
693
+ const totalErrors = results.filter((r) => !r.success).length;
694
+ console.log(
695
+ `🎉 Batch analysis complete: ${totalSuccesses} successful, ${totalErrors} failed`,
696
+ );
824
697
 
825
- const analyses = analysisDb.listAnalyses(limit, offset);
826
- return Response.json({ analyses });
827
- } catch (error) {
828
- console.error("Error retrieving analyses:", error);
829
- return Response.json(
830
- {
831
- error:
832
- error instanceof Error
833
- ? error.message
834
- : "Failed to retrieve analyses",
835
- },
836
- { status: 500 },
837
- );
838
- }
839
- },
698
+ return Response.json({ results });
699
+ } catch (error) {
700
+ console.error("Batch analysis error:", error);
701
+ return Response.json(
702
+ {
703
+ error: error instanceof Error ? error.message : "Analysis failed",
704
+ },
705
+ { status: 500 },
706
+ );
707
+ }
840
708
  },
709
+ },
841
710
 
842
- "/api/session-analyses/:sessionId/similar": {
843
- async GET(req) {
844
- try {
845
- const sessionId = req.params.sessionId;
846
- const url = new URL(req.url);
847
- const limit = Number.parseInt(
848
- url.searchParams.get("limit") || "10",
849
- 10,
850
- );
711
+ "/api/session-analyses": {
712
+ async GET(req) {
713
+ try {
714
+ const url = new URL(req.url);
715
+ const sessionId = url.searchParams.get("sessionId");
851
716
 
852
- // Get embedding for this session
853
- const embedding = await analysisDb.getEmbedding(sessionId);
854
- if (!embedding) {
717
+ if (sessionId) {
718
+ // Get single analysis
719
+ const analysis = analysisDb.getAnalysis(sessionId);
720
+ if (!analysis) {
855
721
  return Response.json(
856
- { error: "No embedding found for this session" },
722
+ { error: "Analysis not found" },
857
723
  { status: 404 },
858
724
  );
859
725
  }
726
+ return Response.json(analysis);
727
+ }
860
728
 
861
- // Search for similar sessions
862
- const similar = (
863
- await analysisDb.searchSimilarSessions(embedding, limit + 1)
864
- )
865
- .filter((s) => s.session_id !== sessionId)
866
- .slice(0, limit);
729
+ // List all analyses
730
+ const limit = Number.parseInt(
731
+ url.searchParams.get("limit") || "50",
732
+ 10,
733
+ );
734
+ const offset = Number.parseInt(
735
+ url.searchParams.get("offset") || "0",
736
+ 10,
737
+ );
867
738
 
868
- return Response.json({ similar });
869
- } catch (error) {
870
- console.error("Error finding similar sessions:", error);
871
- return Response.json(
872
- {
873
- error:
874
- error instanceof Error
875
- ? error.message
876
- : "Failed to find similar sessions",
877
- },
878
- { status: 500 },
879
- );
880
- }
881
- },
739
+ const analyses = analysisDb.listAnalyses(limit, offset);
740
+ return Response.json({ analyses });
741
+ } catch (error) {
742
+ console.error("Error retrieving analyses:", error);
743
+ return Response.json(
744
+ {
745
+ error:
746
+ error instanceof Error
747
+ ? error.message
748
+ : "Failed to retrieve analyses",
749
+ },
750
+ { status: 500 },
751
+ );
752
+ }
882
753
  },
754
+ },
883
755
 
884
- // Comparison analysis endpoints
885
- "/api/analyze-comparison/:runId": {
886
- async POST(req) {
887
- const runId = req.params.runId;
756
+ "/api/session-analyses/:sessionId/similar": {
757
+ async GET(req) {
758
+ try {
759
+ const sessionId = req.params.sessionId;
760
+ const url = new URL(req.url);
761
+ const limit = Number.parseInt(
762
+ url.searchParams.get("limit") || "10",
763
+ 10,
764
+ );
888
765
 
889
- try {
890
- // Import analyzer dynamically
891
- const { analyzeComparison } = await import(
892
- "./analysis/comparison-analyzer.js"
766
+ // Get embedding for this session
767
+ const embedding = await analysisDb.getEmbedding(sessionId);
768
+ if (!embedding) {
769
+ return Response.json(
770
+ { error: "No embedding found for this session" },
771
+ { status: 404 },
893
772
  );
773
+ }
894
774
 
895
- // Get the comparison run
896
- const run = comparisonDb.getRun(runId);
897
- if (!run) {
898
- return Response.json(
899
- { error: "Comparison run not found" },
900
- { status: 404 },
901
- );
902
- }
775
+ // Search for similar sessions
776
+ const similar = (
777
+ await analysisDb.searchSimilarSessions(embedding, limit + 1)
778
+ )
779
+ .filter((s) => s.session_id !== sessionId)
780
+ .slice(0, limit);
781
+
782
+ return Response.json({ similar });
783
+ } catch (error) {
784
+ console.error("Error finding similar sessions:", error);
785
+ return Response.json(
786
+ {
787
+ error:
788
+ error instanceof Error
789
+ ? error.message
790
+ : "Failed to find similar sessions",
791
+ },
792
+ { status: 500 },
793
+ );
794
+ }
795
+ },
796
+ },
903
797
 
904
- // Get the comparison config
905
- const config = comparisonDb.getConfig(run.configId);
906
- if (!config) {
907
- return Response.json(
908
- { error: "Comparison config not found" },
909
- { status: 404 },
910
- );
911
- }
798
+ // Comparison analysis endpoints
799
+ "/api/analyze-comparison/:runId": {
800
+ async POST(req) {
801
+ const runId = req.params.runId;
912
802
 
913
- // Verify all sessions exist
914
- if (!run.controlSessionId || !run.variantSessionId) {
915
- return Response.json(
916
- { error: "Comparison run is incomplete - missing session IDs" },
917
- { status: 400 },
918
- );
919
- }
803
+ try {
804
+ // Import analyzer dynamically
805
+ const { analyzeComparison } = await import(
806
+ "./analysis/comparison-analyzer.js"
807
+ );
920
808
 
921
- // Fetch all three sessions from agent server
922
- const [originalRes, controlRes, variantRes] = await Promise.all([
923
- fetch(`${agentServerUrl}/sessions/${run.sourceSessionId}`),
924
- fetch(`${agentServerUrl}/sessions/${run.controlSessionId}`),
925
- fetch(`${agentServerUrl}/sessions/${run.variantSessionId}`),
926
- ]);
809
+ // Get the comparison run
810
+ const run = comparisonDb.getRun(runId);
811
+ if (!run) {
812
+ return Response.json(
813
+ { error: "Comparison run not found" },
814
+ { status: 404 },
815
+ );
816
+ }
927
817
 
928
- if (!originalRes.ok || !controlRes.ok || !variantRes.ok) {
929
- return Response.json(
930
- { error: "Failed to fetch one or more sessions" },
931
- { status: 500 },
932
- );
933
- }
818
+ // Get the comparison config
819
+ const config = comparisonDb.getConfig(run.configId);
820
+ if (!config) {
821
+ return Response.json(
822
+ { error: "Comparison config not found" },
823
+ { status: 404 },
824
+ );
825
+ }
934
826
 
935
- const [originalSession, controlSession, variantSession] =
936
- await Promise.all([
937
- originalRes.json(),
938
- controlRes.json(),
939
- variantRes.json(),
940
- ]);
941
-
942
- // Get agent config for original tools and system prompt
943
- const agentConfig = await fetchAgentConfig();
944
-
945
- // Get metrics for each session
946
- const getMetrics = (sessionId: string) => {
947
- const spans = db.getSpansBySessionAttribute(sessionId);
948
- const traces = db.listTraces(100, 0, sessionId);
949
- return extractSessionMetrics(
950
- traces,
951
- spans,
952
- agentConfig?.model || "unknown",
953
- );
954
- };
955
-
956
- const originalMetrics = getMetrics(run.sourceSessionId);
957
- const controlMetrics = getMetrics(run.controlSessionId);
958
- const variantMetrics = getMetrics(run.variantSessionId);
959
-
960
- // Run the comparison analysis
961
- const analysis = await analyzeComparison({
962
- runId,
963
- hypothesis: config.hypothesis || "",
964
- config,
965
- originalSession,
966
- controlSession,
967
- variantSession,
968
- originalMetrics,
969
- controlMetrics,
970
- variantMetrics,
971
- originalSystemPrompt: agentConfig?.systemPrompt || undefined,
972
- originalTools: agentConfig?.tools?.map((t) => t.name) || [],
973
- });
974
-
975
- // Save to database
976
- comparisonDb.saveComparisonAnalysis(runId, analysis);
827
+ // Verify all sessions exist
828
+ if (!run.controlSessionId || !run.variantSessionId) {
829
+ return Response.json(
830
+ { error: "Comparison run is incomplete - missing session IDs" },
831
+ { status: 400 },
832
+ );
833
+ }
977
834
 
978
- return Response.json(analysis);
979
- } catch (error) {
980
- console.error("Comparison analysis error:", error);
835
+ // Fetch all three sessions from agent server
836
+ const [originalRes, controlRes, variantRes] = await Promise.all([
837
+ fetch(`${agentServerUrl}/sessions/${run.sourceSessionId}`),
838
+ fetch(`${agentServerUrl}/sessions/${run.controlSessionId}`),
839
+ fetch(`${agentServerUrl}/sessions/${run.variantSessionId}`),
840
+ ]);
841
+
842
+ if (!originalRes.ok || !controlRes.ok || !variantRes.ok) {
981
843
  return Response.json(
982
- {
983
- error:
984
- error instanceof Error
985
- ? error.message
986
- : "Comparison analysis failed",
987
- },
844
+ { error: "Failed to fetch one or more sessions" },
988
845
  { status: 500 },
989
846
  );
990
847
  }
991
- },
992
- },
993
848
 
994
- "/api/comparison-analysis/:runId": {
995
- async GET(req) {
996
- try {
997
- const runId = req.params.runId;
998
- const analysis = comparisonDb.getComparisonAnalysis(runId);
849
+ const [originalSession, controlSession, variantSession] =
850
+ await Promise.all([
851
+ originalRes.json(),
852
+ controlRes.json(),
853
+ variantRes.json(),
854
+ ]);
999
855
 
1000
- if (!analysis) {
1001
- return Response.json(
1002
- { error: "Comparison analysis not found" },
1003
- { status: 404 },
1004
- );
1005
- }
856
+ // Get agent config for original tools and system prompt
857
+ const agentConfig = await fetchAgentConfig();
1006
858
 
1007
- return Response.json(analysis);
1008
- } catch (error) {
1009
- console.error("Error fetching comparison analysis:", error);
859
+ // Get metrics for each session
860
+ const getMetrics = (sessionId: string) => {
861
+ const spans = db.getSpansBySessionAttribute(sessionId);
862
+ const traces = db.listTraces(100, 0, sessionId);
863
+ return extractSessionMetrics(
864
+ traces,
865
+ spans,
866
+ agentConfig?.model || "unknown",
867
+ );
868
+ };
869
+
870
+ const originalMetrics = getMetrics(run.sourceSessionId);
871
+ const controlMetrics = getMetrics(run.controlSessionId);
872
+ const variantMetrics = getMetrics(run.variantSessionId);
873
+
874
+ // Run the comparison analysis
875
+ const analysis = await analyzeComparison({
876
+ runId,
877
+ hypothesis: config.hypothesis || "",
878
+ config,
879
+ originalSession,
880
+ controlSession,
881
+ variantSession,
882
+ originalMetrics,
883
+ controlMetrics,
884
+ variantMetrics,
885
+ originalSystemPrompt: agentConfig?.systemPrompt || undefined,
886
+ originalTools: agentConfig?.tools?.map((t) => t.name) || [],
887
+ });
888
+
889
+ // Save to database
890
+ comparisonDb.saveComparisonAnalysis(runId, analysis);
891
+
892
+ return Response.json(analysis);
893
+ } catch (error) {
894
+ console.error("Comparison analysis error:", error);
895
+ return Response.json(
896
+ {
897
+ error:
898
+ error instanceof Error
899
+ ? error.message
900
+ : "Comparison analysis failed",
901
+ },
902
+ { status: 500 },
903
+ );
904
+ }
905
+ },
906
+ },
907
+
908
+ "/api/comparison-analysis/:runId": {
909
+ async GET(req) {
910
+ try {
911
+ const runId = req.params.runId;
912
+ const analysis = comparisonDb.getComparisonAnalysis(runId);
913
+
914
+ if (!analysis) {
1010
915
  return Response.json(
1011
- {
1012
- error:
1013
- error instanceof Error
1014
- ? error.message
1015
- : "Failed to fetch comparison analysis",
1016
- },
1017
- { status: 500 },
916
+ { error: "Comparison analysis not found" },
917
+ { status: 404 },
1018
918
  );
1019
919
  }
1020
- },
920
+
921
+ return Response.json(analysis);
922
+ } catch (error) {
923
+ console.error("Error fetching comparison analysis:", error);
924
+ return Response.json(
925
+ {
926
+ error:
927
+ error instanceof Error
928
+ ? error.message
929
+ : "Failed to fetch comparison analysis",
930
+ },
931
+ { status: 500 },
932
+ );
933
+ }
1021
934
  },
935
+ },
1022
936
 
1023
- "/api/comparison-analysis/:runId/exists": {
1024
- async GET(req) {
1025
- try {
1026
- const runId = req.params.runId;
1027
- const exists = comparisonDb.hasComparisonAnalysis(runId);
1028
- return Response.json({ exists });
1029
- } catch (_error) {
1030
- return Response.json({ exists: false });
1031
- }
1032
- },
937
+ "/api/comparison-analysis/:runId/exists": {
938
+ async GET(req) {
939
+ try {
940
+ const runId = req.params.runId;
941
+ const exists = comparisonDb.hasComparisonAnalysis(runId);
942
+ return Response.json({ exists });
943
+ } catch (_error) {
944
+ return Response.json({ exists: false });
945
+ }
1033
946
  },
947
+ },
948
+ });
949
+ }
950
+
951
+ // Enables TypeScript to infer route path parameters. This provides the same
952
+ // type inference that Bun.serve() uses internally.
953
+ function defineRoutes<R extends string>(routes: Serve.Routes<undefined, R>) {
954
+ return routes;
955
+ }
956
+
957
+ export interface DebuggerServerOptions {
958
+ port?: number;
959
+ otlpPort?: number;
960
+ dbPath: string;
961
+ agentName?: string;
962
+ agentServerUrl?: string;
963
+ }
964
+
965
+ export interface DebuggerServerResult {
966
+ server: ReturnType<typeof serve>;
967
+ otlpServer: ReturnType<typeof serve>;
968
+ stop: () => void;
969
+ }
1034
970
 
971
+ export function startDebuggerServer(
972
+ options: DebuggerServerOptions,
973
+ ): DebuggerServerResult {
974
+ const {
975
+ port = DEFAULT_DEBUGGER_PORT,
976
+ otlpPort = DEFAULT_OTLP_PORT,
977
+ dbPath,
978
+ agentName = "Agent",
979
+ agentServerUrl = "http://localhost:3100",
980
+ } = options;
981
+
982
+ // Start OTLP server (initializes database internally)
983
+ const otlpApp = createOtlpServer({ dbPath });
984
+ const otlpServer = serve({
985
+ fetch: otlpApp.fetch,
986
+ hostname: Bun.env.BIND_HOST || "localhost",
987
+ port: otlpPort,
988
+ });
989
+
990
+ // Create routes using the factory function
991
+ const routes = createDebuggerRoutes({
992
+ dbPath,
993
+ agentName,
994
+ agentServerUrl,
995
+ });
996
+
997
+ // Start debugger UI server
998
+ const server = serve({
999
+ port,
1000
+ idleTimeout: 120, // 2 minutes for long-running LLM analysis requests
1001
+ routes: {
1002
+ ...routes,
1035
1003
  // Serve index.html for all unmatched routes (SPA routing)
1036
1004
  "/*": index,
1037
1005
  },
@@ -1056,3 +1024,54 @@ export function startDebuggerServer(
1056
1024
 
1057
1025
  return { server, otlpServer, stop };
1058
1026
  }
1027
+
1028
+ // Helper to fetch agent config from an agent server
1029
+ async function fetchAgentConfigFromServer(
1030
+ agentServerUrl: string,
1031
+ ): Promise<AgentConfig | null> {
1032
+ try {
1033
+ // Call agent's initialize RPC to get config
1034
+ const response = await fetch(`${agentServerUrl}/rpc`, {
1035
+ method: "POST",
1036
+ headers: { "Content-Type": "application/json" },
1037
+ body: JSON.stringify({
1038
+ jsonrpc: "2.0",
1039
+ id: "debugger-config",
1040
+ method: "initialize",
1041
+ params: {
1042
+ protocolVersion: 1, // ACP protocol version as number
1043
+ clientCapabilities: {},
1044
+ },
1045
+ }),
1046
+ });
1047
+
1048
+ if (!response.ok) {
1049
+ console.error("Failed to fetch agent config:", response.statusText);
1050
+ return null;
1051
+ }
1052
+
1053
+ const data = await response.json();
1054
+
1055
+ // Check for JSON-RPC error
1056
+ if (data.error) {
1057
+ console.error("Agent RPC error:", data.error);
1058
+ return null;
1059
+ }
1060
+
1061
+ const result = data.result;
1062
+ if (!result) {
1063
+ console.error("No result in agent response");
1064
+ return null;
1065
+ }
1066
+
1067
+ // Extract config from initialize response
1068
+ return {
1069
+ model: result._meta?.model || "unknown",
1070
+ systemPrompt: result._meta?.systemPrompt || null,
1071
+ tools: result._meta?.tools || [],
1072
+ };
1073
+ } catch (error) {
1074
+ console.error("Error fetching agent config:", error);
1075
+ return null;
1076
+ }
1077
+ }