@townco/debugger 0.1.38 → 0.1.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/server.ts +883 -864
package/src/server.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { resetDb } from "@townco/otlp-server/db";
|
|
2
2
|
import { createOtlpServer } from "@townco/otlp-server/http";
|
|
3
|
+
import type { Serve } from "bun";
|
|
3
4
|
import { serve } from "bun";
|
|
4
5
|
import { AnalysisDb } from "./analysis-db";
|
|
5
6
|
import { ComparisonDb } from "./comparison-db";
|
|
@@ -17,287 +18,323 @@ import type {
|
|
|
17
18
|
export const DEFAULT_DEBUGGER_PORT = 4000;
|
|
18
19
|
export const DEFAULT_OTLP_PORT = 4318;
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Creates the debugger API routes. Can be used standalone without starting a
|
|
23
|
+
* server.
|
|
24
|
+
*/
|
|
25
|
+
export function createDebuggerRoutes(options: {
|
|
23
26
|
dbPath: string;
|
|
24
|
-
agentName
|
|
25
|
-
agentServerUrl
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export interface DebuggerServerResult {
|
|
29
|
-
server: ReturnType<typeof serve>;
|
|
30
|
-
otlpServer: ReturnType<typeof serve>;
|
|
31
|
-
stop: () => void;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
export function startDebuggerServer(
|
|
35
|
-
options: DebuggerServerOptions,
|
|
36
|
-
): DebuggerServerResult {
|
|
37
|
-
const {
|
|
38
|
-
port = DEFAULT_DEBUGGER_PORT,
|
|
39
|
-
otlpPort = DEFAULT_OTLP_PORT,
|
|
40
|
-
dbPath,
|
|
41
|
-
agentName = "Agent",
|
|
42
|
-
agentServerUrl = "http://localhost:3100",
|
|
43
|
-
} = options;
|
|
44
|
-
|
|
45
|
-
// Start OTLP server (initializes database internally)
|
|
46
|
-
const otlpApp = createOtlpServer({ dbPath });
|
|
47
|
-
const otlpServer = serve({
|
|
48
|
-
fetch: otlpApp.fetch,
|
|
49
|
-
hostname: Bun.env.BIND_HOST || "localhost",
|
|
50
|
-
port: otlpPort,
|
|
51
|
-
});
|
|
27
|
+
agentName: string;
|
|
28
|
+
agentServerUrl: string;
|
|
29
|
+
}) {
|
|
30
|
+
const { dbPath, agentName, agentServerUrl } = options;
|
|
52
31
|
|
|
53
|
-
//
|
|
32
|
+
// Initialize databases
|
|
54
33
|
const db = new DebuggerDb(dbPath);
|
|
55
|
-
|
|
56
|
-
// Create comparison database for Town Hall feature
|
|
57
34
|
const comparisonDbPath = dbPath.replace(/\.db$/, "-comparison.db");
|
|
58
35
|
const comparisonDb = new ComparisonDb(comparisonDbPath);
|
|
59
|
-
|
|
60
|
-
// Create analysis database - uses main debugger database
|
|
61
36
|
const analysisDb = new AnalysisDb(dbPath);
|
|
62
37
|
|
|
63
|
-
|
|
64
|
-
async function fetchAgentConfig(): Promise<AgentConfig | null> {
|
|
65
|
-
try {
|
|
66
|
-
// Call agent's initialize RPC to get config
|
|
67
|
-
const response = await fetch(`${agentServerUrl}/rpc`, {
|
|
68
|
-
method: "POST",
|
|
69
|
-
headers: { "Content-Type": "application/json" },
|
|
70
|
-
body: JSON.stringify({
|
|
71
|
-
jsonrpc: "2.0",
|
|
72
|
-
id: "debugger-config",
|
|
73
|
-
method: "initialize",
|
|
74
|
-
params: {
|
|
75
|
-
protocolVersion: 1, // ACP protocol version as number
|
|
76
|
-
clientCapabilities: {},
|
|
77
|
-
},
|
|
78
|
-
}),
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
if (!response.ok) {
|
|
82
|
-
console.error("Failed to fetch agent config:", response.statusText);
|
|
83
|
-
return null;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
const data = await response.json();
|
|
87
|
-
|
|
88
|
-
// Check for JSON-RPC error
|
|
89
|
-
if (data.error) {
|
|
90
|
-
console.error("Agent RPC error:", data.error);
|
|
91
|
-
return null;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
const result = data.result;
|
|
95
|
-
if (!result) {
|
|
96
|
-
console.error("No result in agent response");
|
|
97
|
-
return null;
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
// Extract config from initialize response
|
|
101
|
-
return {
|
|
102
|
-
model: result._meta?.model || "unknown",
|
|
103
|
-
systemPrompt: result._meta?.systemPrompt || null,
|
|
104
|
-
tools: result._meta?.tools || [],
|
|
105
|
-
};
|
|
106
|
-
} catch (error) {
|
|
107
|
-
console.error("Error fetching agent config:", error);
|
|
108
|
-
return null;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
38
|
+
const fetchAgentConfig = () => fetchAgentConfigFromServer(agentServerUrl);
|
|
111
39
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
routes: {
|
|
117
|
-
"/api/config": {
|
|
118
|
-
GET() {
|
|
119
|
-
return Response.json({ agentName });
|
|
120
|
-
},
|
|
40
|
+
return defineRoutes({
|
|
41
|
+
"/api/config": {
|
|
42
|
+
GET() {
|
|
43
|
+
return Response.json({ agentName });
|
|
121
44
|
},
|
|
45
|
+
},
|
|
122
46
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
},
|
|
47
|
+
"/api/reset-database": {
|
|
48
|
+
POST() {
|
|
49
|
+
try {
|
|
50
|
+
resetDb();
|
|
51
|
+
return new Response("Database reset successfully", { status: 200 });
|
|
52
|
+
} catch (error) {
|
|
53
|
+
console.error("Error resetting database:", error);
|
|
54
|
+
return new Response(
|
|
55
|
+
`Failed to reset database: ${error instanceof Error ? error.message : String(error)}`,
|
|
56
|
+
{ status: 500 },
|
|
57
|
+
);
|
|
58
|
+
}
|
|
136
59
|
},
|
|
60
|
+
},
|
|
137
61
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
},
|
|
62
|
+
"/api/sessions": {
|
|
63
|
+
GET(req) {
|
|
64
|
+
const url = new URL(req.url);
|
|
65
|
+
const limit = Number.parseInt(
|
|
66
|
+
url.searchParams.get("limit") || "1000",
|
|
67
|
+
10,
|
|
68
|
+
);
|
|
69
|
+
const offset = Number.parseInt(
|
|
70
|
+
url.searchParams.get("offset") || "0",
|
|
71
|
+
10,
|
|
72
|
+
);
|
|
73
|
+
const sessions = db.listSessions(limit, offset);
|
|
74
|
+
return Response.json(sessions);
|
|
152
75
|
},
|
|
76
|
+
},
|
|
153
77
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
},
|
|
78
|
+
"/api/traces": {
|
|
79
|
+
GET(req) {
|
|
80
|
+
const url = new URL(req.url);
|
|
81
|
+
const limit = Number.parseInt(
|
|
82
|
+
url.searchParams.get("limit") || "50",
|
|
83
|
+
10,
|
|
84
|
+
);
|
|
85
|
+
const offset = Number.parseInt(
|
|
86
|
+
url.searchParams.get("offset") || "0",
|
|
87
|
+
10,
|
|
88
|
+
);
|
|
89
|
+
const sessionId = url.searchParams.get("sessionId") || undefined;
|
|
90
|
+
const traces = db.listTraces(limit, offset, sessionId);
|
|
91
|
+
return Response.json(traces);
|
|
169
92
|
},
|
|
93
|
+
},
|
|
170
94
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
95
|
+
"/api/traces/:traceId": {
|
|
96
|
+
GET(req) {
|
|
97
|
+
const traceId = req.params.traceId;
|
|
98
|
+
const data = db.getTraceById(traceId);
|
|
99
|
+
if (!data.trace) {
|
|
100
|
+
return Response.json({ error: "Trace not found" }, { status: 404 });
|
|
101
|
+
}
|
|
102
|
+
// Extract messages on the server side
|
|
103
|
+
const messages = extractTurnMessages(data.spans, data.logs);
|
|
104
|
+
return Response.json({ ...data, messages });
|
|
105
|
+
},
|
|
106
|
+
},
|
|
107
|
+
|
|
108
|
+
"/api/session-conversation": {
|
|
109
|
+
GET(req) {
|
|
110
|
+
const url = new URL(req.url);
|
|
111
|
+
const sessionId = url.searchParams.get("sessionId");
|
|
112
|
+
if (!sessionId) {
|
|
113
|
+
return Response.json(
|
|
114
|
+
{ error: "sessionId parameter is required" },
|
|
115
|
+
{ status: 400 },
|
|
116
|
+
);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Query traces by session attribute to avoid race conditions
|
|
120
|
+
const traceIds = db.getTraceIdsBySessionAttribute(sessionId);
|
|
121
|
+
|
|
122
|
+
// Extract messages for each trace
|
|
123
|
+
const conversation: ConversationTrace[] = traceIds.map((traceInfo) => {
|
|
124
|
+
const data = db.getTraceById(traceInfo.trace_id);
|
|
179
125
|
const messages = extractTurnMessages(data.spans, data.logs);
|
|
180
|
-
return
|
|
181
|
-
|
|
126
|
+
return {
|
|
127
|
+
trace_id: traceInfo.trace_id,
|
|
128
|
+
start_time_unix_nano: traceInfo.start_time_unix_nano,
|
|
129
|
+
userInput: messages.userInput,
|
|
130
|
+
llmOutput: messages.llmOutput,
|
|
131
|
+
agentMessages: messages.agentMessages,
|
|
132
|
+
};
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
return Response.json(conversation);
|
|
182
136
|
},
|
|
137
|
+
},
|
|
183
138
|
|
|
184
|
-
|
|
185
|
-
GET(req) {
|
|
186
|
-
const url = new URL(req.url);
|
|
187
|
-
const sessionId = url.searchParams.get("sessionId");
|
|
188
|
-
if (!sessionId) {
|
|
189
|
-
return Response.json(
|
|
190
|
-
{ error: "sessionId parameter is required" },
|
|
191
|
-
{ status: 400 },
|
|
192
|
-
);
|
|
193
|
-
}
|
|
139
|
+
// Town Hall API endpoints
|
|
194
140
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
const messages = extractTurnMessages(data.spans, data.logs);
|
|
203
|
-
return {
|
|
204
|
-
trace_id: traceInfo.trace_id,
|
|
205
|
-
start_time_unix_nano: traceInfo.start_time_unix_nano,
|
|
206
|
-
userInput: messages.userInput,
|
|
207
|
-
llmOutput: messages.llmOutput,
|
|
208
|
-
agentMessages: messages.agentMessages,
|
|
209
|
-
};
|
|
210
|
-
},
|
|
141
|
+
"/api/agent-config": {
|
|
142
|
+
async GET() {
|
|
143
|
+
const config = await fetchAgentConfig();
|
|
144
|
+
if (!config) {
|
|
145
|
+
return Response.json(
|
|
146
|
+
{ error: "Failed to fetch agent config" },
|
|
147
|
+
{ status: 503 },
|
|
211
148
|
);
|
|
149
|
+
}
|
|
150
|
+
return Response.json(config);
|
|
151
|
+
},
|
|
152
|
+
},
|
|
212
153
|
|
|
213
|
-
|
|
214
|
-
|
|
154
|
+
"/api/available-models": {
|
|
155
|
+
GET() {
|
|
156
|
+
// List of supported models for comparison
|
|
157
|
+
const models = [
|
|
158
|
+
// Anthropic models
|
|
159
|
+
"claude-sonnet-4-5-20250929",
|
|
160
|
+
"claude-3-5-haiku-20241022",
|
|
161
|
+
"claude-opus-4-5-20251101",
|
|
162
|
+
// Google Gemini models
|
|
163
|
+
"gemini-2.0-flash",
|
|
164
|
+
"gemini-1.5-pro",
|
|
165
|
+
"gemini-1.5-flash",
|
|
166
|
+
];
|
|
167
|
+
return Response.json({ models });
|
|
215
168
|
},
|
|
169
|
+
},
|
|
216
170
|
|
|
217
|
-
|
|
171
|
+
"/api/session-first-message/:sessionId": {
|
|
172
|
+
GET(req) {
|
|
173
|
+
const sessionId = req.params.sessionId;
|
|
218
174
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
175
|
+
// Query logs directly by session attribute to avoid race conditions
|
|
176
|
+
// with trace.session_id association during concurrent sessions
|
|
177
|
+
const message = db.getFirstUserMessageBySession(sessionId);
|
|
178
|
+
|
|
179
|
+
if (!message) {
|
|
180
|
+
return Response.json(
|
|
181
|
+
{ error: "Session not found or has no user message" },
|
|
182
|
+
{ status: 404 },
|
|
183
|
+
);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return Response.json({ message });
|
|
230
187
|
},
|
|
188
|
+
},
|
|
231
189
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
// Anthropic models
|
|
237
|
-
"claude-sonnet-4-5-20250929",
|
|
238
|
-
"claude-3-5-haiku-20241022",
|
|
239
|
-
"claude-opus-4-5-20251101",
|
|
240
|
-
// Google Gemini models
|
|
241
|
-
"gemini-2.0-flash",
|
|
242
|
-
"gemini-1.5-pro",
|
|
243
|
-
"gemini-1.5-flash",
|
|
244
|
-
];
|
|
245
|
-
return Response.json({ models });
|
|
246
|
-
},
|
|
190
|
+
"/api/comparison-config": {
|
|
191
|
+
GET() {
|
|
192
|
+
const config = comparisonDb.getLatestConfig();
|
|
193
|
+
return Response.json(config);
|
|
247
194
|
},
|
|
195
|
+
async POST(req) {
|
|
196
|
+
try {
|
|
197
|
+
const body = await req.json();
|
|
198
|
+
const config: ComparisonConfig = {
|
|
199
|
+
id: body.id || crypto.randomUUID(),
|
|
200
|
+
dimensions: body.dimensions || [],
|
|
201
|
+
controlModel: body.controlModel,
|
|
202
|
+
variantModel: body.variantModel,
|
|
203
|
+
variantSystemPrompt: body.variantSystemPrompt,
|
|
204
|
+
variantTools: body.variantTools,
|
|
205
|
+
createdAt: body.createdAt || new Date().toISOString(),
|
|
206
|
+
updatedAt: new Date().toISOString(),
|
|
207
|
+
};
|
|
208
|
+
comparisonDb.saveConfig(config);
|
|
209
|
+
return Response.json({ id: config.id });
|
|
210
|
+
} catch (error) {
|
|
211
|
+
console.error("Error saving comparison config:", error);
|
|
212
|
+
return Response.json(
|
|
213
|
+
{ error: "Invalid request body" },
|
|
214
|
+
{ status: 400 },
|
|
215
|
+
);
|
|
216
|
+
}
|
|
217
|
+
},
|
|
218
|
+
},
|
|
248
219
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
220
|
+
"/api/comparison-config/:configId": {
|
|
221
|
+
GET(req) {
|
|
222
|
+
const configId = req.params.configId;
|
|
223
|
+
const config = comparisonDb.getConfig(configId);
|
|
224
|
+
if (!config) {
|
|
225
|
+
return Response.json(
|
|
226
|
+
{ error: "Comparison config not found" },
|
|
227
|
+
{ status: 404 },
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
return Response.json(config);
|
|
231
|
+
},
|
|
232
|
+
},
|
|
252
233
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
234
|
+
"/api/comparison-session-ids": {
|
|
235
|
+
GET() {
|
|
236
|
+
const sessionIds = comparisonDb.getComparisonSessionIds();
|
|
237
|
+
return Response.json({ sessionIds });
|
|
238
|
+
},
|
|
239
|
+
},
|
|
256
240
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
241
|
+
"/api/comparison-runs": {
|
|
242
|
+
GET(req) {
|
|
243
|
+
const url = new URL(req.url);
|
|
244
|
+
const limit = Number.parseInt(
|
|
245
|
+
url.searchParams.get("limit") || "50",
|
|
246
|
+
10,
|
|
247
|
+
);
|
|
248
|
+
const offset = Number.parseInt(
|
|
249
|
+
url.searchParams.get("offset") || "0",
|
|
250
|
+
10,
|
|
251
|
+
);
|
|
252
|
+
const sourceSessionId = url.searchParams.get("sourceSessionId");
|
|
253
|
+
|
|
254
|
+
if (sourceSessionId) {
|
|
255
|
+
const runs = comparisonDb.listRunsBySourceSession(sourceSessionId);
|
|
256
|
+
return Response.json(runs);
|
|
257
|
+
}
|
|
263
258
|
|
|
264
|
-
|
|
265
|
-
|
|
259
|
+
const runs = comparisonDb.listRuns(limit, offset);
|
|
260
|
+
return Response.json(runs);
|
|
266
261
|
},
|
|
262
|
+
},
|
|
267
263
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
264
|
+
"/api/comparison-run/:runId": {
|
|
265
|
+
GET(req) {
|
|
266
|
+
const runId = req.params.runId;
|
|
267
|
+
const run = comparisonDb.getRun(runId);
|
|
268
|
+
if (!run) {
|
|
269
|
+
return Response.json(
|
|
270
|
+
{ error: "Comparison run not found" },
|
|
271
|
+
{ status: 404 },
|
|
272
|
+
);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
const config = comparisonDb.getConfig(run.configId);
|
|
276
|
+
const controlModel =
|
|
277
|
+
config?.controlModel ??
|
|
278
|
+
config?.variantModel ??
|
|
279
|
+
"claude-sonnet-4-5-20250929";
|
|
280
|
+
const variantModel =
|
|
281
|
+
config?.variantModel ??
|
|
282
|
+
config?.controlModel ??
|
|
283
|
+
"claude-sonnet-4-5-20250929";
|
|
284
|
+
|
|
285
|
+
const maybeRefreshMetrics = (
|
|
286
|
+
sessionId: string | null,
|
|
287
|
+
cached: SessionMetrics | null,
|
|
288
|
+
model: string,
|
|
289
|
+
): SessionMetrics | null => {
|
|
290
|
+
if (!sessionId) return cached;
|
|
291
|
+
const needsRefresh =
|
|
292
|
+
!cached ||
|
|
293
|
+
cached.totalTokens === 0 ||
|
|
294
|
+
cached.toolCallCount === 0 ||
|
|
295
|
+
!cached.toolCalls ||
|
|
296
|
+
cached.toolCalls.length === 0;
|
|
297
|
+
if (!needsRefresh) return cached;
|
|
298
|
+
|
|
299
|
+
const spans = db.getSpansBySessionAttribute(sessionId);
|
|
300
|
+
if (spans.length === 0) return cached;
|
|
301
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
302
|
+
return extractSessionMetrics(traces, spans, model);
|
|
303
|
+
};
|
|
304
|
+
|
|
305
|
+
const controlMetrics = maybeRefreshMetrics(
|
|
306
|
+
run.controlSessionId,
|
|
307
|
+
run.controlMetrics,
|
|
308
|
+
controlModel,
|
|
309
|
+
);
|
|
310
|
+
const variantMetrics = maybeRefreshMetrics(
|
|
311
|
+
run.variantSessionId,
|
|
312
|
+
run.variantMetrics,
|
|
313
|
+
variantModel,
|
|
314
|
+
);
|
|
315
|
+
|
|
316
|
+
return Response.json({
|
|
317
|
+
...run,
|
|
318
|
+
controlMetrics,
|
|
319
|
+
variantMetrics,
|
|
320
|
+
});
|
|
321
|
+
},
|
|
322
|
+
},
|
|
323
|
+
|
|
324
|
+
"/api/run-comparison": {
|
|
325
|
+
async POST(req) {
|
|
326
|
+
try {
|
|
327
|
+
const body = await req.json();
|
|
328
|
+
const { sessionId, configId } = body;
|
|
329
|
+
|
|
330
|
+
if (!sessionId || !configId) {
|
|
290
331
|
return Response.json(
|
|
291
|
-
{ error: "
|
|
332
|
+
{ error: "sessionId and configId are required" },
|
|
292
333
|
{ status: 400 },
|
|
293
334
|
);
|
|
294
335
|
}
|
|
295
|
-
},
|
|
296
|
-
},
|
|
297
336
|
|
|
298
|
-
|
|
299
|
-
GET(req) {
|
|
300
|
-
const configId = req.params.configId;
|
|
337
|
+
// Get the comparison config
|
|
301
338
|
const config = comparisonDb.getConfig(configId);
|
|
302
339
|
if (!config) {
|
|
303
340
|
return Response.json(
|
|
@@ -305,733 +342,664 @@ export function startDebuggerServer(
|
|
|
305
342
|
{ status: 404 },
|
|
306
343
|
);
|
|
307
344
|
}
|
|
308
|
-
return Response.json(config);
|
|
309
|
-
},
|
|
310
|
-
},
|
|
311
345
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
"/api/comparison-runs": {
|
|
320
|
-
GET(req) {
|
|
321
|
-
const url = new URL(req.url);
|
|
322
|
-
const limit = Number.parseInt(
|
|
323
|
-
url.searchParams.get("limit") || "50",
|
|
324
|
-
10,
|
|
325
|
-
);
|
|
326
|
-
const offset = Number.parseInt(
|
|
327
|
-
url.searchParams.get("offset") || "0",
|
|
328
|
-
10,
|
|
329
|
-
);
|
|
330
|
-
const sourceSessionId = url.searchParams.get("sourceSessionId");
|
|
331
|
-
|
|
332
|
-
if (sourceSessionId) {
|
|
333
|
-
const runs = comparisonDb.listRunsBySourceSession(sourceSessionId);
|
|
334
|
-
return Response.json(runs);
|
|
346
|
+
// Get the first user message from the source session
|
|
347
|
+
const traces = db.listTraces(1, 0, sessionId);
|
|
348
|
+
if (traces.length === 0) {
|
|
349
|
+
return Response.json(
|
|
350
|
+
{ error: "Source session not found" },
|
|
351
|
+
{ status: 404 },
|
|
352
|
+
);
|
|
335
353
|
}
|
|
336
354
|
|
|
337
|
-
const
|
|
338
|
-
|
|
339
|
-
},
|
|
340
|
-
},
|
|
341
|
-
|
|
342
|
-
"/api/comparison-run/:runId": {
|
|
343
|
-
GET(req) {
|
|
344
|
-
const runId = req.params.runId;
|
|
345
|
-
const run = comparisonDb.getRun(runId);
|
|
346
|
-
if (!run) {
|
|
355
|
+
const trace = traces[0];
|
|
356
|
+
if (!trace) {
|
|
347
357
|
return Response.json(
|
|
348
|
-
{ error: "
|
|
358
|
+
{ error: "Source session not found" },
|
|
349
359
|
{ status: 404 },
|
|
350
360
|
);
|
|
351
361
|
}
|
|
352
362
|
|
|
353
|
-
const
|
|
354
|
-
const
|
|
355
|
-
config?.controlModel ??
|
|
356
|
-
config?.variantModel ??
|
|
357
|
-
"claude-sonnet-4-5-20250929";
|
|
358
|
-
const variantModel =
|
|
359
|
-
config?.variantModel ??
|
|
360
|
-
config?.controlModel ??
|
|
361
|
-
"claude-sonnet-4-5-20250929";
|
|
362
|
-
|
|
363
|
-
const maybeRefreshMetrics = (
|
|
364
|
-
sessionId: string | null,
|
|
365
|
-
cached: SessionMetrics | null,
|
|
366
|
-
model: string,
|
|
367
|
-
): SessionMetrics | null => {
|
|
368
|
-
if (!sessionId) return cached;
|
|
369
|
-
const needsRefresh =
|
|
370
|
-
!cached ||
|
|
371
|
-
cached.totalTokens === 0 ||
|
|
372
|
-
cached.toolCallCount === 0 ||
|
|
373
|
-
!cached.toolCalls ||
|
|
374
|
-
cached.toolCalls.length === 0;
|
|
375
|
-
if (!needsRefresh) return cached;
|
|
363
|
+
const data = db.getTraceById(trace.trace_id);
|
|
364
|
+
const messages = extractTurnMessages(data.spans, data.logs);
|
|
376
365
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
366
|
+
if (!messages.userInput) {
|
|
367
|
+
return Response.json(
|
|
368
|
+
{ error: "No user message found in source session" },
|
|
369
|
+
{ status: 400 },
|
|
370
|
+
);
|
|
371
|
+
}
|
|
382
372
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
const variantMetrics = maybeRefreshMetrics(
|
|
389
|
-
run.variantSessionId,
|
|
390
|
-
run.variantMetrics,
|
|
391
|
-
variantModel,
|
|
373
|
+
// Create the comparison run
|
|
374
|
+
const run = comparisonDb.createRun(
|
|
375
|
+
configId,
|
|
376
|
+
sessionId,
|
|
377
|
+
messages.userInput,
|
|
392
378
|
);
|
|
393
379
|
|
|
380
|
+
// Return the run info - actual execution will be handled by the frontend
|
|
381
|
+
// which will create two ACP sessions and run them in parallel
|
|
394
382
|
return Response.json({
|
|
395
|
-
|
|
383
|
+
runId: run.id,
|
|
384
|
+
firstUserMessage: run.firstUserMessage,
|
|
385
|
+
config,
|
|
386
|
+
});
|
|
387
|
+
} catch (error) {
|
|
388
|
+
console.error("Error starting comparison:", error);
|
|
389
|
+
return Response.json(
|
|
390
|
+
{ error: "Failed to start comparison" },
|
|
391
|
+
{ status: 500 },
|
|
392
|
+
);
|
|
393
|
+
}
|
|
394
|
+
},
|
|
395
|
+
},
|
|
396
|
+
|
|
397
|
+
"/api/comparison-run/:runId/update": {
|
|
398
|
+
async POST(req) {
|
|
399
|
+
try {
|
|
400
|
+
const runId = req.params.runId;
|
|
401
|
+
const body = await req.json();
|
|
402
|
+
const {
|
|
403
|
+
status,
|
|
404
|
+
controlSessionId,
|
|
405
|
+
variantSessionId,
|
|
396
406
|
controlMetrics,
|
|
397
407
|
variantMetrics,
|
|
408
|
+
controlResponse,
|
|
409
|
+
variantResponse,
|
|
410
|
+
} = body;
|
|
411
|
+
|
|
412
|
+
comparisonDb.updateRunStatus(runId, status, {
|
|
413
|
+
controlSessionId,
|
|
414
|
+
variantSessionId,
|
|
415
|
+
controlMetrics,
|
|
416
|
+
variantMetrics,
|
|
417
|
+
controlResponse,
|
|
418
|
+
variantResponse,
|
|
398
419
|
});
|
|
399
|
-
|
|
420
|
+
|
|
421
|
+
return Response.json({ success: true });
|
|
422
|
+
} catch (_error) {
|
|
423
|
+
return Response.json(
|
|
424
|
+
{ error: "Failed to update comparison run" },
|
|
425
|
+
{ status: 500 },
|
|
426
|
+
);
|
|
427
|
+
}
|
|
400
428
|
},
|
|
429
|
+
},
|
|
401
430
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
431
|
+
"/api/session-metrics/:sessionId": {
|
|
432
|
+
async GET(req) {
|
|
433
|
+
const sessionId = req.params.sessionId;
|
|
434
|
+
const url = new URL(req.url);
|
|
435
|
+
const model = url.searchParams.get("model") || "unknown";
|
|
436
|
+
|
|
437
|
+
// Query spans by their agent.session_id attribute directly
|
|
438
|
+
// This is more reliable than trace-based lookup because concurrent
|
|
439
|
+
// sessions can cause race conditions in trace association
|
|
440
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
441
|
+
|
|
442
|
+
if (allSpans.length === 0) {
|
|
443
|
+
return Response.json(
|
|
444
|
+
{ error: "Session not found or has no traces" },
|
|
445
|
+
{ status: 404 },
|
|
446
|
+
);
|
|
447
|
+
}
|
|
407
448
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
{ error: "sessionId and configId are required" },
|
|
411
|
-
{ status: 400 },
|
|
412
|
-
);
|
|
413
|
-
}
|
|
449
|
+
// Get traces for duration calculation (use empty array if not found)
|
|
450
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
414
451
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
{ status: 404 },
|
|
421
|
-
);
|
|
422
|
-
}
|
|
452
|
+
// Extract metrics
|
|
453
|
+
const metrics = extractSessionMetrics(traces, allSpans, model);
|
|
454
|
+
return Response.json(metrics);
|
|
455
|
+
},
|
|
456
|
+
},
|
|
423
457
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
return Response.json(
|
|
428
|
-
{ error: "Source session not found" },
|
|
429
|
-
{ status: 404 },
|
|
430
|
-
);
|
|
431
|
-
}
|
|
458
|
+
"/api/analyze-session/:sessionId": {
|
|
459
|
+
async POST(req) {
|
|
460
|
+
const sessionId = req.params.sessionId;
|
|
432
461
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
{ error: "Source session not found" },
|
|
437
|
-
{ status: 404 },
|
|
438
|
-
);
|
|
439
|
-
}
|
|
462
|
+
try {
|
|
463
|
+
// Import analyzer dynamically to avoid loading at startup
|
|
464
|
+
const { analyzeSession } = await import("./analysis/analyzer.js");
|
|
440
465
|
|
|
441
|
-
|
|
442
|
-
|
|
466
|
+
// Fetch session from agent server via ACP HTTP API
|
|
467
|
+
const sessionResponse = await fetch(
|
|
468
|
+
`${agentServerUrl}/sessions/${sessionId}`,
|
|
469
|
+
);
|
|
443
470
|
|
|
444
|
-
|
|
471
|
+
if (!sessionResponse.ok) {
|
|
472
|
+
if (sessionResponse.status === 404) {
|
|
445
473
|
return Response.json(
|
|
446
|
-
{ error: "
|
|
447
|
-
{ status:
|
|
474
|
+
{ error: "Session not found" },
|
|
475
|
+
{ status: 404 },
|
|
448
476
|
);
|
|
449
477
|
}
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
const run = comparisonDb.createRun(
|
|
453
|
-
configId,
|
|
454
|
-
sessionId,
|
|
455
|
-
messages.userInput,
|
|
456
|
-
);
|
|
457
|
-
|
|
458
|
-
// Return the run info - actual execution will be handled by the frontend
|
|
459
|
-
// which will create two ACP sessions and run them in parallel
|
|
460
|
-
return Response.json({
|
|
461
|
-
runId: run.id,
|
|
462
|
-
firstUserMessage: run.firstUserMessage,
|
|
463
|
-
config,
|
|
464
|
-
});
|
|
465
|
-
} catch (error) {
|
|
466
|
-
console.error("Error starting comparison:", error);
|
|
467
|
-
return Response.json(
|
|
468
|
-
{ error: "Failed to start comparison" },
|
|
469
|
-
{ status: 500 },
|
|
478
|
+
throw new Error(
|
|
479
|
+
`Failed to fetch session: ${sessionResponse.statusText}`,
|
|
470
480
|
);
|
|
471
481
|
}
|
|
472
|
-
},
|
|
473
|
-
},
|
|
474
482
|
|
|
475
|
-
|
|
476
|
-
async POST(req) {
|
|
477
|
-
try {
|
|
478
|
-
const runId = req.params.runId;
|
|
479
|
-
const body = await req.json();
|
|
480
|
-
const {
|
|
481
|
-
status,
|
|
482
|
-
controlSessionId,
|
|
483
|
-
variantSessionId,
|
|
484
|
-
controlMetrics,
|
|
485
|
-
variantMetrics,
|
|
486
|
-
controlResponse,
|
|
487
|
-
variantResponse,
|
|
488
|
-
} = body;
|
|
489
|
-
|
|
490
|
-
comparisonDb.updateRunStatus(runId, status, {
|
|
491
|
-
controlSessionId,
|
|
492
|
-
variantSessionId,
|
|
493
|
-
controlMetrics,
|
|
494
|
-
variantMetrics,
|
|
495
|
-
controlResponse,
|
|
496
|
-
variantResponse,
|
|
497
|
-
});
|
|
498
|
-
|
|
499
|
-
return Response.json({ success: true });
|
|
500
|
-
} catch (_error) {
|
|
501
|
-
return Response.json(
|
|
502
|
-
{ error: "Failed to update comparison run" },
|
|
503
|
-
{ status: 500 },
|
|
504
|
-
);
|
|
505
|
-
}
|
|
506
|
-
},
|
|
507
|
-
},
|
|
483
|
+
const sessionData = await sessionResponse.json();
|
|
508
484
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
const
|
|
512
|
-
const url = new URL(req.url);
|
|
513
|
-
const model = url.searchParams.get("model") || "unknown";
|
|
485
|
+
// Fetch agent config to get model for cost calculation
|
|
486
|
+
const agentConfig = await fetchAgentConfig();
|
|
487
|
+
const model = agentConfig?.model || "unknown";
|
|
514
488
|
|
|
515
|
-
//
|
|
516
|
-
// This is more reliable than trace-based lookup because concurrent
|
|
517
|
-
// sessions can cause race conditions in trace association
|
|
489
|
+
// Fetch metrics from OTLP spans
|
|
518
490
|
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
519
|
-
|
|
520
|
-
if (allSpans.length === 0) {
|
|
521
|
-
return Response.json(
|
|
522
|
-
{ error: "Session not found or has no traces" },
|
|
523
|
-
{ status: 404 },
|
|
524
|
-
);
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
// Get traces for duration calculation (use empty array if not found)
|
|
528
491
|
const traces = db.listTraces(100, 0, sessionId);
|
|
492
|
+
const sessionMetrics = extractSessionMetrics(traces, allSpans, model);
|
|
493
|
+
|
|
494
|
+
// Convert to AnalysisMetrics format
|
|
495
|
+
const metrics = {
|
|
496
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
497
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
498
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
499
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
500
|
+
durationMs: sessionMetrics.durationMs,
|
|
501
|
+
};
|
|
529
502
|
|
|
530
|
-
//
|
|
531
|
-
const
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
503
|
+
// Convert tool calls to DetailedToolCall format
|
|
504
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
|
|
505
|
+
name: tc.name,
|
|
506
|
+
input: tc.input,
|
|
507
|
+
output: tc.output,
|
|
508
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
509
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
510
|
+
}));
|
|
511
|
+
|
|
512
|
+
// Analyze with LLM
|
|
513
|
+
const analysis = await analyzeSession({
|
|
514
|
+
session: sessionData,
|
|
515
|
+
metrics,
|
|
516
|
+
toolCalls,
|
|
517
|
+
});
|
|
535
518
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
const sessionId = req.params.sessionId;
|
|
519
|
+
// Persist to database
|
|
520
|
+
analysisDb.saveAnalysis(analysis);
|
|
539
521
|
|
|
522
|
+
// Generate and save embedding
|
|
540
523
|
try {
|
|
541
|
-
|
|
542
|
-
const
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
524
|
+
const { embedAnalysis } = await import("./analysis/embeddings.js");
|
|
525
|
+
const embedding = await embedAnalysis(analysis);
|
|
526
|
+
await analysisDb.saveEmbedding(analysis.session_id, embedding);
|
|
527
|
+
} catch (error) {
|
|
528
|
+
console.error(
|
|
529
|
+
`Failed to generate embedding for ${sessionId}:`,
|
|
530
|
+
error,
|
|
547
531
|
);
|
|
532
|
+
// Continue - don't fail entire analysis
|
|
533
|
+
}
|
|
548
534
|
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
const sessionData = await sessionResponse.json();
|
|
562
|
-
|
|
563
|
-
// Fetch agent config to get model for cost calculation
|
|
564
|
-
const agentConfig = await fetchAgentConfig();
|
|
565
|
-
const model = agentConfig?.model || "unknown";
|
|
566
|
-
|
|
567
|
-
// Fetch metrics from OTLP spans
|
|
568
|
-
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
569
|
-
const traces = db.listTraces(100, 0, sessionId);
|
|
570
|
-
const sessionMetrics = extractSessionMetrics(
|
|
571
|
-
traces,
|
|
572
|
-
allSpans,
|
|
573
|
-
model,
|
|
574
|
-
);
|
|
535
|
+
return Response.json(analysis);
|
|
536
|
+
} catch (error) {
|
|
537
|
+
console.error("Session analysis error:", error);
|
|
538
|
+
return Response.json(
|
|
539
|
+
{
|
|
540
|
+
error: error instanceof Error ? error.message : "Analysis failed",
|
|
541
|
+
},
|
|
542
|
+
{ status: 500 },
|
|
543
|
+
);
|
|
544
|
+
}
|
|
545
|
+
},
|
|
546
|
+
},
|
|
575
547
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
estimatedCost: sessionMetrics.estimatedCost,
|
|
582
|
-
durationMs: sessionMetrics.durationMs,
|
|
583
|
-
};
|
|
584
|
-
|
|
585
|
-
// Convert tool calls to DetailedToolCall format
|
|
586
|
-
const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
|
|
587
|
-
name: tc.name,
|
|
588
|
-
input: tc.input,
|
|
589
|
-
output: tc.output,
|
|
590
|
-
startTimeUnixNano: tc.startTimeUnixNano,
|
|
591
|
-
endTimeUnixNano: tc.endTimeUnixNano,
|
|
592
|
-
}));
|
|
593
|
-
|
|
594
|
-
// Analyze with LLM
|
|
595
|
-
const analysis = await analyzeSession({
|
|
596
|
-
session: sessionData,
|
|
597
|
-
metrics,
|
|
598
|
-
toolCalls,
|
|
599
|
-
});
|
|
600
|
-
|
|
601
|
-
// Persist to database
|
|
602
|
-
analysisDb.saveAnalysis(analysis);
|
|
603
|
-
|
|
604
|
-
// Generate and save embedding
|
|
605
|
-
try {
|
|
606
|
-
const { embedAnalysis } = await import(
|
|
607
|
-
"./analysis/embeddings.js"
|
|
608
|
-
);
|
|
609
|
-
const embedding = await embedAnalysis(analysis);
|
|
610
|
-
await analysisDb.saveEmbedding(analysis.session_id, embedding);
|
|
611
|
-
} catch (error) {
|
|
612
|
-
console.error(
|
|
613
|
-
`Failed to generate embedding for ${sessionId}:`,
|
|
614
|
-
error,
|
|
615
|
-
);
|
|
616
|
-
// Continue - don't fail entire analysis
|
|
617
|
-
}
|
|
548
|
+
"/api/analyze-all-sessions": {
|
|
549
|
+
async POST(req) {
|
|
550
|
+
try {
|
|
551
|
+
const body = await req.json();
|
|
552
|
+
const { sessionIds } = body as { sessionIds: string[] };
|
|
618
553
|
|
|
619
|
-
|
|
620
|
-
} catch (error) {
|
|
621
|
-
console.error("Session analysis error:", error);
|
|
554
|
+
if (!Array.isArray(sessionIds)) {
|
|
622
555
|
return Response.json(
|
|
623
|
-
{
|
|
624
|
-
|
|
625
|
-
error instanceof Error ? error.message : "Analysis failed",
|
|
626
|
-
},
|
|
627
|
-
{ status: 500 },
|
|
556
|
+
{ error: "sessionIds must be an array" },
|
|
557
|
+
{ status: 400 },
|
|
628
558
|
);
|
|
629
559
|
}
|
|
630
|
-
},
|
|
631
|
-
},
|
|
632
560
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
try {
|
|
636
|
-
const body = await req.json();
|
|
637
|
-
const { sessionIds } = body as { sessionIds: string[] };
|
|
561
|
+
// Import analyzer dynamically
|
|
562
|
+
const { analyzeSession } = await import("./analysis/analyzer.js");
|
|
638
563
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
{ status: 400 },
|
|
643
|
-
);
|
|
644
|
-
}
|
|
564
|
+
// Fetch agent config once for all sessions
|
|
565
|
+
const agentConfig = await fetchAgentConfig();
|
|
566
|
+
const model = agentConfig?.model || "unknown";
|
|
645
567
|
|
|
646
|
-
|
|
647
|
-
|
|
568
|
+
// Process in batches of 25
|
|
569
|
+
const BATCH_SIZE = 25;
|
|
570
|
+
const results: Array<{
|
|
571
|
+
session_id: string;
|
|
572
|
+
success: boolean;
|
|
573
|
+
error?: string;
|
|
574
|
+
}> = [];
|
|
648
575
|
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
576
|
+
const totalBatches = Math.ceil(sessionIds.length / BATCH_SIZE);
|
|
577
|
+
console.log(
|
|
578
|
+
`✨ Starting batch analysis of ${sessionIds.length} sessions (${totalBatches} batches)...`,
|
|
579
|
+
);
|
|
652
580
|
|
|
653
|
-
|
|
654
|
-
const
|
|
655
|
-
const
|
|
656
|
-
session_id: string;
|
|
657
|
-
success: boolean;
|
|
658
|
-
error?: string;
|
|
659
|
-
}> = [];
|
|
581
|
+
for (let i = 0; i < sessionIds.length; i += BATCH_SIZE) {
|
|
582
|
+
const batch = sessionIds.slice(i, i + BATCH_SIZE);
|
|
583
|
+
const batchNum = Math.floor(i / BATCH_SIZE) + 1;
|
|
660
584
|
|
|
661
|
-
const totalBatches = Math.ceil(sessionIds.length / BATCH_SIZE);
|
|
662
585
|
console.log(
|
|
663
|
-
|
|
586
|
+
`📊 Processing batch ${batchNum}/${totalBatches} (${batch.length} sessions)...`,
|
|
664
587
|
);
|
|
665
588
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
589
|
+
// Run batch in parallel
|
|
590
|
+
const batchResults = await Promise.allSettled(
|
|
591
|
+
batch.map(async (sessionId) => {
|
|
592
|
+
// Fetch session data
|
|
593
|
+
const sessionResponse = await fetch(
|
|
594
|
+
`${agentServerUrl}/sessions/${sessionId}`,
|
|
595
|
+
);
|
|
669
596
|
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
597
|
+
if (!sessionResponse.ok) {
|
|
598
|
+
throw new Error(`Failed to fetch session ${sessionId}`);
|
|
599
|
+
}
|
|
673
600
|
|
|
674
|
-
|
|
675
|
-
const batchResults = await Promise.allSettled(
|
|
676
|
-
batch.map(async (sessionId) => {
|
|
677
|
-
// Fetch session data
|
|
678
|
-
const sessionResponse = await fetch(
|
|
679
|
-
`${agentServerUrl}/sessions/${sessionId}`,
|
|
680
|
-
);
|
|
601
|
+
const sessionData = await sessionResponse.json();
|
|
681
602
|
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
603
|
+
// Fetch metrics from OTLP spans
|
|
604
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
605
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
606
|
+
const sessionMetrics = extractSessionMetrics(
|
|
607
|
+
traces,
|
|
608
|
+
allSpans,
|
|
609
|
+
model,
|
|
610
|
+
);
|
|
685
611
|
|
|
686
|
-
|
|
612
|
+
// Convert to AnalysisMetrics format
|
|
613
|
+
const metrics = {
|
|
614
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
615
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
616
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
617
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
618
|
+
durationMs: sessionMetrics.durationMs,
|
|
619
|
+
};
|
|
620
|
+
|
|
621
|
+
// Convert tool calls to DetailedToolCall format
|
|
622
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map(
|
|
623
|
+
(tc) => ({
|
|
624
|
+
name: tc.name,
|
|
625
|
+
input: tc.input,
|
|
626
|
+
output: tc.output,
|
|
627
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
628
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
629
|
+
}),
|
|
630
|
+
);
|
|
687
631
|
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
model,
|
|
695
|
-
);
|
|
632
|
+
// Analyze
|
|
633
|
+
const analysis = await analyzeSession({
|
|
634
|
+
session: sessionData,
|
|
635
|
+
metrics,
|
|
636
|
+
toolCalls,
|
|
637
|
+
});
|
|
696
638
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
output: tc.output,
|
|
712
|
-
startTimeUnixNano: tc.startTimeUnixNano,
|
|
713
|
-
endTimeUnixNano: tc.endTimeUnixNano,
|
|
714
|
-
}),
|
|
639
|
+
// Persist
|
|
640
|
+
analysisDb.saveAnalysis(analysis);
|
|
641
|
+
|
|
642
|
+
// Generate and save embedding
|
|
643
|
+
try {
|
|
644
|
+
const { embedAnalysis } = await import(
|
|
645
|
+
"./analysis/embeddings.js"
|
|
646
|
+
);
|
|
647
|
+
const embedding = await embedAnalysis(analysis);
|
|
648
|
+
await analysisDb.saveEmbedding(sessionId, embedding);
|
|
649
|
+
} catch (error) {
|
|
650
|
+
console.error(
|
|
651
|
+
`Failed to generate embedding for ${sessionId}:`,
|
|
652
|
+
error,
|
|
715
653
|
);
|
|
654
|
+
// Continue - batch processing continues
|
|
655
|
+
}
|
|
716
656
|
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
metrics,
|
|
721
|
-
toolCalls,
|
|
722
|
-
});
|
|
723
|
-
|
|
724
|
-
// Persist
|
|
725
|
-
analysisDb.saveAnalysis(analysis);
|
|
726
|
-
|
|
727
|
-
// Generate and save embedding
|
|
728
|
-
try {
|
|
729
|
-
const { embedAnalysis } = await import(
|
|
730
|
-
"./analysis/embeddings.js"
|
|
731
|
-
);
|
|
732
|
-
const embedding = await embedAnalysis(analysis);
|
|
733
|
-
await analysisDb.saveEmbedding(sessionId, embedding);
|
|
734
|
-
} catch (error) {
|
|
735
|
-
console.error(
|
|
736
|
-
`Failed to generate embedding for ${sessionId}:`,
|
|
737
|
-
error,
|
|
738
|
-
);
|
|
739
|
-
// Continue - batch processing continues
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
return { session_id: sessionId, success: true };
|
|
743
|
-
}),
|
|
744
|
-
);
|
|
657
|
+
return { session_id: sessionId, success: true };
|
|
658
|
+
}),
|
|
659
|
+
);
|
|
745
660
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
}
|
|
661
|
+
// Collect results
|
|
662
|
+
for (let j = 0; j < batchResults.length; j++) {
|
|
663
|
+
const result = batchResults[j];
|
|
664
|
+
const sessionId = batch[j];
|
|
665
|
+
if (!sessionId) continue;
|
|
666
|
+
|
|
667
|
+
if (result && result.status === "fulfilled") {
|
|
668
|
+
results.push(result.value);
|
|
669
|
+
} else if (result && result.status === "rejected") {
|
|
670
|
+
results.push({
|
|
671
|
+
session_id: sessionId,
|
|
672
|
+
success: false,
|
|
673
|
+
error:
|
|
674
|
+
result.reason instanceof Error
|
|
675
|
+
? result.reason.message
|
|
676
|
+
: String(result.reason || "Unknown error"),
|
|
677
|
+
});
|
|
764
678
|
}
|
|
765
|
-
|
|
766
|
-
const batchSuccesses = batchResults.filter(
|
|
767
|
-
(r) => r.status === "fulfilled",
|
|
768
|
-
).length;
|
|
769
|
-
const batchErrors = batchResults.filter(
|
|
770
|
-
(r) => r.status === "rejected",
|
|
771
|
-
).length;
|
|
772
|
-
console.log(
|
|
773
|
-
`✅ Batch ${batchNum}/${totalBatches} complete: ${batchSuccesses} successful, ${batchErrors} failed`,
|
|
774
|
-
);
|
|
775
679
|
}
|
|
776
680
|
|
|
777
|
-
const
|
|
778
|
-
|
|
681
|
+
const batchSuccesses = batchResults.filter(
|
|
682
|
+
(r) => r.status === "fulfilled",
|
|
683
|
+
).length;
|
|
684
|
+
const batchErrors = batchResults.filter(
|
|
685
|
+
(r) => r.status === "rejected",
|
|
686
|
+
).length;
|
|
779
687
|
console.log(
|
|
780
|
-
|
|
781
|
-
);
|
|
782
|
-
|
|
783
|
-
return Response.json({ results });
|
|
784
|
-
} catch (error) {
|
|
785
|
-
console.error("Batch analysis error:", error);
|
|
786
|
-
return Response.json(
|
|
787
|
-
{
|
|
788
|
-
error:
|
|
789
|
-
error instanceof Error ? error.message : "Analysis failed",
|
|
790
|
-
},
|
|
791
|
-
{ status: 500 },
|
|
688
|
+
`✅ Batch ${batchNum}/${totalBatches} complete: ${batchSuccesses} successful, ${batchErrors} failed`,
|
|
792
689
|
);
|
|
793
690
|
}
|
|
794
|
-
},
|
|
795
|
-
},
|
|
796
691
|
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
if (sessionId) {
|
|
804
|
-
// Get single analysis
|
|
805
|
-
const analysis = analysisDb.getAnalysis(sessionId);
|
|
806
|
-
if (!analysis) {
|
|
807
|
-
return Response.json(
|
|
808
|
-
{ error: "Analysis not found" },
|
|
809
|
-
{ status: 404 },
|
|
810
|
-
);
|
|
811
|
-
}
|
|
812
|
-
return Response.json(analysis);
|
|
813
|
-
}
|
|
814
|
-
|
|
815
|
-
// List all analyses
|
|
816
|
-
const limit = Number.parseInt(
|
|
817
|
-
url.searchParams.get("limit") || "50",
|
|
818
|
-
10,
|
|
819
|
-
);
|
|
820
|
-
const offset = Number.parseInt(
|
|
821
|
-
url.searchParams.get("offset") || "0",
|
|
822
|
-
10,
|
|
823
|
-
);
|
|
692
|
+
const totalSuccesses = results.filter((r) => r.success).length;
|
|
693
|
+
const totalErrors = results.filter((r) => !r.success).length;
|
|
694
|
+
console.log(
|
|
695
|
+
`🎉 Batch analysis complete: ${totalSuccesses} successful, ${totalErrors} failed`,
|
|
696
|
+
);
|
|
824
697
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
},
|
|
836
|
-
{ status: 500 },
|
|
837
|
-
);
|
|
838
|
-
}
|
|
839
|
-
},
|
|
698
|
+
return Response.json({ results });
|
|
699
|
+
} catch (error) {
|
|
700
|
+
console.error("Batch analysis error:", error);
|
|
701
|
+
return Response.json(
|
|
702
|
+
{
|
|
703
|
+
error: error instanceof Error ? error.message : "Analysis failed",
|
|
704
|
+
},
|
|
705
|
+
{ status: 500 },
|
|
706
|
+
);
|
|
707
|
+
}
|
|
840
708
|
},
|
|
709
|
+
},
|
|
841
710
|
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
const limit = Number.parseInt(
|
|
848
|
-
url.searchParams.get("limit") || "10",
|
|
849
|
-
10,
|
|
850
|
-
);
|
|
711
|
+
"/api/session-analyses": {
|
|
712
|
+
async GET(req) {
|
|
713
|
+
try {
|
|
714
|
+
const url = new URL(req.url);
|
|
715
|
+
const sessionId = url.searchParams.get("sessionId");
|
|
851
716
|
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
717
|
+
if (sessionId) {
|
|
718
|
+
// Get single analysis
|
|
719
|
+
const analysis = analysisDb.getAnalysis(sessionId);
|
|
720
|
+
if (!analysis) {
|
|
855
721
|
return Response.json(
|
|
856
|
-
{ error: "
|
|
722
|
+
{ error: "Analysis not found" },
|
|
857
723
|
{ status: 404 },
|
|
858
724
|
);
|
|
859
725
|
}
|
|
726
|
+
return Response.json(analysis);
|
|
727
|
+
}
|
|
860
728
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
729
|
+
// List all analyses
|
|
730
|
+
const limit = Number.parseInt(
|
|
731
|
+
url.searchParams.get("limit") || "50",
|
|
732
|
+
10,
|
|
733
|
+
);
|
|
734
|
+
const offset = Number.parseInt(
|
|
735
|
+
url.searchParams.get("offset") || "0",
|
|
736
|
+
10,
|
|
737
|
+
);
|
|
867
738
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
}
|
|
739
|
+
const analyses = analysisDb.listAnalyses(limit, offset);
|
|
740
|
+
return Response.json({ analyses });
|
|
741
|
+
} catch (error) {
|
|
742
|
+
console.error("Error retrieving analyses:", error);
|
|
743
|
+
return Response.json(
|
|
744
|
+
{
|
|
745
|
+
error:
|
|
746
|
+
error instanceof Error
|
|
747
|
+
? error.message
|
|
748
|
+
: "Failed to retrieve analyses",
|
|
749
|
+
},
|
|
750
|
+
{ status: 500 },
|
|
751
|
+
);
|
|
752
|
+
}
|
|
882
753
|
},
|
|
754
|
+
},
|
|
883
755
|
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
const
|
|
756
|
+
"/api/session-analyses/:sessionId/similar": {
|
|
757
|
+
async GET(req) {
|
|
758
|
+
try {
|
|
759
|
+
const sessionId = req.params.sessionId;
|
|
760
|
+
const url = new URL(req.url);
|
|
761
|
+
const limit = Number.parseInt(
|
|
762
|
+
url.searchParams.get("limit") || "10",
|
|
763
|
+
10,
|
|
764
|
+
);
|
|
888
765
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
766
|
+
// Get embedding for this session
|
|
767
|
+
const embedding = await analysisDb.getEmbedding(sessionId);
|
|
768
|
+
if (!embedding) {
|
|
769
|
+
return Response.json(
|
|
770
|
+
{ error: "No embedding found for this session" },
|
|
771
|
+
{ status: 404 },
|
|
893
772
|
);
|
|
773
|
+
}
|
|
894
774
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
775
|
+
// Search for similar sessions
|
|
776
|
+
const similar = (
|
|
777
|
+
await analysisDb.searchSimilarSessions(embedding, limit + 1)
|
|
778
|
+
)
|
|
779
|
+
.filter((s) => s.session_id !== sessionId)
|
|
780
|
+
.slice(0, limit);
|
|
781
|
+
|
|
782
|
+
return Response.json({ similar });
|
|
783
|
+
} catch (error) {
|
|
784
|
+
console.error("Error finding similar sessions:", error);
|
|
785
|
+
return Response.json(
|
|
786
|
+
{
|
|
787
|
+
error:
|
|
788
|
+
error instanceof Error
|
|
789
|
+
? error.message
|
|
790
|
+
: "Failed to find similar sessions",
|
|
791
|
+
},
|
|
792
|
+
{ status: 500 },
|
|
793
|
+
);
|
|
794
|
+
}
|
|
795
|
+
},
|
|
796
|
+
},
|
|
903
797
|
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
{ error: "Comparison config not found" },
|
|
909
|
-
{ status: 404 },
|
|
910
|
-
);
|
|
911
|
-
}
|
|
798
|
+
// Comparison analysis endpoints
|
|
799
|
+
"/api/analyze-comparison/:runId": {
|
|
800
|
+
async POST(req) {
|
|
801
|
+
const runId = req.params.runId;
|
|
912
802
|
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
);
|
|
919
|
-
}
|
|
803
|
+
try {
|
|
804
|
+
// Import analyzer dynamically
|
|
805
|
+
const { analyzeComparison } = await import(
|
|
806
|
+
"./analysis/comparison-analyzer.js"
|
|
807
|
+
);
|
|
920
808
|
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
809
|
+
// Get the comparison run
|
|
810
|
+
const run = comparisonDb.getRun(runId);
|
|
811
|
+
if (!run) {
|
|
812
|
+
return Response.json(
|
|
813
|
+
{ error: "Comparison run not found" },
|
|
814
|
+
{ status: 404 },
|
|
815
|
+
);
|
|
816
|
+
}
|
|
927
817
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
818
|
+
// Get the comparison config
|
|
819
|
+
const config = comparisonDb.getConfig(run.configId);
|
|
820
|
+
if (!config) {
|
|
821
|
+
return Response.json(
|
|
822
|
+
{ error: "Comparison config not found" },
|
|
823
|
+
{ status: 404 },
|
|
824
|
+
);
|
|
825
|
+
}
|
|
934
826
|
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
// Get agent config for original tools and system prompt
|
|
943
|
-
const agentConfig = await fetchAgentConfig();
|
|
944
|
-
|
|
945
|
-
// Get metrics for each session
|
|
946
|
-
const getMetrics = (sessionId: string) => {
|
|
947
|
-
const spans = db.getSpansBySessionAttribute(sessionId);
|
|
948
|
-
const traces = db.listTraces(100, 0, sessionId);
|
|
949
|
-
return extractSessionMetrics(
|
|
950
|
-
traces,
|
|
951
|
-
spans,
|
|
952
|
-
agentConfig?.model || "unknown",
|
|
953
|
-
);
|
|
954
|
-
};
|
|
955
|
-
|
|
956
|
-
const originalMetrics = getMetrics(run.sourceSessionId);
|
|
957
|
-
const controlMetrics = getMetrics(run.controlSessionId);
|
|
958
|
-
const variantMetrics = getMetrics(run.variantSessionId);
|
|
959
|
-
|
|
960
|
-
// Run the comparison analysis
|
|
961
|
-
const analysis = await analyzeComparison({
|
|
962
|
-
runId,
|
|
963
|
-
hypothesis: config.hypothesis || "",
|
|
964
|
-
config,
|
|
965
|
-
originalSession,
|
|
966
|
-
controlSession,
|
|
967
|
-
variantSession,
|
|
968
|
-
originalMetrics,
|
|
969
|
-
controlMetrics,
|
|
970
|
-
variantMetrics,
|
|
971
|
-
originalSystemPrompt: agentConfig?.systemPrompt || undefined,
|
|
972
|
-
originalTools: agentConfig?.tools?.map((t) => t.name) || [],
|
|
973
|
-
});
|
|
974
|
-
|
|
975
|
-
// Save to database
|
|
976
|
-
comparisonDb.saveComparisonAnalysis(runId, analysis);
|
|
827
|
+
// Verify all sessions exist
|
|
828
|
+
if (!run.controlSessionId || !run.variantSessionId) {
|
|
829
|
+
return Response.json(
|
|
830
|
+
{ error: "Comparison run is incomplete - missing session IDs" },
|
|
831
|
+
{ status: 400 },
|
|
832
|
+
);
|
|
833
|
+
}
|
|
977
834
|
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
835
|
+
// Fetch all three sessions from agent server
|
|
836
|
+
const [originalRes, controlRes, variantRes] = await Promise.all([
|
|
837
|
+
fetch(`${agentServerUrl}/sessions/${run.sourceSessionId}`),
|
|
838
|
+
fetch(`${agentServerUrl}/sessions/${run.controlSessionId}`),
|
|
839
|
+
fetch(`${agentServerUrl}/sessions/${run.variantSessionId}`),
|
|
840
|
+
]);
|
|
841
|
+
|
|
842
|
+
if (!originalRes.ok || !controlRes.ok || !variantRes.ok) {
|
|
981
843
|
return Response.json(
|
|
982
|
-
{
|
|
983
|
-
error:
|
|
984
|
-
error instanceof Error
|
|
985
|
-
? error.message
|
|
986
|
-
: "Comparison analysis failed",
|
|
987
|
-
},
|
|
844
|
+
{ error: "Failed to fetch one or more sessions" },
|
|
988
845
|
{ status: 500 },
|
|
989
846
|
);
|
|
990
847
|
}
|
|
991
|
-
},
|
|
992
|
-
},
|
|
993
848
|
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
849
|
+
const [originalSession, controlSession, variantSession] =
|
|
850
|
+
await Promise.all([
|
|
851
|
+
originalRes.json(),
|
|
852
|
+
controlRes.json(),
|
|
853
|
+
variantRes.json(),
|
|
854
|
+
]);
|
|
999
855
|
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
{ error: "Comparison analysis not found" },
|
|
1003
|
-
{ status: 404 },
|
|
1004
|
-
);
|
|
1005
|
-
}
|
|
856
|
+
// Get agent config for original tools and system prompt
|
|
857
|
+
const agentConfig = await fetchAgentConfig();
|
|
1006
858
|
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
859
|
+
// Get metrics for each session
|
|
860
|
+
const getMetrics = (sessionId: string) => {
|
|
861
|
+
const spans = db.getSpansBySessionAttribute(sessionId);
|
|
862
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
863
|
+
return extractSessionMetrics(
|
|
864
|
+
traces,
|
|
865
|
+
spans,
|
|
866
|
+
agentConfig?.model || "unknown",
|
|
867
|
+
);
|
|
868
|
+
};
|
|
869
|
+
|
|
870
|
+
const originalMetrics = getMetrics(run.sourceSessionId);
|
|
871
|
+
const controlMetrics = getMetrics(run.controlSessionId);
|
|
872
|
+
const variantMetrics = getMetrics(run.variantSessionId);
|
|
873
|
+
|
|
874
|
+
// Run the comparison analysis
|
|
875
|
+
const analysis = await analyzeComparison({
|
|
876
|
+
runId,
|
|
877
|
+
hypothesis: config.hypothesis || "",
|
|
878
|
+
config,
|
|
879
|
+
originalSession,
|
|
880
|
+
controlSession,
|
|
881
|
+
variantSession,
|
|
882
|
+
originalMetrics,
|
|
883
|
+
controlMetrics,
|
|
884
|
+
variantMetrics,
|
|
885
|
+
originalSystemPrompt: agentConfig?.systemPrompt || undefined,
|
|
886
|
+
originalTools: agentConfig?.tools?.map((t) => t.name) || [],
|
|
887
|
+
});
|
|
888
|
+
|
|
889
|
+
// Save to database
|
|
890
|
+
comparisonDb.saveComparisonAnalysis(runId, analysis);
|
|
891
|
+
|
|
892
|
+
return Response.json(analysis);
|
|
893
|
+
} catch (error) {
|
|
894
|
+
console.error("Comparison analysis error:", error);
|
|
895
|
+
return Response.json(
|
|
896
|
+
{
|
|
897
|
+
error:
|
|
898
|
+
error instanceof Error
|
|
899
|
+
? error.message
|
|
900
|
+
: "Comparison analysis failed",
|
|
901
|
+
},
|
|
902
|
+
{ status: 500 },
|
|
903
|
+
);
|
|
904
|
+
}
|
|
905
|
+
},
|
|
906
|
+
},
|
|
907
|
+
|
|
908
|
+
"/api/comparison-analysis/:runId": {
|
|
909
|
+
async GET(req) {
|
|
910
|
+
try {
|
|
911
|
+
const runId = req.params.runId;
|
|
912
|
+
const analysis = comparisonDb.getComparisonAnalysis(runId);
|
|
913
|
+
|
|
914
|
+
if (!analysis) {
|
|
1010
915
|
return Response.json(
|
|
1011
|
-
{
|
|
1012
|
-
|
|
1013
|
-
error instanceof Error
|
|
1014
|
-
? error.message
|
|
1015
|
-
: "Failed to fetch comparison analysis",
|
|
1016
|
-
},
|
|
1017
|
-
{ status: 500 },
|
|
916
|
+
{ error: "Comparison analysis not found" },
|
|
917
|
+
{ status: 404 },
|
|
1018
918
|
);
|
|
1019
919
|
}
|
|
1020
|
-
|
|
920
|
+
|
|
921
|
+
return Response.json(analysis);
|
|
922
|
+
} catch (error) {
|
|
923
|
+
console.error("Error fetching comparison analysis:", error);
|
|
924
|
+
return Response.json(
|
|
925
|
+
{
|
|
926
|
+
error:
|
|
927
|
+
error instanceof Error
|
|
928
|
+
? error.message
|
|
929
|
+
: "Failed to fetch comparison analysis",
|
|
930
|
+
},
|
|
931
|
+
{ status: 500 },
|
|
932
|
+
);
|
|
933
|
+
}
|
|
1021
934
|
},
|
|
935
|
+
},
|
|
1022
936
|
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
},
|
|
937
|
+
"/api/comparison-analysis/:runId/exists": {
|
|
938
|
+
async GET(req) {
|
|
939
|
+
try {
|
|
940
|
+
const runId = req.params.runId;
|
|
941
|
+
const exists = comparisonDb.hasComparisonAnalysis(runId);
|
|
942
|
+
return Response.json({ exists });
|
|
943
|
+
} catch (_error) {
|
|
944
|
+
return Response.json({ exists: false });
|
|
945
|
+
}
|
|
1033
946
|
},
|
|
947
|
+
},
|
|
948
|
+
});
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
/**
 * Identity helper that exists only for its type signature.
 *
 * Passing a route table through this function lets TypeScript infer the route
 * path parameters, giving the same type inference that Bun.serve() uses
 * internally.
 *
 * @param routes - The route table to type-check.
 * @returns The same `routes` object that was passed in, untouched.
 */
function defineRoutes<R extends string>(
	routes: Serve.Routes<undefined, R>,
): Serve.Routes<undefined, R> {
	return routes;
}
|
|
956
|
+
|
|
957
|
+
/** Settings accepted by `startDebuggerServer`. Only `dbPath` is required. */
export interface DebuggerServerOptions {
	/** Database path; forwarded to both the OTLP server and the debugger routes. */
	dbPath: string;
	/** Port for the debugger UI server. Falls back to `DEFAULT_DEBUGGER_PORT`. */
	port?: number;
	/** Port for the OTLP server. Falls back to `DEFAULT_OTLP_PORT`. */
	otlpPort?: number;
	/** Agent name forwarded to the route factory. Falls back to `"Agent"`. */
	agentName?: string;
	/** Base URL of the agent server. Falls back to `"http://localhost:3100"`. */
	agentServerUrl?: string;
}
|
|
964
|
+
|
|
965
|
+
/** Handles returned by `startDebuggerServer`. */
export interface DebuggerServerResult {
	/** The Bun server that hosts the debugger UI and API routes. */
	server: ReturnType<typeof serve>;
	/** The Bun server that exposes the OTLP app. */
	otlpServer: ReturnType<typeof serve>;
	/**
	 * Shutdown callback.
	 * NOTE(review): presumably stops both servers — confirm against the
	 * implementation in `startDebuggerServer`.
	 */
	stop: () => void;
}
|
|
1034
970
|
|
|
971
|
+
export function startDebuggerServer(
|
|
972
|
+
options: DebuggerServerOptions,
|
|
973
|
+
): DebuggerServerResult {
|
|
974
|
+
const {
|
|
975
|
+
port = DEFAULT_DEBUGGER_PORT,
|
|
976
|
+
otlpPort = DEFAULT_OTLP_PORT,
|
|
977
|
+
dbPath,
|
|
978
|
+
agentName = "Agent",
|
|
979
|
+
agentServerUrl = "http://localhost:3100",
|
|
980
|
+
} = options;
|
|
981
|
+
|
|
982
|
+
// Start OTLP server (initializes database internally)
|
|
983
|
+
const otlpApp = createOtlpServer({ dbPath });
|
|
984
|
+
const otlpServer = serve({
|
|
985
|
+
fetch: otlpApp.fetch,
|
|
986
|
+
hostname: Bun.env.BIND_HOST || "localhost",
|
|
987
|
+
port: otlpPort,
|
|
988
|
+
});
|
|
989
|
+
|
|
990
|
+
// Create routes using the factory function
|
|
991
|
+
const routes = createDebuggerRoutes({
|
|
992
|
+
dbPath,
|
|
993
|
+
agentName,
|
|
994
|
+
agentServerUrl,
|
|
995
|
+
});
|
|
996
|
+
|
|
997
|
+
// Start debugger UI server
|
|
998
|
+
const server = serve({
|
|
999
|
+
port,
|
|
1000
|
+
idleTimeout: 120, // 2 minutes for long-running LLM analysis requests
|
|
1001
|
+
routes: {
|
|
1002
|
+
...routes,
|
|
1035
1003
|
// Serve index.html for all unmatched routes (SPA routing)
|
|
1036
1004
|
"/*": index,
|
|
1037
1005
|
},
|
|
@@ -1056,3 +1024,54 @@ export function startDebuggerServer(
|
|
|
1056
1024
|
|
|
1057
1025
|
return { server, otlpServer, stop };
|
|
1058
1026
|
}
|
|
1027
|
+
|
|
1028
|
+
/**
 * Fetches an agent's configuration by calling the `initialize` JSON-RPC method
 * on the agent server.
 *
 * The response body is untyped JSON, so it is treated as `unknown` and every
 * field is narrowed before being copied into the result. A malformed `_meta`
 * (for example a non-array `tools`) therefore degrades to the defaults instead
 * of leaking into callers that do `config.tools.map(...)`.
 *
 * This function never throws: network failures, non-2xx responses, JSON-RPC
 * errors and unparseable bodies are logged with `console.error` and reported
 * as `null`.
 *
 * @param agentServerUrl - Base URL of the agent server; `/rpc` is appended.
 * @returns The extracted config, or `null` if it could not be obtained.
 */
async function fetchAgentConfigFromServer(
	agentServerUrl: string,
): Promise<AgentConfig | null> {
	// Narrowing helper for values that came out of JSON.
	const isRecord = (value: unknown): value is Record<string, unknown> =>
		typeof value === "object" && value !== null;

	try {
		// Call agent's initialize RPC to get config
		const response = await fetch(`${agentServerUrl}/rpc`, {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify({
				jsonrpc: "2.0",
				id: "debugger-config",
				method: "initialize",
				params: {
					protocolVersion: 1, // ACP protocol version as number
					clientCapabilities: {},
				},
			}),
		});

		if (!response.ok) {
			console.error("Failed to fetch agent config:", response.statusText);
			return null;
		}

		const data: unknown = await response.json();
		if (!isRecord(data)) {
			// e.g. a bare `null` or primitive body; nothing to extract.
			console.error("Unexpected agent response:", data);
			return null;
		}

		// Check for JSON-RPC error
		if (data.error) {
			console.error("Agent RPC error:", data.error);
			return null;
		}

		const result = data.result;
		if (!result) {
			console.error("No result in agent response");
			return null;
		}

		// A result without a usable `_meta` object yields the defaults below.
		const meta: Record<string, unknown> =
			isRecord(result) && isRecord(result._meta) ? result._meta : {};
		const { model, systemPrompt, tools } = meta;

		// Extract config from initialize response, falling back to defaults for
		// any field that is missing, empty, or of the wrong type.
		return {
			model: typeof model === "string" && model !== "" ? model : "unknown",
			systemPrompt:
				typeof systemPrompt === "string" && systemPrompt !== ""
					? systemPrompt
					: null,
			tools: Array.isArray(tools) ? tools : [],
		};
	} catch (error) {
		console.error("Error fetching agent config:", error);
		return null;
	}
}
|