@townco/debugger 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -4
- package/src/App.tsx +6 -0
- package/src/analysis/analyzer.ts +272 -0
- package/src/analysis/embeddings.ts +97 -0
- package/src/analysis/schema.ts +91 -0
- package/src/analysis/types.ts +157 -0
- package/src/analysis-db.ts +238 -0
- package/src/comparison-db.test.ts +28 -5
- package/src/comparison-db.ts +57 -9
- package/src/components/AnalyzeAllButton.tsx +81 -0
- package/src/components/DebuggerHeader.tsx +12 -0
- package/src/components/SessionAnalysisButton.tsx +109 -0
- package/src/components/SessionAnalysisDialog.tsx +240 -0
- package/src/components/UnifiedTimeline.tsx +3 -3
- package/src/components/ui/dialog.tsx +120 -0
- package/src/db.ts +3 -2
- package/src/lib/metrics.ts +131 -11
- package/src/pages/ComparisonView.tsx +618 -177
- package/src/pages/FindSessions.tsx +247 -0
- package/src/pages/SessionList.tsx +76 -10
- package/src/pages/SessionView.tsx +33 -1
- package/src/pages/TownHall.tsx +345 -187
- package/src/schemas.ts +27 -8
- package/src/server.ts +423 -3
- package/src/types.ts +11 -2
package/src/schemas.ts
CHANGED
|
@@ -1,15 +1,34 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
const numberWithDefault = z
|
|
4
|
+
.number()
|
|
5
|
+
.optional()
|
|
6
|
+
.transform((val) => val ?? 0);
|
|
4
7
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
toolCallCount: z.number(),
|
|
8
|
+
const ToolCallSchema = z.object({
|
|
9
|
+
name: z.string(),
|
|
10
|
+
input: z.unknown(),
|
|
11
|
+
output: z.unknown(),
|
|
12
|
+
startTimeUnixNano: z.number().optional(),
|
|
13
|
+
endTimeUnixNano: z.number().optional(),
|
|
12
14
|
});
|
|
13
15
|
|
|
16
|
+
export const VariantToolsSchema = z.array(z.string());
|
|
17
|
+
|
|
18
|
+
export const SessionMetricsSchema = z
|
|
19
|
+
.object({
|
|
20
|
+
durationMs: numberWithDefault,
|
|
21
|
+
inputTokens: numberWithDefault,
|
|
22
|
+
outputTokens: numberWithDefault,
|
|
23
|
+
totalTokens: numberWithDefault,
|
|
24
|
+
estimatedCost: z.number().catch(0),
|
|
25
|
+
toolCallCount: numberWithDefault,
|
|
26
|
+
toolCalls: z.array(ToolCallSchema).optional().default([]),
|
|
27
|
+
})
|
|
28
|
+
.transform((metrics) => ({
|
|
29
|
+
...metrics,
|
|
30
|
+
toolCalls: metrics.toolCalls ?? [],
|
|
31
|
+
}));
|
|
32
|
+
|
|
14
33
|
export type VariantTools = z.infer<typeof VariantToolsSchema>;
|
|
15
34
|
export type SessionMetrics = z.infer<typeof SessionMetricsSchema>;
|
package/src/server.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { resetDb } from "@townco/otlp-server/db";
|
|
2
2
|
import { createOtlpServer } from "@townco/otlp-server/http";
|
|
3
3
|
import { serve } from "bun";
|
|
4
|
+
import { AnalysisDb } from "./analysis-db";
|
|
4
5
|
import { ComparisonDb } from "./comparison-db";
|
|
5
6
|
import { DebuggerDb } from "./db";
|
|
6
7
|
import index from "./index.html";
|
|
@@ -10,6 +11,7 @@ import type {
|
|
|
10
11
|
AgentConfig,
|
|
11
12
|
ComparisonConfig,
|
|
12
13
|
ConversationTrace,
|
|
14
|
+
SessionMetrics,
|
|
13
15
|
Span,
|
|
14
16
|
} from "./types";
|
|
15
17
|
|
|
@@ -56,6 +58,9 @@ export function startDebuggerServer(
|
|
|
56
58
|
const comparisonDbPath = dbPath.replace(/\.db$/, "-comparison.db");
|
|
57
59
|
const comparisonDb = new ComparisonDb(comparisonDbPath);
|
|
58
60
|
|
|
61
|
+
// Create analysis database - uses main debugger database
|
|
62
|
+
const analysisDb = new AnalysisDb(dbPath);
|
|
63
|
+
|
|
59
64
|
// Helper to fetch agent config from agent server
|
|
60
65
|
async function fetchAgentConfig(): Promise<AgentConfig | null> {
|
|
61
66
|
try {
|
|
@@ -133,7 +138,9 @@ export function startDebuggerServer(
|
|
|
133
138
|
"/api/sessions": {
|
|
134
139
|
GET(req) {
|
|
135
140
|
const url = new URL(req.url);
|
|
136
|
-
const limit = Number.parseInt(
|
|
141
|
+
const limit = Number.parseInt(
|
|
142
|
+
url.searchParams.get("limit") || "1000",
|
|
143
|
+
);
|
|
137
144
|
const offset = Number.parseInt(url.searchParams.get("offset") || "0");
|
|
138
145
|
const sessions = db.listSessions(limit, offset);
|
|
139
146
|
return Response.json(sessions);
|
|
@@ -258,7 +265,7 @@ export function startDebuggerServer(
|
|
|
258
265
|
const body = await req.json();
|
|
259
266
|
const config: ComparisonConfig = {
|
|
260
267
|
id: body.id || crypto.randomUUID(),
|
|
261
|
-
|
|
268
|
+
dimensions: body.dimensions || [],
|
|
262
269
|
controlModel: body.controlModel,
|
|
263
270
|
variantModel: body.variantModel,
|
|
264
271
|
variantSystemPrompt: body.variantSystemPrompt,
|
|
@@ -269,6 +276,7 @@ export function startDebuggerServer(
|
|
|
269
276
|
comparisonDb.saveConfig(config);
|
|
270
277
|
return Response.json({ id: config.id });
|
|
271
278
|
} catch (error) {
|
|
279
|
+
console.error("Error saving comparison config:", error);
|
|
272
280
|
return Response.json(
|
|
273
281
|
{ error: "Invalid request body" },
|
|
274
282
|
{ status: 400 },
|
|
@@ -277,6 +285,20 @@ export function startDebuggerServer(
|
|
|
277
285
|
},
|
|
278
286
|
},
|
|
279
287
|
|
|
288
|
+
"/api/comparison-config/:configId": {
|
|
289
|
+
GET(req) {
|
|
290
|
+
const configId = req.params.configId;
|
|
291
|
+
const config = comparisonDb.getConfig(configId);
|
|
292
|
+
if (!config) {
|
|
293
|
+
return Response.json(
|
|
294
|
+
{ error: "Comparison config not found" },
|
|
295
|
+
{ status: 404 },
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
return Response.json(config);
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
|
|
280
302
|
"/api/comparison-session-ids": {
|
|
281
303
|
GET() {
|
|
282
304
|
const sessionIds = comparisonDb.getComparisonSessionIds();
|
|
@@ -311,7 +333,53 @@ export function startDebuggerServer(
|
|
|
311
333
|
{ status: 404 },
|
|
312
334
|
);
|
|
313
335
|
}
|
|
314
|
-
|
|
336
|
+
|
|
337
|
+
const config = comparisonDb.getConfig(run.configId);
|
|
338
|
+
const controlModel =
|
|
339
|
+
config?.controlModel ??
|
|
340
|
+
config?.variantModel ??
|
|
341
|
+
"claude-sonnet-4-5-20250929";
|
|
342
|
+
const variantModel =
|
|
343
|
+
config?.variantModel ??
|
|
344
|
+
config?.controlModel ??
|
|
345
|
+
"claude-sonnet-4-5-20250929";
|
|
346
|
+
|
|
347
|
+
const maybeRefreshMetrics = (
|
|
348
|
+
sessionId: string | null,
|
|
349
|
+
cached: SessionMetrics | null,
|
|
350
|
+
model: string,
|
|
351
|
+
): SessionMetrics | null => {
|
|
352
|
+
if (!sessionId) return cached;
|
|
353
|
+
const needsRefresh =
|
|
354
|
+
!cached ||
|
|
355
|
+
cached.totalTokens === 0 ||
|
|
356
|
+
cached.toolCallCount === 0 ||
|
|
357
|
+
!cached.toolCalls ||
|
|
358
|
+
cached.toolCalls.length === 0;
|
|
359
|
+
if (!needsRefresh) return cached;
|
|
360
|
+
|
|
361
|
+
const spans = db.getSpansBySessionAttribute(sessionId);
|
|
362
|
+
if (spans.length === 0) return cached;
|
|
363
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
364
|
+
return extractSessionMetrics(traces, spans, model);
|
|
365
|
+
};
|
|
366
|
+
|
|
367
|
+
const controlMetrics = maybeRefreshMetrics(
|
|
368
|
+
run.controlSessionId,
|
|
369
|
+
run.controlMetrics,
|
|
370
|
+
controlModel,
|
|
371
|
+
);
|
|
372
|
+
const variantMetrics = maybeRefreshMetrics(
|
|
373
|
+
run.variantSessionId,
|
|
374
|
+
run.variantMetrics,
|
|
375
|
+
variantModel,
|
|
376
|
+
);
|
|
377
|
+
|
|
378
|
+
return Response.json({
|
|
379
|
+
...run,
|
|
380
|
+
controlMetrics,
|
|
381
|
+
variantMetrics,
|
|
382
|
+
});
|
|
315
383
|
},
|
|
316
384
|
},
|
|
317
385
|
|
|
@@ -449,6 +517,351 @@ export function startDebuggerServer(
|
|
|
449
517
|
},
|
|
450
518
|
},
|
|
451
519
|
|
|
520
|
+
"/api/analyze-session/:sessionId": {
|
|
521
|
+
async POST(req) {
|
|
522
|
+
const sessionId = req.params.sessionId;
|
|
523
|
+
|
|
524
|
+
try {
|
|
525
|
+
// Import analyzer dynamically to avoid loading at startup
|
|
526
|
+
const { analyzeSession } = await import("./analysis/analyzer.js");
|
|
527
|
+
|
|
528
|
+
// Fetch session from agent server via ACP HTTP API
|
|
529
|
+
const sessionResponse = await fetch(
|
|
530
|
+
`${agentServerUrl}/sessions/${sessionId}`,
|
|
531
|
+
);
|
|
532
|
+
|
|
533
|
+
if (!sessionResponse.ok) {
|
|
534
|
+
if (sessionResponse.status === 404) {
|
|
535
|
+
return Response.json(
|
|
536
|
+
{ error: "Session not found" },
|
|
537
|
+
{ status: 404 },
|
|
538
|
+
);
|
|
539
|
+
}
|
|
540
|
+
throw new Error(
|
|
541
|
+
`Failed to fetch session: ${sessionResponse.statusText}`,
|
|
542
|
+
);
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
const sessionData = await sessionResponse.json();
|
|
546
|
+
|
|
547
|
+
// Fetch agent config to get model for cost calculation
|
|
548
|
+
const agentConfig = await fetchAgentConfig();
|
|
549
|
+
const model = agentConfig?.model || "unknown";
|
|
550
|
+
|
|
551
|
+
// Fetch metrics from OTLP spans
|
|
552
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
553
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
554
|
+
const sessionMetrics = extractSessionMetrics(
|
|
555
|
+
traces,
|
|
556
|
+
allSpans,
|
|
557
|
+
model,
|
|
558
|
+
);
|
|
559
|
+
|
|
560
|
+
// Convert to AnalysisMetrics format
|
|
561
|
+
const metrics = {
|
|
562
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
563
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
564
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
565
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
566
|
+
durationMs: sessionMetrics.durationMs,
|
|
567
|
+
};
|
|
568
|
+
|
|
569
|
+
// Convert tool calls to DetailedToolCall format
|
|
570
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map((tc) => ({
|
|
571
|
+
name: tc.name,
|
|
572
|
+
input: tc.input,
|
|
573
|
+
output: tc.output,
|
|
574
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
575
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
576
|
+
}));
|
|
577
|
+
|
|
578
|
+
// Analyze with LLM
|
|
579
|
+
const analysis = await analyzeSession({
|
|
580
|
+
session: sessionData,
|
|
581
|
+
metrics,
|
|
582
|
+
toolCalls,
|
|
583
|
+
});
|
|
584
|
+
|
|
585
|
+
// Persist to database
|
|
586
|
+
analysisDb.saveAnalysis(analysis);
|
|
587
|
+
|
|
588
|
+
// Generate and save embedding
|
|
589
|
+
try {
|
|
590
|
+
const { embedAnalysis } = await import(
|
|
591
|
+
"./analysis/embeddings.js"
|
|
592
|
+
);
|
|
593
|
+
const embedding = await embedAnalysis(analysis);
|
|
594
|
+
await analysisDb.saveEmbedding(analysis.session_id, embedding);
|
|
595
|
+
} catch (error) {
|
|
596
|
+
console.error(
|
|
597
|
+
`Failed to generate embedding for ${sessionId}:`,
|
|
598
|
+
error,
|
|
599
|
+
);
|
|
600
|
+
// Continue - don't fail entire analysis
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
return Response.json(analysis);
|
|
604
|
+
} catch (error) {
|
|
605
|
+
console.error("Session analysis error:", error);
|
|
606
|
+
return Response.json(
|
|
607
|
+
{
|
|
608
|
+
error:
|
|
609
|
+
error instanceof Error ? error.message : "Analysis failed",
|
|
610
|
+
},
|
|
611
|
+
{ status: 500 },
|
|
612
|
+
);
|
|
613
|
+
}
|
|
614
|
+
},
|
|
615
|
+
},
|
|
616
|
+
|
|
617
|
+
"/api/analyze-all-sessions": {
|
|
618
|
+
async POST(req) {
|
|
619
|
+
try {
|
|
620
|
+
const body = await req.json();
|
|
621
|
+
const { sessionIds } = body as { sessionIds: string[] };
|
|
622
|
+
|
|
623
|
+
if (!Array.isArray(sessionIds)) {
|
|
624
|
+
return Response.json(
|
|
625
|
+
{ error: "sessionIds must be an array" },
|
|
626
|
+
{ status: 400 },
|
|
627
|
+
);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
// Import analyzer dynamically
|
|
631
|
+
const { analyzeSession } = await import("./analysis/analyzer.js");
|
|
632
|
+
|
|
633
|
+
// Fetch agent config once for all sessions
|
|
634
|
+
const agentConfig = await fetchAgentConfig();
|
|
635
|
+
const model = agentConfig?.model || "unknown";
|
|
636
|
+
|
|
637
|
+
// Process in batches of 25
|
|
638
|
+
const BATCH_SIZE = 25;
|
|
639
|
+
const results: Array<{
|
|
640
|
+
session_id: string;
|
|
641
|
+
success: boolean;
|
|
642
|
+
error?: string;
|
|
643
|
+
}> = [];
|
|
644
|
+
|
|
645
|
+
const totalBatches = Math.ceil(sessionIds.length / BATCH_SIZE);
|
|
646
|
+
console.log(
|
|
647
|
+
`✨ Starting batch analysis of ${sessionIds.length} sessions (${totalBatches} batches)...`,
|
|
648
|
+
);
|
|
649
|
+
|
|
650
|
+
for (let i = 0; i < sessionIds.length; i += BATCH_SIZE) {
|
|
651
|
+
const batch = sessionIds.slice(i, i + BATCH_SIZE);
|
|
652
|
+
const batchNum = Math.floor(i / BATCH_SIZE) + 1;
|
|
653
|
+
|
|
654
|
+
console.log(
|
|
655
|
+
`📊 Processing batch ${batchNum}/${totalBatches} (${batch.length} sessions)...`,
|
|
656
|
+
);
|
|
657
|
+
|
|
658
|
+
// Run batch in parallel
|
|
659
|
+
const batchResults = await Promise.allSettled(
|
|
660
|
+
batch.map(async (sessionId) => {
|
|
661
|
+
// Fetch session data
|
|
662
|
+
const sessionResponse = await fetch(
|
|
663
|
+
`${agentServerUrl}/sessions/${sessionId}`,
|
|
664
|
+
);
|
|
665
|
+
|
|
666
|
+
if (!sessionResponse.ok) {
|
|
667
|
+
throw new Error(`Failed to fetch session ${sessionId}`);
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
const sessionData = await sessionResponse.json();
|
|
671
|
+
|
|
672
|
+
// Fetch metrics from OTLP spans
|
|
673
|
+
const allSpans = db.getSpansBySessionAttribute(sessionId);
|
|
674
|
+
const traces = db.listTraces(100, 0, sessionId);
|
|
675
|
+
const sessionMetrics = extractSessionMetrics(
|
|
676
|
+
traces,
|
|
677
|
+
allSpans,
|
|
678
|
+
model,
|
|
679
|
+
);
|
|
680
|
+
|
|
681
|
+
// Convert to AnalysisMetrics format
|
|
682
|
+
const metrics = {
|
|
683
|
+
inputTokens: sessionMetrics.inputTokens,
|
|
684
|
+
outputTokens: sessionMetrics.outputTokens,
|
|
685
|
+
totalTokens: sessionMetrics.totalTokens,
|
|
686
|
+
estimatedCost: sessionMetrics.estimatedCost,
|
|
687
|
+
durationMs: sessionMetrics.durationMs,
|
|
688
|
+
};
|
|
689
|
+
|
|
690
|
+
// Convert tool calls to DetailedToolCall format
|
|
691
|
+
const toolCalls = (sessionMetrics.toolCalls || []).map(
|
|
692
|
+
(tc) => ({
|
|
693
|
+
name: tc.name,
|
|
694
|
+
input: tc.input,
|
|
695
|
+
output: tc.output,
|
|
696
|
+
startTimeUnixNano: tc.startTimeUnixNano,
|
|
697
|
+
endTimeUnixNano: tc.endTimeUnixNano,
|
|
698
|
+
}),
|
|
699
|
+
);
|
|
700
|
+
|
|
701
|
+
// Analyze
|
|
702
|
+
const analysis = await analyzeSession({
|
|
703
|
+
session: sessionData,
|
|
704
|
+
metrics,
|
|
705
|
+
toolCalls,
|
|
706
|
+
});
|
|
707
|
+
|
|
708
|
+
// Persist
|
|
709
|
+
analysisDb.saveAnalysis(analysis);
|
|
710
|
+
|
|
711
|
+
// Generate and save embedding
|
|
712
|
+
try {
|
|
713
|
+
const { embedAnalysis } = await import(
|
|
714
|
+
"./analysis/embeddings.js"
|
|
715
|
+
);
|
|
716
|
+
const embedding = await embedAnalysis(analysis);
|
|
717
|
+
await analysisDb.saveEmbedding(sessionId, embedding);
|
|
718
|
+
} catch (error) {
|
|
719
|
+
console.error(
|
|
720
|
+
`Failed to generate embedding for ${sessionId}:`,
|
|
721
|
+
error,
|
|
722
|
+
);
|
|
723
|
+
// Continue - batch processing continues
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
return { session_id: sessionId, success: true };
|
|
727
|
+
}),
|
|
728
|
+
);
|
|
729
|
+
|
|
730
|
+
// Collect results
|
|
731
|
+
for (let j = 0; j < batchResults.length; j++) {
|
|
732
|
+
const result = batchResults[j];
|
|
733
|
+
const sessionId = batch[j];
|
|
734
|
+
if (!sessionId) continue;
|
|
735
|
+
|
|
736
|
+
if (result && result.status === "fulfilled") {
|
|
737
|
+
results.push(result.value);
|
|
738
|
+
} else if (result && result.status === "rejected") {
|
|
739
|
+
results.push({
|
|
740
|
+
session_id: sessionId,
|
|
741
|
+
success: false,
|
|
742
|
+
error:
|
|
743
|
+
result.reason instanceof Error
|
|
744
|
+
? result.reason.message
|
|
745
|
+
: String(result.reason || "Unknown error"),
|
|
746
|
+
});
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
const batchSuccesses = batchResults.filter(
|
|
751
|
+
(r) => r.status === "fulfilled",
|
|
752
|
+
).length;
|
|
753
|
+
const batchErrors = batchResults.filter(
|
|
754
|
+
(r) => r.status === "rejected",
|
|
755
|
+
).length;
|
|
756
|
+
console.log(
|
|
757
|
+
`✅ Batch ${batchNum}/${totalBatches} complete: ${batchSuccesses} successful, ${batchErrors} failed`,
|
|
758
|
+
);
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
const totalSuccesses = results.filter((r) => r.success).length;
|
|
762
|
+
const totalErrors = results.filter((r) => !r.success).length;
|
|
763
|
+
console.log(
|
|
764
|
+
`🎉 Batch analysis complete: ${totalSuccesses} successful, ${totalErrors} failed`,
|
|
765
|
+
);
|
|
766
|
+
|
|
767
|
+
return Response.json({ results });
|
|
768
|
+
} catch (error) {
|
|
769
|
+
console.error("Batch analysis error:", error);
|
|
770
|
+
return Response.json(
|
|
771
|
+
{
|
|
772
|
+
error:
|
|
773
|
+
error instanceof Error ? error.message : "Analysis failed",
|
|
774
|
+
},
|
|
775
|
+
{ status: 500 },
|
|
776
|
+
);
|
|
777
|
+
}
|
|
778
|
+
},
|
|
779
|
+
},
|
|
780
|
+
|
|
781
|
+
"/api/session-analyses": {
|
|
782
|
+
async GET(req) {
|
|
783
|
+
try {
|
|
784
|
+
const url = new URL(req.url);
|
|
785
|
+
const sessionId = url.searchParams.get("sessionId");
|
|
786
|
+
|
|
787
|
+
if (sessionId) {
|
|
788
|
+
// Get single analysis
|
|
789
|
+
const analysis = analysisDb.getAnalysis(sessionId);
|
|
790
|
+
if (!analysis) {
|
|
791
|
+
return Response.json(
|
|
792
|
+
{ error: "Analysis not found" },
|
|
793
|
+
{ status: 404 },
|
|
794
|
+
);
|
|
795
|
+
}
|
|
796
|
+
return Response.json(analysis);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
// List all analyses
|
|
800
|
+
const limit = Number.parseInt(
|
|
801
|
+
url.searchParams.get("limit") || "50",
|
|
802
|
+
);
|
|
803
|
+
const offset = Number.parseInt(
|
|
804
|
+
url.searchParams.get("offset") || "0",
|
|
805
|
+
);
|
|
806
|
+
|
|
807
|
+
const analyses = analysisDb.listAnalyses(limit, offset);
|
|
808
|
+
return Response.json({ analyses });
|
|
809
|
+
} catch (error) {
|
|
810
|
+
console.error("Error retrieving analyses:", error);
|
|
811
|
+
return Response.json(
|
|
812
|
+
{
|
|
813
|
+
error:
|
|
814
|
+
error instanceof Error
|
|
815
|
+
? error.message
|
|
816
|
+
: "Failed to retrieve analyses",
|
|
817
|
+
},
|
|
818
|
+
{ status: 500 },
|
|
819
|
+
);
|
|
820
|
+
}
|
|
821
|
+
},
|
|
822
|
+
},
|
|
823
|
+
|
|
824
|
+
"/api/session-analyses/:sessionId/similar": {
|
|
825
|
+
async GET(req) {
|
|
826
|
+
try {
|
|
827
|
+
const sessionId = req.params.sessionId;
|
|
828
|
+
const url = new URL(req.url);
|
|
829
|
+
const limit = Number.parseInt(
|
|
830
|
+
url.searchParams.get("limit") || "10",
|
|
831
|
+
);
|
|
832
|
+
|
|
833
|
+
// Get embedding for this session
|
|
834
|
+
const embedding = await analysisDb.getEmbedding(sessionId);
|
|
835
|
+
if (!embedding) {
|
|
836
|
+
return Response.json(
|
|
837
|
+
{ error: "No embedding found for this session" },
|
|
838
|
+
{ status: 404 },
|
|
839
|
+
);
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
// Search for similar sessions
|
|
843
|
+
const similar = (
|
|
844
|
+
await analysisDb.searchSimilarSessions(embedding, limit + 1)
|
|
845
|
+
)
|
|
846
|
+
.filter((s) => s.session_id !== sessionId)
|
|
847
|
+
.slice(0, limit);
|
|
848
|
+
|
|
849
|
+
return Response.json({ similar });
|
|
850
|
+
} catch (error) {
|
|
851
|
+
console.error("Error finding similar sessions:", error);
|
|
852
|
+
return Response.json(
|
|
853
|
+
{
|
|
854
|
+
error:
|
|
855
|
+
error instanceof Error
|
|
856
|
+
? error.message
|
|
857
|
+
: "Failed to find similar sessions",
|
|
858
|
+
},
|
|
859
|
+
{ status: 500 },
|
|
860
|
+
);
|
|
861
|
+
}
|
|
862
|
+
},
|
|
863
|
+
},
|
|
864
|
+
|
|
452
865
|
// Serve index.html for all unmatched routes (SPA routing)
|
|
453
866
|
"/*": index,
|
|
454
867
|
},
|
|
@@ -464,5 +877,12 @@ export function startDebuggerServer(
|
|
|
464
877
|
otlpServer.stop();
|
|
465
878
|
};
|
|
466
879
|
|
|
880
|
+
console.log(`🔍 Debugger UI: http://${server.hostname}:${server.port}`);
|
|
881
|
+
console.log(
|
|
882
|
+
`📊 OTLP endpoint: http://${otlpServer.hostname}:${otlpServer.port}`,
|
|
883
|
+
);
|
|
884
|
+
console.log(`📁 Database: ${dbPath}`);
|
|
885
|
+
console.log(`🤖 Agent server: ${agentServerUrl}`);
|
|
886
|
+
|
|
467
887
|
return { server, otlpServer, stop };
|
|
468
888
|
}
|
package/src/types.ts
CHANGED
|
@@ -85,7 +85,7 @@ export type ComparisonDimension = "model" | "system_prompt" | "tools";
|
|
|
85
85
|
|
|
86
86
|
export interface ComparisonConfig {
|
|
87
87
|
id: string;
|
|
88
|
-
|
|
88
|
+
dimensions: ComparisonDimension[]; // Now supports multiple dimensions
|
|
89
89
|
controlModel?: string | undefined; // Original model for comparison
|
|
90
90
|
variantModel?: string | undefined;
|
|
91
91
|
variantSystemPrompt?: string | undefined;
|
|
@@ -96,7 +96,7 @@ export interface ComparisonConfig {
|
|
|
96
96
|
|
|
97
97
|
export interface ComparisonConfigRow {
|
|
98
98
|
id: string;
|
|
99
|
-
|
|
99
|
+
dimensions: string; // JSON array of dimensions
|
|
100
100
|
control_model: string | null;
|
|
101
101
|
variant_model: string | null;
|
|
102
102
|
variant_system_prompt: string | null;
|
|
@@ -112,6 +112,15 @@ export interface SessionMetrics {
|
|
|
112
112
|
totalTokens: number;
|
|
113
113
|
estimatedCost: number;
|
|
114
114
|
toolCallCount: number;
|
|
115
|
+
toolCalls?: ToolCall[];
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export interface ToolCall {
|
|
119
|
+
name: string;
|
|
120
|
+
input: unknown;
|
|
121
|
+
output: unknown;
|
|
122
|
+
startTimeUnixNano?: number | undefined;
|
|
123
|
+
endTimeUnixNano?: number | undefined;
|
|
115
124
|
}
|
|
116
125
|
|
|
117
126
|
export interface ComparisonRun {
|