@townco/debugger 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -4
- package/src/App.tsx +6 -0
- package/src/analysis/analyzer.ts +272 -0
- package/src/analysis/embeddings.ts +97 -0
- package/src/analysis/schema.ts +91 -0
- package/src/analysis/types.ts +157 -0
- package/src/analysis-db.ts +238 -0
- package/src/comparison-db.test.ts +28 -5
- package/src/comparison-db.ts +57 -9
- package/src/components/AnalyzeAllButton.tsx +81 -0
- package/src/components/DebuggerHeader.tsx +12 -0
- package/src/components/SessionAnalysisButton.tsx +109 -0
- package/src/components/SessionAnalysisDialog.tsx +240 -0
- package/src/components/UnifiedTimeline.tsx +3 -3
- package/src/components/ui/dialog.tsx +120 -0
- package/src/db.ts +3 -2
- package/src/lib/metrics.ts +131 -11
- package/src/pages/ComparisonView.tsx +618 -177
- package/src/pages/FindSessions.tsx +247 -0
- package/src/pages/SessionList.tsx +76 -10
- package/src/pages/SessionView.tsx +33 -1
- package/src/pages/TownHall.tsx +345 -187
- package/src/schemas.ts +27 -8
- package/src/server.ts +423 -3
- package/src/types.ts +11 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@townco/debugger",
|
|
3
|
-
"version": "0.1.28",
|
|
3
|
+
"version": "0.1.30",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"engines": {
|
|
6
6
|
"bun": ">=1.3.0"
|
|
@@ -15,24 +15,27 @@
|
|
|
15
15
|
"check": "tsc --noEmit"
|
|
16
16
|
},
|
|
17
17
|
"dependencies": {
|
|
18
|
+
"@anthropic-ai/sdk": "^0.70.0",
|
|
19
|
+
"@lancedb/lancedb": "^0.22.3",
|
|
18
20
|
"@radix-ui/react-dialog": "^1.1.15",
|
|
19
21
|
"@radix-ui/react-label": "^2.1.7",
|
|
20
22
|
"@radix-ui/react-select": "^2.2.6",
|
|
21
23
|
"@radix-ui/react-slot": "^1.2.3",
|
|
22
24
|
"@radix-ui/react-tabs": "^1.1.0",
|
|
23
|
-
"@townco/otlp-server": "0.1.
|
|
24
|
-
"@townco/ui": "0.1.
|
|
25
|
+
"@townco/otlp-server": "0.1.30",
|
|
26
|
+
"@townco/ui": "0.1.75",
|
|
25
27
|
"bun-plugin-tailwind": "^0.1.2",
|
|
26
28
|
"class-variance-authority": "^0.7.1",
|
|
27
29
|
"clsx": "^2.1.1",
|
|
28
30
|
"lucide-react": "^0.545.0",
|
|
31
|
+
"openai": "^4.77.3",
|
|
29
32
|
"react": "19.2.1",
|
|
30
33
|
"react-dom": "19.2.1",
|
|
31
34
|
"tailwind-merge": "^3.3.1",
|
|
32
35
|
"zod": "^4.1.13"
|
|
33
36
|
},
|
|
34
37
|
"devDependencies": {
|
|
35
|
-
"@townco/tsconfig": "0.1.
|
|
38
|
+
"@townco/tsconfig": "0.1.72",
|
|
36
39
|
"@types/bun": "latest",
|
|
37
40
|
"@types/react": "^19",
|
|
38
41
|
"@types/react-dom": "^19",
|
package/src/App.tsx
CHANGED
|
@@ -2,6 +2,7 @@ import { ThemeProvider } from "@townco/ui/gui";
|
|
|
2
2
|
import { Component, type ReactNode } from "react";
|
|
3
3
|
import "./index.css";
|
|
4
4
|
import { ComparisonView } from "./pages/ComparisonView";
|
|
5
|
+
import { FindSessions } from "./pages/FindSessions";
|
|
5
6
|
import { SessionList } from "./pages/SessionList";
|
|
6
7
|
import { SessionView } from "./pages/SessionView";
|
|
7
8
|
import { TownHall } from "./pages/TownHall";
|
|
@@ -108,6 +109,11 @@ function AppContent() {
|
|
|
108
109
|
return <TownHall />;
|
|
109
110
|
}
|
|
110
111
|
|
|
112
|
+
// Route: /find-sessions
|
|
113
|
+
if (pathname === "/find-sessions") {
|
|
114
|
+
return <FindSessions />;
|
|
115
|
+
}
|
|
116
|
+
|
|
111
117
|
// Default: Session list
|
|
112
118
|
return <SessionList />;
|
|
113
119
|
}
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session analyzer - analyzes agent sessions using Claude to extract high-level insights
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
6
|
+
import { LLMAnalysisOutputSchema, SessionAnalysisSchema } from "./schema";
|
|
7
|
+
import type {
|
|
8
|
+
AnalysisMetrics,
|
|
9
|
+
DetailedToolCall,
|
|
10
|
+
LLMAnalysisOutput,
|
|
11
|
+
PreComputedFields,
|
|
12
|
+
SessionAnalysis,
|
|
13
|
+
} from "./types";
|
|
14
|
+
|
|
15
|
+
// Import session types from the agent package
|
|
16
|
+
type StoredSession = {
|
|
17
|
+
sessionId: string;
|
|
18
|
+
messages: SessionMessage[];
|
|
19
|
+
metadata: {
|
|
20
|
+
createdAt: string;
|
|
21
|
+
updatedAt: string;
|
|
22
|
+
agentName: string;
|
|
23
|
+
};
|
|
24
|
+
context: unknown[];
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
type SessionMessage = {
|
|
28
|
+
role: "user" | "assistant";
|
|
29
|
+
content: ContentBlock[];
|
|
30
|
+
timestamp: string;
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
type ContentBlock =
|
|
34
|
+
| { type: "text"; text: string }
|
|
35
|
+
| { type: "image"; [key: string]: unknown }
|
|
36
|
+
| {
|
|
37
|
+
type: "tool_call";
|
|
38
|
+
id: string;
|
|
39
|
+
title: string;
|
|
40
|
+
status: "pending" | "in_progress" | "completed" | "failed";
|
|
41
|
+
error?: string;
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
const anthropic = new Anthropic({
|
|
45
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
const ANALYSIS_MODEL = "claude-sonnet-4-5-20250929";
|
|
49
|
+
|
|
50
|
+
const ANALYSIS_SYSTEM_PROMPT = `You are analyzing an AI agent session to provide structured insights.
|
|
51
|
+
|
|
52
|
+
Your task is to analyze the session transcript and generate a structured analysis.
|
|
53
|
+
|
|
54
|
+
You must respond with valid JSON matching this schema:
|
|
55
|
+
{
|
|
56
|
+
"task_summary": "Brief 1-2 sentence summary of what the user wanted",
|
|
57
|
+
"intent_type": "One of: RETRIEVE_INFORMATION, RESEARCH_TOPIC, ANALYZE_DATA, SUMMARIZE_CONTENT, COMPARE_OPTIONS, CODE_GENERATION, CODE_MODIFICATION, CODE_REVIEW, DEBUG_ISSUE, REFACTOR_CODE, FILE_MANIPULATION, CONTENT_SEARCH, PLANNING, TASK_EXECUTION, TROUBLESHOOTING, CONVERSATION, OTHER",
|
|
58
|
+
"high_level_plan": "One paragraph describing the strategy the agent used",
|
|
59
|
+
"status": "One of: SUCCESS, FAILURE, ABORTED, TIMEOUT, PARTIAL_SUCCESS",
|
|
60
|
+
"answer_type": "One of: FACTUAL_RESPONSE, STRUCTURED_DATA, SUMMARY, ANALYSIS, RECOMMENDATION, CODE, FILE_MODIFICATIONS, CLARIFYING_QUESTION, ACKNOWLEDGMENT, ERROR_MESSAGE, PARTIAL_COMPLETION, NO_RESPONSE, OTHER",
|
|
61
|
+
"assessment": "2-3 sentence explanation of why you chose the status and answer_type"
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
Guidelines:
|
|
65
|
+
- task_summary: Concise goal description in user's terms
|
|
66
|
+
- intent_type: Primary user intent (choose most specific match)
|
|
67
|
+
- high_level_plan: Describe the strategy/approach the agent followed (focus on HOW it tried to solve the task, not whether it succeeded)
|
|
68
|
+
- status: Evaluate if task was completed successfully
|
|
69
|
+
- answer_type: What form the final response took
|
|
70
|
+
- assessment: Explain your reasoning for the status and answer_type choices (focus on OUTCOME - did it succeed, what was delivered, any issues encountered)
|
|
71
|
+
|
|
72
|
+
Respond with ONLY the JSON object, no additional text.`;
|
|
73
|
+
|
|
74
|
+
/**
 * Derive the fields that can be read directly off the stored session,
 * without involving the LLM.
 *
 * - `userQuery`: text of the first text block in the first user message
 *   (empty string when there is no user message or it has no text block)
 * - `numSteps`: number of assistant messages
 * - `numToolCalls`: number of `tool_call` blocks carrying a title, across all messages
 * - `toolsUsed`: distinct tool titles, in first-seen order
 */
function extractPreComputedFields(session: StoredSession): PreComputedFields {
  let openingUserMessage: SessionMessage | undefined;
  let assistantTurns = 0;
  let toolCallCount = 0;
  const distinctTools = new Set<string>();

  // Single pass over the conversation: remember the first user message,
  // count assistant turns, and tally tool calls.
  for (const message of session.messages) {
    if (message.role === "user") {
      if (openingUserMessage === undefined) {
        openingUserMessage = message;
      }
    } else if (message.role === "assistant") {
      assistantTurns += 1;
    }

    for (const part of message.content) {
      if (part.type !== "tool_call" || !("title" in part)) {
        continue;
      }
      toolCallCount += 1;
      distinctTools.add(part.title);
    }
  }

  // Only the first text block of the opening user message is considered.
  let userQuery = "";
  for (const part of openingUserMessage?.content ?? []) {
    if (part.type === "text") {
      userQuery = "text" in part ? part.text : "";
      break;
    }
  }

  return {
    userQuery,
    numSteps: assistantTurns,
    numToolCalls: toolCallCount,
    toolsUsed: Array.from(distinctTools),
  };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Render the session as a plain-text transcript for the analysis prompt.
 *
 * Each message starts with a `## ROLE` heading (role upper-cased). Text blocks
 * are emitted verbatim followed by a newline; tool calls are emitted as
 * `[Tool: <title>]`, with ` - completed` or ` - failed[: <error>]` appended
 * for those two statuses. Other block types (e.g. images) are omitted.
 *
 * @param session - The stored session whose messages are rendered.
 * @returns The transcript, or an empty string when the session has no messages.
 */
function formatConversationTranscript(session: StoredSession): string {
  const fragments: string[] = [];

  for (const msg of session.messages) {
    fragments.push(`\n## ${msg.role.toUpperCase()}\n`);

    for (const block of msg.content) {
      if (block.type === "text") {
        fragments.push(`${block.text}\n`);
      } else if (block.type === "tool_call") {
        let statusSuffix = "";
        if (block.status === "completed") {
          statusSuffix = " - completed";
        } else if (block.status === "failed") {
          // `error` is optional; never print the literal "undefined" as the reason.
          statusSuffix = block.error ? ` - failed: ${block.error}` : " - failed";
        }
        fragments.push(`[Tool: ${block.title}${statusSuffix}]\n`);
      }
    }
  }

  return fragments.join("");
}
|
|
137
|
+
|
|
138
|
+
/**
 * Assemble the user-turn prompt sent to the model: a short metadata header
 * (first user message truncated to 200 characters, step and tool-call counts,
 * tool list or "none") followed by the full conversation transcript.
 */
function buildPrompt(
  transcript: string,
  preComputed: PreComputedFields,
): string {
  const { userQuery, numSteps, numToolCalls, toolsUsed } = preComputed;

  // Show at most 200 characters of the opening query, flagging truncation.
  const queryPreview =
    userQuery.length > 200 ? `${userQuery.slice(0, 200)}...` : `${userQuery.slice(0, 200)}`;
  const toolSummary = toolsUsed.join(", ") || "none";

  const promptLines = [
    "Analyze this AI agent session and provide structured insights.",
    "",
    "Session Metadata:",
    `- First user message: ${queryPreview}`,
    `- Number of steps: ${numSteps}`,
    `- Number of tool calls: ${numToolCalls}`,
    `- Tools used: ${toolSummary}`,
    "",
    "Conversation Transcript:",
    `${transcript}`,
    "",
    "Analyze the above session and respond with the required JSON structure.",
  ];

  return promptLines.join("\n");
}
|
|
158
|
+
|
|
159
|
+
/**
 * Pull the JSON payload out of a model reply.
 *
 * Handles, in order:
 * 1. a markdown code fence (optionally tagged `json`, any case), tolerating
 *    CRLF line endings and fences written on a single line;
 * 2. a reply that is already bare JSON (returned trimmed);
 * 3. a JSON object surrounded by prose, by slicing from the first `{` to the
 *    last `}`.
 *
 * When none of these apply the trimmed text is returned unchanged, so the
 * caller's `JSON.parse` reports the problem.
 *
 * @param text - Raw text returned by the model.
 * @returns The best candidate JSON string; not guaranteed to be valid JSON.
 */
function extractJSON(text: string): string {
  // Whitespace around the payload (including \r\n) is absorbed by the \s* groups.
  const fenced = /```(?:json)?\s*([\s\S]*?)\s*```/i.exec(text);
  // An empty fence falls back to the whole text, as before.
  const candidate = (fenced?.[1] || text).trim();

  if (candidate.startsWith("{") || candidate.startsWith("[")) {
    return candidate;
  }

  // Prose before/after an unfenced object, or a fence with a different language tag.
  const firstBrace = candidate.indexOf("{");
  const lastBrace = candidate.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    return candidate.slice(firstBrace, lastBrace + 1);
  }

  return candidate;
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Options for session analysis
|
|
175
|
+
*/
|
|
176
|
+
export interface AnalyzeSessionOptions {
|
|
177
|
+
session: StoredSession;
|
|
178
|
+
metrics?: AnalysisMetrics;
|
|
179
|
+
toolCalls?: DetailedToolCall[];
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
 * Analyze a session using Claude.
 *
 * Pipeline: derive the pre-computed fields from the session, render the
 * transcript, ask the model for a JSON analysis, validate that JSON against
 * `LLMAnalysisOutputSchema`, merge it with the pre-computed fields and
 * metrics, and validate the combined object against `SessionAnalysisSchema`.
 *
 * @param options - The session to analyze, plus optional `metrics` and
 *   detailed `toolCalls`. When `metrics` is omitted, token and cost fields
 *   default to 0 and the duration is derived from the session timestamps.
 *   When `toolCalls` is omitted, an empty list is stored.
 * @returns The validated session analysis.
 * @throws Error "No content in response" when the model returns no content
 *   blocks, or "Unexpected response format" when none of them is text.
 * @throws SyntaxError when the model's text cannot be parsed as JSON.
 * @throws Whatever the schema `parse` calls or the Anthropic client raise.
 */
export async function analyzeSession(
  options: AnalyzeSessionOptions,
): Promise<SessionAnalysis> {
  const { session, metrics, toolCalls } = options;

  // 1. Pre-compute extractable fields
  const preComputed = extractPreComputedFields(session);

  // 2. Build conversation transcript for LLM
  const transcript = formatConversationTranscript(session);

  // 3. Build analysis prompt
  const prompt = buildPrompt(transcript, preComputed);

  // 4. Call Claude
  const response = await anthropic.messages.create({
    model: ANALYSIS_MODEL,
    max_tokens: 4096,
    temperature: 0,
    system: ANALYSIS_SYSTEM_PROMPT,
    messages: [{ role: "user", content: prompt }],
  });

  // 5. Parse and validate response
  if (response.content.length === 0) {
    throw new Error("No content in response");
  }

  // Use the first text block rather than assuming it is at index 0.
  let responseText: string | undefined;
  for (const block of response.content) {
    if (block.type === "text") {
      responseText = block.text;
      break;
    }
  }
  if (responseText === undefined) {
    throw new Error("Unexpected response format");
  }

  // Extract JSON from potential markdown code blocks
  const jsonText = extractJSON(responseText);
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonText);
  } catch (error) {
    // Keep the SyntaxError type, but include the stop reason so a reply cut
    // off by the token limit is distinguishable from a malformed one.
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(
      `Analysis response was not valid JSON (stop_reason: ${response.stop_reason}): ${reason}`,
    );
  }
  const llmOutput = LLMAnalysisOutputSchema.parse(parsed);

  // Use provided metrics or create defaults
  const analysisMetrics: AnalysisMetrics = metrics ?? {
    inputTokens: 0,
    outputTokens: 0,
    totalTokens: 0,
    estimatedCost: 0,
    durationMs: calculateDurationMs(session),
  };

  // Use provided tool calls or empty array
  const detailedToolCalls: DetailedToolCall[] = toolCalls ?? [];

  // 6. Combine pre-computed and LLM data
  const analysis: SessionAnalysis = {
    session_id: session.sessionId,
    agent_name: session.metadata.agentName,
    started_at: session.metadata.createdAt,
    ended_at: session.metadata.updatedAt,
    task: {
      user_query: preComputed.userQuery,
      task_summary: llmOutput.task_summary,
      intent_type: llmOutput.intent_type,
    },
    trajectory: {
      high_level_plan: llmOutput.high_level_plan,
      tools_used: preComputed.toolsUsed,
      num_steps: preComputed.numSteps,
      num_tool_calls: preComputed.numToolCalls,
      tool_calls: detailedToolCalls,
    },
    outcome: {
      status: llmOutput.status,
      answer_type: llmOutput.answer_type,
      assessment: llmOutput.assessment,
    },
    metrics: analysisMetrics,
  };

  // 7. Validate final schema
  return SessionAnalysisSchema.parse(analysis);
}
|
|
264
|
+
|
|
265
|
+
/**
 * Calculate the session duration from its `createdAt` / `updatedAt` timestamps.
 *
 * @param session - The stored session whose metadata timestamps are read.
 * @returns Elapsed milliseconds between the two timestamps, or 0 when either
 *   timestamp cannot be parsed or `updatedAt` precedes `createdAt`.
 */
function calculateDurationMs(session: StoredSession): number {
  const startTime = Date.parse(session.metadata.createdAt);
  const endTime = Date.parse(session.metadata.updatedAt);
  const elapsed = endTime - startTime;

  // An unparseable timestamp yields NaN; this value ends up in
  // `metrics.durationMs`, so fall back to 0 instead of propagating NaN or a
  // negative duration.
  return Number.isFinite(elapsed) && elapsed > 0 ? elapsed : 0;
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Analysis Embeddings Module
|
|
3
|
+
*
|
|
4
|
+
* Generates embeddings for session analyses using OpenAI's text-embedding-3-small model.
|
|
5
|
+
* These embeddings enable semantic search and similarity analysis of sessions.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import OpenAI from "openai";
|
|
9
|
+
import type { SessionAnalysis } from "./types";
|
|
10
|
+
|
|
11
|
+
/**
 * Flatten a SessionAnalysis into the labelled, line-oriented text that is
 * sent to the embedding model. Sections appear in a fixed order: agent,
 * classification labels, user query, task summary, high-level plan, tool
 * usage counters, and the outcome assessment.
 */
export function generateEmbeddingText(analysis: SessionAnalysis): string {
  const { task, trajectory, outcome } = analysis;

  const lines = [
    `Agent: ${analysis.agent_name}`,
    "",
    `IntentType: ${task.intent_type}`,
    `AnswerType: ${outcome.answer_type}`,
    `OutcomeStatus: ${outcome.status}`,
    "",
    "UserQuery:",
    `${task.user_query}`,
    "",
    "TaskSummary:",
    `${task.task_summary}`,
    "",
    "HighLevelPlan:",
    `${trajectory.high_level_plan}`,
    "",
    `ToolsUsed: ${trajectory.tools_used.join(", ")}`,
    `NumSteps: ${trajectory.num_steps}`,
    `NumToolCalls: ${trajectory.num_tool_calls}`,
    "",
    "OutcomeAssessment:",
    `${outcome.assessment}`,
  ];

  return lines.join("\n");
}
|
|
40
|
+
|
|
41
|
+
/**
 * Generates an embedding vector for the given text using OpenAI's API
 * (model `text-embedding-3-small`, requested in float encoding).
 *
 * @param text - The text to embed.
 * @param apiKey - OpenAI API key used to construct the client.
 * @returns The embedding as a Float32Array.
 * @throws Error "OpenAI API key is required for embedding generation" when
 *   `apiKey` is empty.
 * @throws Error prefixed with "OpenAI API error:" when the API call itself fails.
 * @throws Error "No embedding returned from OpenAI API" when the response
 *   carries no (or an empty) embedding vector.
 */
export async function generateEmbedding(
  text: string,
  apiKey: string,
): Promise<Float32Array> {
  if (!apiKey) {
    throw new Error("OpenAI API key is required for embedding generation");
  }

  const openai = new OpenAI({ apiKey });

  let embedding: number[] | undefined;
  try {
    const response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: text,
      encoding_format: "float",
    });
    embedding = response.data[0]?.embedding;
  } catch (error) {
    if (error instanceof Error) {
      throw new Error(`OpenAI API error: ${error.message}`);
    }
    throw new Error("Unknown error generating embedding");
  }

  // Checked outside the try block so this error is not re-wrapped with the
  // "OpenAI API error:" prefix meant for transport/API failures.
  if (!embedding || embedding.length === 0) {
    throw new Error("No embedding returned from OpenAI API");
  }

  return new Float32Array(embedding);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Embed a SessionAnalysis in one call: render it to text with
 * `generateEmbeddingText` and pass that text to `generateEmbedding`, using
 * the key from the `OPENAI_API_KEY` environment variable.
 *
 * @throws Error if `OPENAI_API_KEY` is not set, or if embedding generation fails
 */
export async function embedAnalysis(
  analysis: SessionAnalysis,
): Promise<Float32Array> {
  const { OPENAI_API_KEY: apiKey } = process.env;

  if (apiKey) {
    const embeddingInput = generateEmbeddingText(analysis);
    return generateEmbedding(embeddingInput, apiKey);
  }

  throw new Error(
    "OPENAI_API_KEY environment variable not set. Embedding generation requires an OpenAI API key.",
  );
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zod schemas for session analysis validation
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { z } from "zod";
|
|
6
|
+
import { AnswerType, IntentType, OutcomeStatus } from "./types";
|
|
7
|
+
|
|
8
|
+
/**
 * Intent type schema - accepts the values of the `IntentType` enum.
 *
 * Built with `z.enum`, which takes TypeScript enums in Zod 4;
 * `z.nativeEnum` is deprecated there.
 */
export const IntentTypeSchema = z.enum(IntentType);

/**
 * Answer type schema - accepts the values of the `AnswerType` enum.
 */
export const AnswerTypeSchema = z.enum(AnswerType);

/**
 * Outcome status schema - accepts the values of the `OutcomeStatus` enum.
 */
export const OutcomeStatusSchema = z.enum(OutcomeStatus);
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* LLM output schema - what we expect the LLM to generate
|
|
25
|
+
*/
|
|
26
|
+
export const LLMAnalysisOutputSchema = z.object({
|
|
27
|
+
task_summary: z
|
|
28
|
+
.string()
|
|
29
|
+
.describe("Brief 1-2 sentence summary of what the user wanted"),
|
|
30
|
+
intent_type: IntentTypeSchema.describe("Primary user intent"),
|
|
31
|
+
high_level_plan: z
|
|
32
|
+
.string()
|
|
33
|
+
.describe("One paragraph describing the strategy the agent used"),
|
|
34
|
+
status: OutcomeStatusSchema.describe(
|
|
35
|
+
"Whether the task was completed successfully",
|
|
36
|
+
),
|
|
37
|
+
answer_type: AnswerTypeSchema.describe("What form the final response took"),
|
|
38
|
+
assessment: z
|
|
39
|
+
.string()
|
|
40
|
+
.describe("Explanation of why the status and answer_type were chosen"),
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Detailed tool call schema
|
|
45
|
+
*/
|
|
46
|
+
export const DetailedToolCallSchema = z.object({
|
|
47
|
+
name: z.string(),
|
|
48
|
+
input: z.unknown(),
|
|
49
|
+
output: z.unknown(),
|
|
50
|
+
startTimeUnixNano: z.number().optional(),
|
|
51
|
+
endTimeUnixNano: z.number().optional(),
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Metrics schema
|
|
56
|
+
*/
|
|
57
|
+
export const AnalysisMetricsSchema = z.object({
|
|
58
|
+
inputTokens: z.number(),
|
|
59
|
+
outputTokens: z.number(),
|
|
60
|
+
totalTokens: z.number(),
|
|
61
|
+
estimatedCost: z.number(),
|
|
62
|
+
durationMs: z.number(),
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Complete session analysis schema
|
|
67
|
+
*/
|
|
68
|
+
export const SessionAnalysisSchema = z.object({
|
|
69
|
+
session_id: z.string(),
|
|
70
|
+
agent_name: z.string(),
|
|
71
|
+
started_at: z.string(),
|
|
72
|
+
ended_at: z.string(),
|
|
73
|
+
task: z.object({
|
|
74
|
+
user_query: z.string(),
|
|
75
|
+
task_summary: z.string(),
|
|
76
|
+
intent_type: IntentTypeSchema,
|
|
77
|
+
}),
|
|
78
|
+
trajectory: z.object({
|
|
79
|
+
high_level_plan: z.string(),
|
|
80
|
+
tools_used: z.array(z.string()),
|
|
81
|
+
num_steps: z.number(),
|
|
82
|
+
num_tool_calls: z.number(),
|
|
83
|
+
tool_calls: z.array(DetailedToolCallSchema),
|
|
84
|
+
}),
|
|
85
|
+
outcome: z.object({
|
|
86
|
+
status: OutcomeStatusSchema,
|
|
87
|
+
answer_type: AnswerTypeSchema,
|
|
88
|
+
assessment: z.string(),
|
|
89
|
+
}),
|
|
90
|
+
metrics: AnalysisMetricsSchema,
|
|
91
|
+
});
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Analysis Types
|
|
3
|
+
*
|
|
4
|
+
* This module defines the types for analyzing AI agent sessions to extract
|
|
5
|
+
* high-level insights about task execution and trajectory.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Intent types - What the user wanted to accomplish
|
|
10
|
+
*/
|
|
11
|
+
export enum IntentType {
|
|
12
|
+
// Information & Research
|
|
13
|
+
RETRIEVE_INFORMATION = "RETRIEVE_INFORMATION", // Lookup facts/data
|
|
14
|
+
RESEARCH_TOPIC = "RESEARCH_TOPIC", // Deep investigation
|
|
15
|
+
|
|
16
|
+
// Data & Analysis
|
|
17
|
+
ANALYZE_DATA = "ANALYZE_DATA", // Examine/interpret data
|
|
18
|
+
SUMMARIZE_CONTENT = "SUMMARIZE_CONTENT", // Condense information
|
|
19
|
+
COMPARE_OPTIONS = "COMPARE_OPTIONS", // Evaluate alternatives
|
|
20
|
+
|
|
21
|
+
// Code & Development
|
|
22
|
+
CODE_GENERATION = "CODE_GENERATION", // Write new code
|
|
23
|
+
CODE_MODIFICATION = "CODE_MODIFICATION", // Edit existing code
|
|
24
|
+
CODE_REVIEW = "CODE_REVIEW", // Analyze code quality
|
|
25
|
+
DEBUG_ISSUE = "DEBUG_ISSUE", // Fix bugs
|
|
26
|
+
REFACTOR_CODE = "REFACTOR_CODE", // Improve structure
|
|
27
|
+
|
|
28
|
+
// File Operations
|
|
29
|
+
FILE_MANIPULATION = "FILE_MANIPULATION", // Create/edit/move files
|
|
30
|
+
CONTENT_SEARCH = "CONTENT_SEARCH", // Find in files
|
|
31
|
+
|
|
32
|
+
// Task Management
|
|
33
|
+
PLANNING = "PLANNING", // Create task plans
|
|
34
|
+
TASK_EXECUTION = "TASK_EXECUTION", // Complete multi-step task
|
|
35
|
+
|
|
36
|
+
// System & Troubleshooting
|
|
37
|
+
TROUBLESHOOTING = "TROUBLESHOOTING", // Diagnose problems
|
|
38
|
+
|
|
39
|
+
// Meta
|
|
40
|
+
CONVERSATION = "CONVERSATION", // General chat
|
|
41
|
+
OTHER = "OTHER", // Fallback
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Answer types - What form the agent's response took
|
|
46
|
+
*/
|
|
47
|
+
export enum AnswerType {
|
|
48
|
+
// Direct responses
|
|
49
|
+
FACTUAL_RESPONSE = "FACTUAL_RESPONSE", // Direct facts/data
|
|
50
|
+
STRUCTURED_DATA = "STRUCTURED_DATA", // JSON/tables
|
|
51
|
+
SUMMARY = "SUMMARY", // Condensed information
|
|
52
|
+
ANALYSIS = "ANALYSIS", // Interpretation/insights
|
|
53
|
+
RECOMMENDATION = "RECOMMENDATION", // Suggested actions
|
|
54
|
+
|
|
55
|
+
// Artifacts
|
|
56
|
+
CODE = "CODE", // Code snippets/files
|
|
57
|
+
FILE_MODIFICATIONS = "FILE_MODIFICATIONS", // Changed files
|
|
58
|
+
|
|
59
|
+
// Interactive
|
|
60
|
+
CLARIFYING_QUESTION = "CLARIFYING_QUESTION", // Needs more input
|
|
61
|
+
ACKNOWLEDGMENT = "ACKNOWLEDGMENT", // Confirmed action
|
|
62
|
+
|
|
63
|
+
// Status
|
|
64
|
+
ERROR_MESSAGE = "ERROR_MESSAGE", // Failed with error
|
|
65
|
+
PARTIAL_COMPLETION = "PARTIAL_COMPLETION", // Incomplete success
|
|
66
|
+
|
|
67
|
+
// Empty
|
|
68
|
+
NO_RESPONSE = "NO_RESPONSE", // Aborted/timeout
|
|
69
|
+
OTHER = "OTHER", // Fallback
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Outcome status - Whether the task was completed successfully
|
|
74
|
+
*/
|
|
75
|
+
export enum OutcomeStatus {
|
|
76
|
+
SUCCESS = "SUCCESS", // Completed successfully
|
|
77
|
+
FAILURE = "FAILURE", // Failed with error
|
|
78
|
+
ABORTED = "ABORTED", // User cancelled
|
|
79
|
+
TIMEOUT = "TIMEOUT", // Exceeded time limit
|
|
80
|
+
PARTIAL_SUCCESS = "PARTIAL_SUCCESS", // Some goals achieved
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Detailed tool call with input/output
|
|
85
|
+
*/
|
|
86
|
+
export interface DetailedToolCall {
|
|
87
|
+
name: string;
|
|
88
|
+
input: unknown;
|
|
89
|
+
output: unknown;
|
|
90
|
+
startTimeUnixNano?: number | undefined;
|
|
91
|
+
endTimeUnixNano?: number | undefined;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Session metrics (tokens, cost, etc.)
|
|
96
|
+
*/
|
|
97
|
+
export interface AnalysisMetrics {
|
|
98
|
+
inputTokens: number;
|
|
99
|
+
outputTokens: number;
|
|
100
|
+
totalTokens: number;
|
|
101
|
+
estimatedCost: number;
|
|
102
|
+
durationMs: number;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Complete session analysis result
|
|
107
|
+
*/
|
|
108
|
+
export interface SessionAnalysis {
|
|
109
|
+
session_id: string;
|
|
110
|
+
agent_name: string;
|
|
111
|
+
started_at: string; // ISO8601
|
|
112
|
+
ended_at: string; // ISO8601
|
|
113
|
+
|
|
114
|
+
task: {
|
|
115
|
+
user_query: string; // First user message text
|
|
116
|
+
task_summary: string; // LLM-generated summary
|
|
117
|
+
intent_type: IntentType; // LLM-selected from enum
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
trajectory: {
|
|
121
|
+
high_level_plan: string; // LLM-generated paragraph
|
|
122
|
+
tools_used: string[]; // Pre-computed from tool calls
|
|
123
|
+
num_steps: number; // Pre-computed (count assistant messages)
|
|
124
|
+
num_tool_calls: number; // Pre-computed
|
|
125
|
+
tool_calls: DetailedToolCall[]; // Detailed tool call info with args/results
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
outcome: {
|
|
129
|
+
status: OutcomeStatus; // LLM-selected from enum
|
|
130
|
+
answer_type: AnswerType; // LLM-selected from enum
|
|
131
|
+
assessment: string; // LLM-generated explanation of status and answer_type
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
metrics: AnalysisMetrics; // Token counts, cost, duration
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* LLM output schema (what the LLM generates)
|
|
139
|
+
*/
|
|
140
|
+
export interface LLMAnalysisOutput {
|
|
141
|
+
task_summary: string;
|
|
142
|
+
intent_type: IntentType;
|
|
143
|
+
high_level_plan: string;
|
|
144
|
+
status: OutcomeStatus;
|
|
145
|
+
answer_type: AnswerType;
|
|
146
|
+
assessment: string;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Pre-computed fields (extracted from session data)
|
|
151
|
+
*/
|
|
152
|
+
export interface PreComputedFields {
|
|
153
|
+
userQuery: string;
|
|
154
|
+
numSteps: number;
|
|
155
|
+
numToolCalls: number;
|
|
156
|
+
toolsUsed: string[];
|
|
157
|
+
}
|