@townco/debugger 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@townco/debugger",
3
- "version": "0.1.28",
3
+ "version": "0.1.29",
4
4
  "type": "module",
5
5
  "engines": {
6
6
  "bun": ">=1.3.0"
@@ -15,24 +15,27 @@
15
15
  "check": "tsc --noEmit"
16
16
  },
17
17
  "dependencies": {
18
+ "@anthropic-ai/sdk": "^0.70.0",
19
+ "@lancedb/lancedb": "^0.22.3",
18
20
  "@radix-ui/react-dialog": "^1.1.15",
19
21
  "@radix-ui/react-label": "^2.1.7",
20
22
  "@radix-ui/react-select": "^2.2.6",
21
23
  "@radix-ui/react-slot": "^1.2.3",
22
24
  "@radix-ui/react-tabs": "^1.1.0",
23
- "@townco/otlp-server": "0.1.28",
24
- "@townco/ui": "0.1.73",
25
+ "@townco/otlp-server": "0.1.29",
26
+ "@townco/ui": "0.1.74",
25
27
  "bun-plugin-tailwind": "^0.1.2",
26
28
  "class-variance-authority": "^0.7.1",
27
29
  "clsx": "^2.1.1",
28
30
  "lucide-react": "^0.545.0",
31
+ "openai": "^4.77.3",
29
32
  "react": "19.2.1",
30
33
  "react-dom": "19.2.1",
31
34
  "tailwind-merge": "^3.3.1",
32
35
  "zod": "^4.1.13"
33
36
  },
34
37
  "devDependencies": {
35
- "@townco/tsconfig": "0.1.70",
38
+ "@townco/tsconfig": "0.1.71",
36
39
  "@types/bun": "latest",
37
40
  "@types/react": "^19",
38
41
  "@types/react-dom": "^19",
package/src/App.tsx CHANGED
@@ -2,6 +2,7 @@ import { ThemeProvider } from "@townco/ui/gui";
2
2
  import { Component, type ReactNode } from "react";
3
3
  import "./index.css";
4
4
  import { ComparisonView } from "./pages/ComparisonView";
5
+ import { FindSessions } from "./pages/FindSessions";
5
6
  import { SessionList } from "./pages/SessionList";
6
7
  import { SessionView } from "./pages/SessionView";
7
8
  import { TownHall } from "./pages/TownHall";
@@ -108,6 +109,11 @@ function AppContent() {
108
109
  return <TownHall />;
109
110
  }
110
111
 
112
+ // Route: /find-sessions
113
+ if (pathname === "/find-sessions") {
114
+ return <FindSessions />;
115
+ }
116
+
111
117
  // Default: Session list
112
118
  return <SessionList />;
113
119
  }
@@ -0,0 +1,235 @@
1
+ /**
2
+ * Session analyzer - analyzes agent sessions using Claude to extract high-level insights
3
+ */
4
+
5
+ import Anthropic from "@anthropic-ai/sdk";
6
+ import { LLMAnalysisOutputSchema, SessionAnalysisSchema } from "./schema";
7
+ import type {
8
+ LLMAnalysisOutput,
9
+ PreComputedFields,
10
+ SessionAnalysis,
11
+ } from "./types";
12
+
13
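+ // Session types declared locally rather than imported; presumably they mirror the agent package's stored-session shape (confirm against that package)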
14
+ type StoredSession = {
15
+ sessionId: string;
16
+ messages: SessionMessage[];
17
+ metadata: {
18
+ createdAt: string;
19
+ updatedAt: string;
20
+ agentName: string;
21
+ };
22
+ context: unknown[];
23
+ };
24
+
25
+ type SessionMessage = {
26
+ role: "user" | "assistant";
27
+ content: ContentBlock[];
28
+ timestamp: string;
29
+ };
30
+
31
+ type ContentBlock =
32
+ | { type: "text"; text: string }
33
+ | { type: "image"; [key: string]: unknown }
34
+ | {
35
+ type: "tool_call";
36
+ id: string;
37
+ title: string;
38
+ status: "pending" | "in_progress" | "completed" | "failed";
39
+ error?: string;
40
+ };
41
+
42
+ const anthropic = new Anthropic({
43
+ apiKey: process.env.ANTHROPIC_API_KEY,
44
+ });
45
+
46
+ const ANALYSIS_MODEL = "claude-sonnet-4-5-20250929";
47
+
48
+ const ANALYSIS_SYSTEM_PROMPT = `You are analyzing an AI agent session to provide structured insights.
49
+
50
+ Your task is to analyze the session transcript and generate a structured analysis.
51
+
52
+ You must respond with valid JSON matching this schema:
53
+ {
54
+ "task_summary": "Brief 1-2 sentence summary of what the user wanted",
55
+ "intent_type": "One of: RETRIEVE_INFORMATION, RESEARCH_TOPIC, ANALYZE_DATA, SUMMARIZE_CONTENT, COMPARE_OPTIONS, CODE_GENERATION, CODE_MODIFICATION, CODE_REVIEW, DEBUG_ISSUE, REFACTOR_CODE, FILE_MANIPULATION, CONTENT_SEARCH, PLANNING, TASK_EXECUTION, TROUBLESHOOTING, CONVERSATION, OTHER",
56
+ "high_level_plan": "One paragraph describing the strategy the agent used",
57
+ "status": "One of: SUCCESS, FAILURE, ABORTED, TIMEOUT, PARTIAL_SUCCESS",
58
+ "answer_type": "One of: FACTUAL_RESPONSE, STRUCTURED_DATA, SUMMARY, ANALYSIS, RECOMMENDATION, CODE, FILE_MODIFICATIONS, CLARIFYING_QUESTION, ACKNOWLEDGMENT, ERROR_MESSAGE, PARTIAL_COMPLETION, NO_RESPONSE, OTHER",
59
+ "assessment": "2-3 sentence explanation of why you chose the status and answer_type"
60
+ }
61
+
62
+ Guidelines:
63
+ - task_summary: Concise goal description in user's terms
64
+ - intent_type: Primary user intent (choose most specific match)
65
+ - high_level_plan: Describe the strategy/approach the agent followed (focus on HOW it tried to solve the task, not whether it succeeded)
66
+ - status: Evaluate if task was completed successfully
67
+ - answer_type: What form the final response took
68
+ - assessment: Explain your reasoning for the status and answer_type choices (focus on OUTCOME - did it succeed, what was delivered, any issues encountered)
69
+
70
+ Respond with ONLY the JSON object, no additional text.`;
71
+
72
/**
 * Derive the fields that can be read straight off the stored session,
 * without involving the LLM.
 *
 * @param session - Stored session whose messages are scanned.
 * @returns The text of the first text block in the first user message (""
 *   when there is none), the number of assistant messages, the total number
 *   of tool_call blocks, and the distinct tool_call titles in first-seen order.
 */
function extractPreComputedFields(session: StoredSession): PreComputedFields {
  let assistantTurns = 0;
  let openingUserMessage: SessionMessage | undefined;
  const invokedTitles: string[] = [];

  // One pass over the messages gathers everything except the query text.
  for (const message of session.messages) {
    if (message.role === "assistant") {
      assistantTurns += 1;
    } else if (message.role === "user" && openingUserMessage === undefined) {
      openingUserMessage = message;
    }

    for (const part of message.content) {
      if (part.type === "tool_call" && "title" in part) {
        invokedTitles.push(part.title);
      }
    }
  }

  // Only the first text block of the first user message counts as the query.
  const openingText = openingUserMessage?.content.find(
    (part: ContentBlock) => part.type === "text",
  );
  let userQuery = "";
  if (openingText && "text" in openingText) {
    userQuery = openingText.text;
  }

  return {
    userQuery,
    numSteps: assistantTurns,
    numToolCalls: invokedTitles.length,
    toolsUsed: Array.from(new Set(invokedTitles)),
  };
}
107
+
108
/**
 * Render the session's messages as a plain-text transcript for the analysis prompt.
 *
 * Each message becomes a "## ROLE" heading followed by its blocks: text blocks
 * are emitted verbatim, tool calls as "[Tool: <title> ...]" markers. Blocks of
 * any other type (e.g. images) are omitted.
 *
 * @param session - Stored session to render.
 * @returns The transcript; an empty string when the session has no messages.
 */
function formatConversationTranscript(session: StoredSession): string {
  let transcript = "";

  for (const msg of session.messages) {
    transcript += `\n## ${msg.role.toUpperCase()}\n`;

    for (const block of msg.content) {
      if (block.type === "text") {
        transcript += `${block.text}\n`;
      } else if (block.type === "tool_call") {
        let outcome = "";
        if (block.status === "completed") {
          outcome = " - completed";
        } else if (block.status === "failed") {
          // `error` is optional: never leak the literal "undefined" into the prompt.
          outcome = block.error ? ` - failed: ${block.error}` : " - failed";
        }
        // pending / in_progress calls carry no outcome suffix.
        transcript += `[Tool: ${block.title}${outcome}]\n`;
      }
    }
  }

  return transcript;
}
135
+
136
/**
 * Build the user-turn prompt sent to the LLM: a short metadata header followed
 * by the full conversation transcript.
 *
 * The first user message is previewed at up to 200 UTF-16 code units, with
 * "..." appended when it was longer. The cut never lands between the two
 * halves of a surrogate pair, so the prompt cannot contain a lone surrogate.
 *
 * @param transcript - Transcript text to embed verbatim.
 * @param preComputed - Fields extracted from the session without the LLM.
 * @returns The complete prompt string.
 */
function buildPrompt(
  transcript: string,
  preComputed: PreComputedFields,
): string {
  const previewLimit = 200;
  const query = preComputed.userQuery;

  let preview = query;
  if (query.length > previewLimit) {
    let cut = previewLimit;
    const lastUnit = query.charCodeAt(cut - 1);
    // A high surrogate at the cut point would be orphaned from its low half.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
    preview = `${query.slice(0, cut)}...`;
  }

  const tools = preComputed.toolsUsed.join(", ") || "none";

  return `Analyze this AI agent session and provide structured insights.

Session Metadata:
- First user message: ${preview}
- Number of steps: ${preComputed.numSteps}
- Number of tool calls: ${preComputed.numToolCalls}
- Tools used: ${tools}

Conversation Transcript:
${transcript}

Analyze the above session and respond with the required JSON structure.`;
}
156
+
157
/**
 * Pull the JSON payload out of a model reply.
 *
 * Handles, in order:
 *  1. a reply that already is bare JSON (starts with "{" or "[") - returned trimmed;
 *  2. a markdown code fence (optionally tagged "json", any line-ending style);
 *  3. a JSON object surrounded by prose - the span from the first "{" to the
 *     last "}" is returned.
 * When none of these apply, the trimmed text is returned unchanged and the
 * caller's JSON.parse reports the failure.
 *
 * @param text - Raw text returned by the model.
 * @returns The best candidate JSON string (not validated).
 */
function extractJSON(text: string): string {
  const looksLikeJSON = (candidate: string): boolean =>
    candidate.startsWith("{") || candidate.startsWith("[");

  const trimmed = text.trim();
  // Bare JSON is checked first so fences inside string values are not mistaken for wrappers.
  if (looksLikeJSON(trimmed)) {
    return trimmed;
  }

  // \s* tolerates CRLF, trailing spaces after the tag, and a missing newline.
  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  const candidate = (fenced?.[1] || trimmed).trim();
  if (looksLikeJSON(candidate)) {
    return candidate;
  }

  // Fall back to the outermost {...} span when the model added prose around the object.
  const start = candidate.indexOf("{");
  const end = candidate.lastIndexOf("}");
  if (start !== -1 && end > start) {
    return candidate.slice(start, end + 1);
  }

  return candidate;
}
170
+
171
/**
 * Analyze a session using Claude.
 *
 * Pre-computes the fields that can be read from the session, sends the
 * transcript to the model, validates the model's JSON reply, and merges both
 * into a SessionAnalysis.
 *
 * @param session - Stored session to analyze.
 * @returns The validated analysis record.
 * @throws Error when the response has no content or no text block.
 * @throws SyntaxError when the model's reply is not parseable JSON; the message
 *   includes the session id and the response's stop_reason so a truncated reply
 *   can be told apart from malformed output.
 * @throws When either Zod schema rejects the data (error raised by `.parse`).
 */
export async function analyzeSession(
  session: StoredSession,
): Promise<SessionAnalysis> {
  // 1. Pre-compute extractable fields
  const preComputed = extractPreComputedFields(session);

  // 2. Build conversation transcript and prompt for the LLM
  const transcript = formatConversationTranscript(session);
  const prompt = buildPrompt(transcript, preComputed);

  // 3. Call Claude
  const response = await anthropic.messages.create({
    model: ANALYSIS_MODEL,
    max_tokens: 4096,
    temperature: 0,
    system: ANALYSIS_SYSTEM_PROMPT,
    messages: [{ role: "user", content: prompt }],
  });

  // 4. Locate the text block (not necessarily the first content block)
  if (response.content.length === 0) {
    throw new Error("No content in response");
  }
  const textBlock = response.content.find((block) => block.type === "text");
  if (!textBlock || textBlock.type !== "text") {
    throw new Error("Unexpected response format");
  }

  // 5. Parse and validate the model output
  const jsonText = extractJSON(textBlock.text);
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonText);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // Same error type JSON.parse throws, with enough context to diagnose it.
    throw new SyntaxError(
      `Model response for session ${session.sessionId} is not valid JSON (stop_reason: ${response.stop_reason}): ${reason}`,
    );
  }
  const llmOutput = LLMAnalysisOutputSchema.parse(parsed);

  // 6. Combine pre-computed and LLM data
  const analysis: SessionAnalysis = {
    session_id: session.sessionId,
    agent_name: session.metadata.agentName,
    started_at: session.metadata.createdAt,
    ended_at: session.metadata.updatedAt,
    task: {
      user_query: preComputed.userQuery,
      task_summary: llmOutput.task_summary,
      intent_type: llmOutput.intent_type,
    },
    trajectory: {
      high_level_plan: llmOutput.high_level_plan,
      tools_used: preComputed.toolsUsed,
      num_steps: preComputed.numSteps,
      num_tool_calls: preComputed.numToolCalls,
    },
    outcome: {
      status: llmOutput.status,
      answer_type: llmOutput.answer_type,
      assessment: llmOutput.assessment,
    },
  };

  // 7. Validate final schema
  return SessionAnalysisSchema.parse(analysis);
}
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Session Analysis Embeddings Module
3
+ *
4
+ * Generates embeddings for session analyses using OpenAI's text-embedding-3-small model.
5
+ * These embeddings enable semantic search and similarity analysis of sessions.
6
+ */
7
+
8
+ import OpenAI from "openai";
9
+ import type { SessionAnalysis } from "./types";
10
+
11
/**
 * Flatten a SessionAnalysis into the labelled, line-oriented text that gets embedded.
 *
 * @param analysis - Analysis record to serialize.
 * @returns Text with the agent, intent/answer/status labels, user query, task
 *   summary, plan, tool statistics and outcome assessment, separated by blank lines.
 */
export function generateEmbeddingText(analysis: SessionAnalysis): string {
  const { task, trajectory, outcome } = analysis;

  const lines = [
    `Agent: ${analysis.agent_name}`,
    "",
    `IntentType: ${task.intent_type}`,
    `AnswerType: ${outcome.answer_type}`,
    `OutcomeStatus: ${outcome.status}`,
    "",
    "UserQuery:",
    `${task.user_query}`,
    "",
    "TaskSummary:",
    `${task.task_summary}`,
    "",
    "HighLevelPlan:",
    `${trajectory.high_level_plan}`,
    "",
    `ToolsUsed: ${trajectory.tools_used.join(", ")}`,
    `NumSteps: ${trajectory.num_steps}`,
    `NumToolCalls: ${trajectory.num_tool_calls}`,
    "",
    "OutcomeAssessment:",
    `${outcome.assessment}`,
  ];

  return lines.join("\n");
}
40
+
41
/**
 * Generates an embedding vector for the given text using OpenAI's API
 * (model "text-embedding-3-small", float encoding).
 *
 * @param text - Text to embed.
 * @param apiKey - OpenAI API key; must be non-empty.
 * @returns The embedding as a Float32Array.
 * @throws Error "OpenAI API key is required..." when apiKey is empty.
 * @throws Error "OpenAI API error: ..." when the request itself fails; the
 *   original error is attached as `cause` so status codes and stacks survive.
 * @throws Error "No embedding returned from OpenAI API" when the call succeeds
 *   but the response carries no vector.
 */
export async function generateEmbedding(
  text: string,
  apiKey: string,
): Promise<Float32Array> {
  if (!apiKey) {
    throw new Error("OpenAI API key is required for embedding generation");
  }

  const openai = new OpenAI({ apiKey });

  let response: Awaited<ReturnType<typeof openai.embeddings.create>>;
  try {
    response = await openai.embeddings.create({
      model: "text-embedding-3-small",
      input: text,
      encoding_format: "float",
    });
  } catch (error) {
    const wrapped = new Error(
      error instanceof Error
        ? `OpenAI API error: ${error.message}`
        : "Unknown error generating embedding",
    );
    // Keep the underlying error reachable for callers and logs.
    (wrapped as Error & { cause?: unknown }).cause = error;
    throw wrapped;
  }

  // Checked outside the try so this is not re-labelled as an API error.
  const embedding = response.data[0]?.embedding;
  if (!embedding) {
    throw new Error("No embedding returned from OpenAI API");
  }

  return new Float32Array(embedding);
}
77
+
78
/**
 * Embed a SessionAnalysis in one call: serialize it with generateEmbeddingText
 * and pass the text to generateEmbedding, using the key from OPENAI_API_KEY.
 *
 * @param analysis - Analysis record to embed.
 * @returns The embedding vector.
 * @throws Error if OPENAI_API_KEY is not set or the API call fails.
 */
export async function embedAnalysis(
  analysis: SessionAnalysis,
): Promise<Float32Array> {
  const { OPENAI_API_KEY: key } = process.env;

  if (key) {
    return generateEmbedding(generateEmbeddingText(analysis), key);
  }

  throw new Error(
    "OPENAI_API_KEY environment variable not set. Embedding generation requires an OpenAI API key.",
  );
}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Zod schemas for session analysis validation
3
+ */
4
+
5
+ import { z } from "zod";
6
+ import { AnswerType, IntentType, OutcomeStatus } from "./types";
7
+
8
/**
 * Intent type schema - accepts exactly the string values of the IntentType enum.
 * Built with z.enum, which in Zod 4 takes TypeScript enums directly
 * (z.nativeEnum is deprecated there).
 */
export const IntentTypeSchema = z.enum(IntentType);

/**
 * Answer type schema - accepts exactly the string values of the AnswerType enum.
 */
export const AnswerTypeSchema = z.enum(AnswerType);

/**
 * Outcome status schema - accepts exactly the string values of the OutcomeStatus enum.
 */
export const OutcomeStatusSchema = z.enum(OutcomeStatus);
22
+
23
/** Free-text field carrying the given description. */
const describedText = (description: string) => z.string().describe(description);

/**
 * Schema for the JSON object the LLM is asked to produce: three free-text
 * fields plus one value from each of the intent, status and answer-type enums.
 */
export const LLMAnalysisOutputSchema = z.object({
  task_summary: describedText(
    "Brief 1-2 sentence summary of what the user wanted",
  ),
  intent_type: IntentTypeSchema.describe("Primary user intent"),
  high_level_plan: describedText(
    "One paragraph describing the strategy the agent used",
  ),
  status: OutcomeStatusSchema.describe(
    "Whether the task was completed successfully",
  ),
  answer_type: AnswerTypeSchema.describe("What form the final response took"),
  assessment: describedText(
    "Explanation of why the status and answer_type were chosen",
  ),
});
42
+
43
/** "task" section: the raw query plus the LLM's summary and intent label. */
const taskSectionSchema = z.object({
  user_query: z.string(),
  task_summary: z.string(),
  intent_type: IntentTypeSchema,
});

/** "trajectory" section: the LLM's plan plus the pre-computed tool statistics. */
const trajectorySectionSchema = z.object({
  high_level_plan: z.string(),
  tools_used: z.array(z.string()),
  num_steps: z.number(),
  num_tool_calls: z.number(),
});

/** "outcome" section: status, answer form and the LLM's justification. */
const outcomeSectionSchema = z.object({
  status: OutcomeStatusSchema,
  answer_type: AnswerTypeSchema,
  assessment: z.string(),
});

/**
 * Schema for the complete session analysis record: identifying metadata
 * followed by the task, trajectory and outcome sections.
 */
export const SessionAnalysisSchema = z.object({
  session_id: z.string(),
  agent_name: z.string(),
  started_at: z.string(),
  ended_at: z.string(),
  task: taskSectionSchema,
  trajectory: trajectorySectionSchema,
  outcome: outcomeSectionSchema,
});
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Session Analysis Types
3
+ *
4
+ * This module defines the types for analyzing AI agent sessions to extract
5
+ * high-level insights about task execution and trajectory.
6
+ */
7
+
8
+ /**
9
+ * Intent types - What the user wanted to accomplish. String enum whose member values equal their names. The analyzer's system prompt lists these same names as the allowed intent_type values and the schema module builds IntentTypeSchema from this enum, so a member added here must also be added to that prompt.
10
+ */
11
+ export enum IntentType {
12
+ // Information & Research
13
+ RETRIEVE_INFORMATION = "RETRIEVE_INFORMATION", // Lookup facts/data
14
+ RESEARCH_TOPIC = "RESEARCH_TOPIC", // Deep investigation
15
+
16
+ // Data & Analysis
17
+ ANALYZE_DATA = "ANALYZE_DATA", // Examine/interpret data
18
+ SUMMARIZE_CONTENT = "SUMMARIZE_CONTENT", // Condense information
19
+ COMPARE_OPTIONS = "COMPARE_OPTIONS", // Evaluate alternatives
20
+
21
+ // Code & Development
22
+ CODE_GENERATION = "CODE_GENERATION", // Write new code
23
+ CODE_MODIFICATION = "CODE_MODIFICATION", // Edit existing code
24
+ CODE_REVIEW = "CODE_REVIEW", // Analyze code quality
25
+ DEBUG_ISSUE = "DEBUG_ISSUE", // Fix bugs
26
+ REFACTOR_CODE = "REFACTOR_CODE", // Improve structure
27
+
28
+ // File Operations
29
+ FILE_MANIPULATION = "FILE_MANIPULATION", // Create/edit/move files
30
+ CONTENT_SEARCH = "CONTENT_SEARCH", // Find in files
31
+
32
+ // Task Management
33
+ PLANNING = "PLANNING", // Create task plans
34
+ TASK_EXECUTION = "TASK_EXECUTION", // Complete multi-step task
35
+
36
+ // System & Troubleshooting
37
+ TROUBLESHOOTING = "TROUBLESHOOTING", // Diagnose problems
38
+
39
+ // Meta
40
+ CONVERSATION = "CONVERSATION", // General chat
41
+ OTHER = "OTHER", // Fallback
42
+ }
43
+
44
+ /**
45
+ * Answer types - What form the agent's response took. String enum whose member values equal their names. The analyzer's system prompt lists these same names as the allowed answer_type values and the schema module builds AnswerTypeSchema from this enum, so a member added here must also be added to that prompt.
46
+ */
47
+ export enum AnswerType {
48
+ // Direct responses
49
+ FACTUAL_RESPONSE = "FACTUAL_RESPONSE", // Direct facts/data
50
+ STRUCTURED_DATA = "STRUCTURED_DATA", // JSON/tables
51
+ SUMMARY = "SUMMARY", // Condensed information
52
+ ANALYSIS = "ANALYSIS", // Interpretation/insights
53
+ RECOMMENDATION = "RECOMMENDATION", // Suggested actions
54
+
55
+ // Artifacts
56
+ CODE = "CODE", // Code snippets/files
57
+ FILE_MODIFICATIONS = "FILE_MODIFICATIONS", // Changed files
58
+
59
+ // Interactive
60
+ CLARIFYING_QUESTION = "CLARIFYING_QUESTION", // Needs more input
61
+ ACKNOWLEDGMENT = "ACKNOWLEDGMENT", // Confirmed action
62
+
63
+ // Status
64
+ ERROR_MESSAGE = "ERROR_MESSAGE", // Failed with error
65
+ PARTIAL_COMPLETION = "PARTIAL_COMPLETION", // Incomplete success
66
+
67
+ // Empty
68
+ NO_RESPONSE = "NO_RESPONSE", // Aborted/timeout
69
+ OTHER = "OTHER", // Fallback
70
+ }
71
+
72
+ /**
73
+ * Outcome status - Whether the task was completed successfully. String enum whose member values equal their names. The analyzer's system prompt lists these same names as the allowed status values and the schema module builds OutcomeStatusSchema from this enum, so a member added here must also be added to that prompt.
74
+ */
75
+ export enum OutcomeStatus {
76
+ SUCCESS = "SUCCESS", // Completed successfully
77
+ FAILURE = "FAILURE", // Failed with error
78
+ ABORTED = "ABORTED", // User cancelled
79
+ TIMEOUT = "TIMEOUT", // Exceeded time limit
80
+ PARTIAL_SUCCESS = "PARTIAL_SUCCESS", // Some goals achieved
81
+ }
82
+
83
+ /**
84
+ * Complete session analysis result. Assembled by analyzeSession from the session's metadata, the pre-computed counts and the validated LLM output; SessionAnalysisSchema in the schema module is the matching runtime validator.
85
+ */
86
+ export interface SessionAnalysis {
87
+ session_id: string;
88
+ agent_name: string;
89
+ started_at: string; // ISO8601 expected; copied from the session metadata's createdAt (the schema only checks that it is a string)
90
+ ended_at: string; // ISO8601 expected; copied from the session metadata's updatedAt (the schema only checks that it is a string)
91
+
92
+ task: {
93
+ user_query: string; // First text block of the first user message ("" when there is none)
94
+ task_summary: string; // LLM-generated summary
95
+ intent_type: IntentType; // LLM-selected from enum
96
+ };
97
+
98
+ trajectory: {
99
+ high_level_plan: string; // LLM-generated paragraph
100
+ tools_used: string[]; // Pre-computed: distinct tool_call titles
101
+ num_steps: number; // Pre-computed (count assistant messages)
102
+ num_tool_calls: number; // Pre-computed: number of tool_call blocks across all messages
103
+ };
104
+
105
+ outcome: {
106
+ status: OutcomeStatus; // LLM-selected from enum
107
+ answer_type: AnswerType; // LLM-selected from enum
108
+ assessment: string; // LLM-generated explanation of status and answer_type
109
+ };
110
+ }
111
+
112
+ /**
113
+ * Shape of the JSON object the LLM is asked to generate. The field names match the keys listed in the analyzer's system prompt and the keys of LLMAnalysisOutputSchema in the schema module.
114
+ */
115
+ export interface LLMAnalysisOutput {
116
+ task_summary: string;
117
+ intent_type: IntentType;
118
+ high_level_plan: string;
119
+ status: OutcomeStatus;
120
+ answer_type: AnswerType;
121
+ assessment: string;
122
+ }
123
+
124
+ /**
125
+ * Pre-computed fields (extracted from session data without calling the LLM). As filled in by the analyzer: userQuery is the first text block of the first user message ("" when there is none), numSteps counts assistant messages, numToolCalls counts tool_call blocks across all messages, and toolsUsed holds the distinct tool_call titles.
126
+ */
127
+ export interface PreComputedFields {
128
+ userQuery: string;
129
+ numSteps: number;
130
+ numToolCalls: number;
131
+ toolsUsed: string[];
132
+ }