npm - @townco/agent - Versions diffs - 0.1.52 → 0.1.53 - Mend

@townco/agent 0.1.52 → 0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist/acp-server/adapter.d.ts +2 -0
package/dist/acp-server/adapter.js +28 -3
package/dist/acp-server/cli.d.ts +3 -1
package/dist/acp-server/session-storage.d.ts +2 -0
package/dist/acp-server/session-storage.js +2 -0
package/dist/bin.js +0 -0
package/dist/definition/mcp.d.ts +0 -0
package/dist/definition/mcp.js +0 -0
package/dist/definition/tools/todo.d.ts +49 -0
package/dist/definition/tools/todo.js +80 -0
package/dist/definition/tools/web_search.d.ts +4 -0
package/dist/definition/tools/web_search.js +26 -0
package/dist/dev-agent/index.d.ts +2 -0
package/dist/dev-agent/index.js +18 -0
package/dist/example.d.ts +2 -0
package/dist/example.js +19 -0
package/dist/runner/agent-runner.d.ts +4 -0
package/dist/runner/index.d.ts +3 -1
package/dist/runner/langchain/index.d.ts +0 -1
package/dist/runner/langchain/index.js +88 -27
package/dist/tsconfig.tsbuildinfo +1 -1
package/dist/utils/__tests__/tool-overhead-calculator.test.d.ts +1 -0
package/dist/utils/__tests__/tool-overhead-calculator.test.js +153 -0
package/dist/utils/context-size-calculator.d.ts +9 -4
package/dist/utils/context-size-calculator.js +23 -6
package/dist/utils/tool-overhead-calculator.d.ts +30 -0
package/dist/utils/tool-overhead-calculator.js +54 -0
package/package.json +6 -6
package/dist/check-jaeger.d.ts +0 -5
package/dist/check-jaeger.js +0 -82
package/dist/run-subagents.d.ts +0 -9
package/dist/run-subagents.js +0 -110
package/dist/runner/langchain/custom-stream-types.d.ts +0 -36
package/dist/runner/langchain/custom-stream-types.js +0 -23
package/dist/runner/langchain/tools/bash.d.ts +0 -14
package/dist/runner/langchain/tools/bash.js +0 -135
package/dist/test-telemetry.d.ts +0 -5
package/dist/test-telemetry.js +0 -88

package/dist/acp-server/adapter.d.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export declare class AgentAcpAdapter implements acp.Agent {
     private agentVersion;
     private agentDescription;
     private agentSuggestedPrompts;
+    private currentToolOverheadTokens;
+    private currentMcpOverheadTokens;
     constructor(agent: AgentRunner, connection: acp.AgentSideConnection, agentDir?: string, agentName?: string);
     /**
      * Helper to save session to disk

package/dist/acp-server/adapter.js CHANGED Viewed

@@ -103,6 +103,8 @@ export class AgentAcpAdapter {
     agentVersion;
     agentDescription;
     agentSuggestedPrompts;
+    currentToolOverheadTokens = 0; // Track tool overhead for current turn
+    currentMcpOverheadTokens = 0; // Track MCP overhead for current turn
     constructor(agent, connection, agentDir, agentName) {
         this.connection = connection;
         this.sessions = new Map();
@@ -338,6 +340,9 @@ export class AgentAcpAdapter {
         }
         session.pendingPrompt?.abort();
         session.pendingPrompt = new AbortController();
+        // Reset tool overhead for new turn (will be set by harness)
+        this.currentToolOverheadTokens = 0;
+        this.currentMcpOverheadTokens = 0;
         // Generate a unique messageId for this assistant response
         const messageId = Math.random().toString(36).substring(2);
         // Extract and store the user message
@@ -397,7 +402,9 @@ export class AgentAcpAdapter {
                 }
             }
             // Calculate context size - no LLM call yet, so only estimated values
-            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, undefined);
+            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, undefined, // No LLM-reported tokens yet
+            this.currentToolOverheadTokens, // Include tool overhead
+            this.currentMcpOverheadTokens);
             const contextSnapshot = createContextSnapshot(session.messages.length, new Date().toISOString(), previousContext, context_size);
             session.context.push(contextSnapshot);
             await this.saveSessionToDisk(params.sessionId, session);
@@ -461,6 +468,20 @@ export class AgentAcpAdapter {
             let iterResult = await generator.next();
             while (!iterResult.done) {
                 const msg = iterResult.value;
+                // Capture tool overhead info if provided by harness
+                if ("sessionUpdate" in msg &&
+                    msg.sessionUpdate === "tool_overhead_info") {
+                    const overheadInfo = msg;
+                    this.currentToolOverheadTokens = overheadInfo.toolOverheadTokens;
+                    this.currentMcpOverheadTokens = overheadInfo.mcpOverheadTokens;
+                    logger.debug("Received tool overhead info from harness", {
+                        toolOverheadTokens: this.currentToolOverheadTokens,
+                        mcpOverheadTokens: this.currentMcpOverheadTokens,
+                    });
+                    // Don't send this update to client, it's internal metadata
+                    iterResult = await generator.next();
+                    continue;
+                }
                 // Extract and accumulate token usage from message chunks
                 if ("sessionUpdate" in msg &&
                     msg.sessionUpdate === "agent_message_chunk" &&
@@ -659,7 +680,9 @@ export class AgentAcpAdapter {
                                 }
                             }
                             // Calculate context size - tool result is now in the message, but hasn't been sent to LLM yet
-                            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, undefined);
+                            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, undefined, // Tool result hasn't been sent to LLM yet, so no new LLM-reported tokens
+                            this.currentToolOverheadTokens, // Include tool overhead
+                            this.currentMcpOverheadTokens);
                             // Create snapshot with a pointer to the partial message (not a full copy!)
                             const midTurnSnapshot = {
                                 timestamp: new Date().toISOString(),
@@ -779,7 +802,9 @@ export class AgentAcpAdapter {
                 }
             }
             // Calculate context size with LLM-reported tokens from this turn
-            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, turnTokenUsage.inputTokens);
+            const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, turnTokenUsage.inputTokens, // Final LLM-reported tokens from this turn
+            this.currentToolOverheadTokens, // Include tool overhead
+            this.currentMcpOverheadTokens);
             const contextSnapshot = createContextSnapshot(session.messages.length, new Date().toISOString(), previousContext, context_size);
             session.context.push(contextSnapshot);
             await this.saveSessionToDisk(params.sessionId, session);

package/dist/acp-server/cli.d.ts CHANGED Viewed

@@ -1,3 +1,5 @@
 import type { AgentDefinition } from "../definition";
 import { type AgentRunner } from "../runner";
-export declare function makeStdioTransport(agent: AgentRunner | AgentDefinition): void;
+export declare function makeStdioTransport(
+	agent: AgentRunner | AgentDefinition,
+): void;

package/dist/acp-server/session-storage.d.ts CHANGED Viewed

@@ -62,6 +62,8 @@ export interface ContextEntry {
      */
     context_size: {
         systemPromptTokens: number;
+        toolOverheadTokens?: number | undefined;
+        mcpOverheadTokens?: number | undefined;
         userMessagesTokens: number;
         assistantMessagesTokens: number;
         toolInputTokens: number;

package/dist/acp-server/session-storage.js CHANGED Viewed

@@ -58,6 +58,8 @@ const contextEntrySchema = z.object({
     compactedUpTo: z.number().optional(),
     context_size: z.object({
         systemPromptTokens: z.number(),
+        toolOverheadTokens: z.number().optional(),
+        mcpOverheadTokens: z.number().optional(),
         userMessagesTokens: z.number(),
         assistantMessagesTokens: z.number(),
         toolInputTokens: z.number(),

package/dist/bin.js CHANGED Viewed

File without changes

package/dist/definition/mcp.d.ts ADDED Viewed

File without changes

package/dist/definition/mcp.js ADDED Viewed

File without changes

package/dist/definition/tools/todo.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import { z } from "zod";
+export declare const todoItemSchema: z.ZodObject<
+	{
+		content: z.ZodString;
+		status: z.ZodEnum<{
+			pending: "pending";
+			in_progress: "in_progress";
+			completed: "completed";
+		}>;
+		activeForm: z.ZodString;
+	},
+	z.core.$strip
+>;
+export declare const todoWrite: import("langchain").DynamicStructuredTool<
+	z.ZodObject<
+		{
+			todos: z.ZodArray<
+				z.ZodObject<
+					{
+						content: z.ZodString;
+						status: z.ZodEnum<{
+							pending: "pending";
+							in_progress: "in_progress";
+							completed: "completed";
+						}>;
+						activeForm: z.ZodString;
+					},
+					z.core.$strip
+				>
+			>;
+		},
+		z.core.$strip
+	>,
+	{
+		todos: {
+			content: string;
+			status: "pending" | "in_progress" | "completed";
+			activeForm: string;
+		}[];
+	},
+	{
+		todos: {
+			content: string;
+			status: "pending" | "in_progress" | "completed";
+			activeForm: string;
+		}[];
+	},
+	string
+>;

package/dist/definition/tools/todo.js ADDED Viewed

@@ -0,0 +1,80 @@
+import { tool } from "langchain";
+import { z } from "zod";
+export const todoItemSchema = z.object({
+	content: z.string().min(1),
+	status: z.enum(["pending", "in_progress", "completed"]),
+	activeForm: z.string().min(1),
+});
+export const todoWrite = tool(
+	({ todos }) => {
+		// Simple implementation that confirms the todos were written
+		return `Successfully updated todo list with ${todos.length} items`;
+	},
+	{
+		name: "todo_write",
+		description: `Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.
+It also helps the user understand the progress of the task and overall progress of their requests.
+## When to Use This Tool
+Use this tool proactively in these scenarios:
+1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
+2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
+3. User explicitly requests todo list - When the user directly asks you to use the todo list
+4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)
+5. After receiving new instructions - Immediately capture user requirements as todos
+6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time
+7. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation
+## When NOT to Use This Tool
+Skip using this tool when:
+1. There is only a single, straightforward task
+2. The task is trivial and tracking it provides no organizational benefit
+3. The task can be completed in less than 3 trivial steps
+4. The task is purely conversational or informational
+NOTE that you should not use this tool if there is only one trivial task to do. In this case you are better off just doing the task directly.
+## Task States and Management
+1. **Task States**: Use these states to track progress:
+   - pending: Task not yet started
+   - in_progress: Currently working on (limit to ONE task at a time)
+   - completed: Task finished successfully
+   **IMPORTANT**: Task descriptions must have two forms:
+   - content: The imperative form describing what needs to be done (e.g., "Run tests", "Build the project")
+   - activeForm: The present continuous form shown during execution (e.g., "Running tests", "Building the project")
+2. **Task Management**:
+   - Update task status in real-time as you work
+   - Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
+   - Exactly ONE task must be in_progress at any time (not less, not more)
+   - Complete current tasks before starting new ones
+   - Remove tasks that are no longer relevant from the list entirely
+3. **Task Completion Requirements**:
+   - ONLY mark a task as completed when you have FULLY accomplished it
+   - If you encounter errors, blockers, or cannot finish, keep the task as in_progress
+   - When blocked, create a new task describing what needs to be resolved
+   - Never mark a task as completed if:
+     - Tests are failing
+     - Implementation is partial
+     - You encountered unresolved errors
+     - You couldn't find necessary files or dependencies
+4. **Task Breakdown**:
+   - Create specific, actionable items
+   - Break complex tasks into smaller, manageable steps
+   - Use clear, descriptive task names
+   - Always provide both forms:
+     - content: "Fix authentication bug"
+     - activeForm: "Fixing authentication bug"
+When in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.`,
+		schema: z.object({
+			todos: z.array(todoItemSchema),
+		}),
+	},
+);

package/dist/definition/tools/web_search.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { ExaSearchResults } from "@langchain/exa";
+export declare function makeWebSearchTool(): ExaSearchResults<{
+	text: true;
+}>;

package/dist/definition/tools/web_search.js ADDED Viewed

@@ -0,0 +1,26 @@
+import { ExaSearchResults } from "@langchain/exa";
+import Exa from "exa-js";
+let _webSearchInstance = null;
+export function makeWebSearchTool() {
+	if (_webSearchInstance) {
+		return _webSearchInstance;
+	}
+	const apiKey = process.env.EXA_API_KEY;
+	if (!apiKey) {
+		throw new Error(
+			"EXA_API_KEY environment variable is required to use the web_search tool. " +
+				"Please set it to your Exa API key from https://exa.ai",
+		);
+	}
+	const client = new Exa(apiKey);
+	_webSearchInstance = new ExaSearchResults({
+		client,
+		searchArgs: {
+			numResults: 5,
+			type: "auto",
+			text: true,
+		},
+	});
+	return _webSearchInstance;
+}

package/dist/dev-agent/index.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+#!/usr/bin/env bun
+export {};

package/dist/dev-agent/index.js ADDED Viewed

@@ -0,0 +1,18 @@
+#!/usr/bin/env bun
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import { makeHttpTransport, makeStdioTransport } from "../acp-server/index";
+// Load agent definition from JSON file
+const configPath = join(import.meta.dir, "agent.json");
+const agent = JSON.parse(readFileSync(configPath, "utf-8"));
+const transport = process.argv[2] || "stdio";
+if (transport === "http") {
+    makeHttpTransport(agent);
+}
+else if (transport === "stdio") {
+    makeStdioTransport(agent);
+}
+else {
+    console.error(`Invalid transport: ${transport}`);
+    process.exit(1);
+}

package/dist/example.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+#!/usr/bin/env bun
+export {};

package/dist/example.js ADDED Viewed

@@ -0,0 +1,19 @@
+#!/usr/bin/env bun
+import { makeHttpTransport, makeStdioTransport } from "./acp-server/index.js";
+const exampleAgent = {
+	model: "claude-sonnet-4-5-20250929",
+	systemPrompt: "You are a helpful assistant.",
+	tools: ["todo_write", "get_weather", "web_search"],
+};
+// Parse transport type from command line argument
+const transport = process.argv[2] || "stdio";
+if (transport === "http") {
+	makeHttpTransport(exampleAgent);
+} else if (transport === "stdio") {
+	makeStdioTransport(exampleAgent);
+} else {
+	console.error(`Invalid transport: ${transport}`);
+	console.error("Usage: bun run example.ts [stdio|http]");
+	process.exit(1);
+}

package/dist/runner/agent-runner.d.ts CHANGED Viewed

@@ -109,6 +109,10 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
         contextInputTokens?: number;
         [key: string]: unknown;
     };
+} | {
+    sessionUpdate: "tool_overhead_info";
+    toolOverheadTokens: number;
+    mcpOverheadTokens: number;
 } | AgentMessageChunkWithTokens | HookNotificationUpdate;
 /** Describes an object that can run an agent definition */
 export interface AgentRunner {

package/dist/runner/index.d.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 import type { AgentDefinition } from "../definition";
 import { type AgentRunner } from "./agent-runner";
 export type { AgentRunner };
-export declare const makeRunnerFromDefinition: (definition: AgentDefinition) => AgentRunner;
+export declare const makeRunnerFromDefinition: (
+	definition: AgentDefinition,
+) => AgentRunner;

package/dist/runner/langchain/index.d.ts CHANGED Viewed

@@ -10,7 +10,6 @@ type MakeLazy<T> = T extends LangchainTool ? () => T : never;
 export declare const TOOL_REGISTRY: Record<BuiltInToolType, LangchainTool | LazyLangchainTool | LazyLangchainTools>;
 export declare class LangchainAgent implements AgentRunner {
     definition: CreateAgentRunnerParams;
-    private toolSpans;
     constructor(params: CreateAgentRunnerParams);
     invoke(req: InvokeRequest): AsyncGenerator<ExtendedSessionUpdate, PromptResponse, undefined>;
 }

package/dist/runner/langchain/index.js CHANGED Viewed

@@ -54,7 +54,6 @@ async function loadCustomTools(modulePaths) {
 }
 export class LangchainAgent {
     definition;
-    toolSpans = new Map();
     constructor(params) {
         this.definition = params;
     }
@@ -157,10 +156,41 @@ export class LangchainAgent {
                 const customTools = await loadCustomTools(customToolPaths);
                 enabledTools.push(...customTools);
             }
-            // MCP tools
+            // Calculate tool overhead tokens for non-MCP tools
+            const { countTokens } = await import("../../utils/token-counter.js");
+            const { extractToolMetadata, estimateAllToolsOverhead } = await import("../../utils/tool-overhead-calculator.js");
+            // Calculate overhead for non-MCP tools (built-in, custom, filesystem)
+            const nonMcpToolMetadata = enabledTools.map(extractToolMetadata);
+            const nonMcpToolDefinitionsTokens = estimateAllToolsOverhead(nonMcpToolMetadata);
+            // Calculate TODO_WRITE_INSTRUCTIONS overhead if applicable
+            const hasTodoWriteTool = builtInNames.includes("todo_write");
+            const todoInstructionsTokens = hasTodoWriteTool
+                ? countTokens(TODO_WRITE_INSTRUCTIONS)
+                : 0;
+            // Total non-MCP tool overhead: tool definitions + TODO instructions
+            const toolOverheadTokens = nonMcpToolDefinitionsTokens + todoInstructionsTokens;
+            // MCP tools - calculate overhead separately
+            let mcpOverheadTokens = 0;
             if ((this.definition.mcps?.length ?? 0) > 0) {
-                enabledTools.push(...(await makeMcpToolsClient(this.definition.mcps).getTools()));
+                const mcpTools = await makeMcpToolsClient(this.definition.mcps).getTools();
+                const mcpToolMetadata = mcpTools.map(extractToolMetadata);
+                mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
+                enabledTools.push(...mcpTools);
             }
+            _logger.debug("Calculated tool overhead for context sizing", {
+                enabledToolCount: enabledTools.length,
+                nonMcpToolDefinitionsTokens,
+                mcpToolDefinitionsTokens: mcpOverheadTokens,
+                todoInstructionsTokens,
+                totalNonMcpOverheadTokens: toolOverheadTokens,
+                totalMcpOverheadTokens: mcpOverheadTokens,
+            });
+            // Yield tool overhead info to adapter early in the turn
+            yield {
+                sessionUpdate: "tool_overhead_info",
+                toolOverheadTokens,
+                mcpOverheadTokens,
+            };
             // Wrap tools with response compaction if hook is configured
             const hooks = this.definition.hooks ?? [];
             const hasToolResponseHook = hooks.some((h) => h.type === "tool_response");
@@ -255,9 +285,12 @@ export class LangchainAgent {
             }
             // Filter tools if running in subagent mode
             const isSubagent = req.sessionMeta?.[SUBAGENT_MODE_KEY] === true;
-            const finalTools = isSubagent
+            const filteredTools = isSubagent
                 ? wrappedTools.filter((t) => t.name !== TODO_WRITE_TOOL_NAME && t.name !== TASK_TOOL_NAME)
                 : wrappedTools;
+            // Wrap tools with tracing so each tool executes within its own span context.
+            // This ensures subagent spans are children of the Task tool span.
+            const finalTools = filteredTools.map((t) => wrapToolWithTracing(t, req.sessionId));
             // Create the model instance using the factory
             // This detects the provider from the model string:
             // - "gemini-2.0-flash" → Google Generative AI
@@ -362,16 +395,6 @@ export class LangchainAgent {
                             if (toolCall.id == null) {
                                 throw new Error(`Tool call is missing id: ${JSON.stringify(toolCall)}`);
                             }
-                            // Create tool span within the invocation context
-                            // This makes the tool span a child of the invocation span
-                            const toolInputJson = JSON.stringify(toolCall.args);
-                            const toolSpan = context.with(invocationContext, () => telemetry.startSpan("agent.tool_call", {
-                                "tool.name": toolCall.name,
-                                "tool.id": toolCall.id,
-                                "tool.input": toolInputJson,
-                                "agent.session_id": req.sessionId,
-                            }));
-                            this.toolSpans.set(toolCall.id, toolSpan);
                             telemetry.log("info", `Tool call started: ${toolCall.name}`, {
                                 toolCallId: toolCall.id,
                                 toolName: toolCall.name,
@@ -553,19 +576,9 @@ export class LangchainAgent {
                                 // Skip tool_call_update for todo_write tools
                                 continue;
                             }
-                            // End telemetry span for this tool call
-                            const toolSpan = this.toolSpans.get(aiMessage.tool_call_id);
-                            if (toolSpan) {
-                                // Add tool output to span before ending
-                                telemetry.setSpanAttributes(toolSpan, {
-                                    "tool.output": aiMessage.content,
-                                });
-                                telemetry.log("info", "Tool call completed", {
-                                    toolCallId: aiMessage.tool_call_id,
-                                });
-                                telemetry.endSpan(toolSpan);
-                                this.toolSpans.delete(aiMessage.tool_call_id);
-                            }
+                            telemetry.log("info", "Tool call completed", {
+                                toolCallId: aiMessage.tool_call_id,
+                            });
                             // Send status update (metadata only, no content)
                             yield {
                                 sessionUpdate: "tool_call_update",
@@ -715,3 +728,51 @@ I've found some existing telemetry code. Let me mark the first todo as in_progre
 `.trim();
 // Re-export subagent tool utility
 export { makeSubagentsTool } from "./tools/subagent.js";
+/**
+ * Wraps a LangChain tool with OpenTelemetry tracing.
+ * This ensures the tool executes within its own span context,
+ * so any child operations (like subagent spawning) become children
+ * of the tool span rather than the parent invocation span.
+ */
+function wrapToolWithTracing(originalTool, sessionId) {
+    const wrappedFunc = async (input) => {
+        const toolInputJson = JSON.stringify(input);
+        const toolSpan = telemetry.startSpan("agent.tool_call", {
+            "tool.name": originalTool.name,
+            "tool.input": toolInputJson,
+            "agent.session_id": sessionId,
+        });
+        // Create a context with the tool span as active
+        const spanContext = toolSpan
+            ? trace.setSpan(context.active(), toolSpan)
+            : context.active();
+        try {
+            // Execute within the tool span's context
+            const result = await context.with(spanContext, () => originalTool.invoke(input));
+            const resultStr = typeof result === "string" ? result : JSON.stringify(result);
+            if (toolSpan) {
+                telemetry.setSpanAttributes(toolSpan, {
+                    "tool.output": resultStr,
+                });
+                telemetry.endSpan(toolSpan);
+            }
+            return result;
+        }
+        catch (error) {
+            if (toolSpan) {
+                telemetry.endSpan(toolSpan, error);
+            }
+            throw error;
+        }
+    };
+    // Create new tool with wrapped function
+    const wrappedTool = tool(wrappedFunc, {
+        name: originalTool.name,
+        description: originalTool.description,
+        schema: originalTool.schema,
+    });
+    // Preserve metadata
+    wrappedTool.prettyName = originalTool.prettyName;
+    wrappedTool.icon = originalTool.icon;
+    return wrappedTool;
+}