@townco/agent 0.1.121 → 0.1.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
  import { EventEmitter } from "node:events";
- import { mkdir } from "node:fs/promises";
+ import { mkdir, writeFile } from "node:fs/promises";
  import * as path from "node:path";
  import { MultiServerMCPClient } from "@langchain/mcp-adapters";
  import { context, propagation, trace } from "@opentelemetry/api";
@@ -10,9 +10,11 @@ import { ContextOverflowError, SUBAGENT_MODE_KEY, } from "../../acp-server/adapt
  import { createLogger } from "../../logger.js";
  import { telemetry } from "../../telemetry/index.js";
  import { calculateContextSize } from "../../utils/context-size-calculator.js";
+ import { countToolResultTokens } from "../../utils/token-counter.js";
  import { getModelContextWindow } from "../hooks/constants.js";
+ import { HookExecutor, loadHookCallback, } from "../hooks/index.js";
  import { isContextOverflowError } from "../hooks/predefined/context-validator.js";
- import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, runWithAbortSignal, } from "../session-context";
+ import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, getSessionContext, runWithAbortSignal, } from "../session-context";
  import { loadCustomToolModule, } from "../tool-loader.js";
  import { createModelFromString, detectProvider } from "./model-factory.js";
  import { makeOtelCallbacks } from "./otel-callbacks.js";
@@ -35,6 +37,33 @@ const getWeather = tool(({ city }) => `It's always sunny in ${city}!`, {
  getWeather.prettyName = "Get Weather";
  // biome-ignore lint/suspicious/noExplicitAny: Need to add custom properties to LangChain tool
  getWeather.icon = "Cloud";
+ function isPlainRecord(v) {
+ return !!v && typeof v === "object" && !Array.isArray(v);
+ }
+ function stableStringify(value) {
+ const seen = new WeakSet();
+ const _stringify = (v) => {
+ if (v === null)
+ return null;
+ if (typeof v !== "object")
+ return v;
+ if (Array.isArray(v))
+ return v.map(_stringify);
+ const obj = v;
+ if (seen.has(obj))
+ return "[Circular]";
+ seen.add(obj);
+ const out = {};
+ for (const k of Object.keys(obj).sort()) {
+ out[k] = _stringify(obj[k]);
+ }
+ return out;
+ };
+ return JSON.stringify(_stringify(value));
+ }
+ function toolCallKey(toolName, args) {
+ return `${toolName}:${stableStringify(args)}`;
+ }
  export const TOOL_REGISTRY = {
  todo_write: () => makeTodoWriteTool(), // Factory function to create fresh instance per invocation
  get_weather: getWeather, // TODO: Convert to factory function for full concurrency safety
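
Note: the new stableStringify/toolCallKey helpers give each tool invocation a deterministic lookup key: object keys are sorted recursively and cycles collapse to "[Circular]", so the same arguments produce the same key regardless of property order. A minimal standalone TypeScript sketch of the idea (illustrative, not the package's exports):

// Sketch: deterministic serialization so argument order doesn't affect the key.
function stableKey(toolName: string, args: unknown): string {
  const seen = new WeakSet<object>();
  const walk = (v: unknown): unknown => {
    if (v === null || typeof v !== "object") return v;
    if (Array.isArray(v)) return v.map(walk);
    const obj = v as Record<string, unknown>;
    if (seen.has(obj)) return "[Circular]";
    seen.add(obj);
    const out: Record<string, unknown> = {};
    for (const k of Object.keys(obj).sort()) out[k] = walk(obj[k]);
    return out;
  };
  return `${toolName}:${JSON.stringify(walk(args))}`;
}

console.log(stableKey("get_weather", { city: "Oslo", units: "c" }) ===
  stableKey("get_weather", { units: "c", city: "Oslo" })); // true
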
@@ -225,11 +254,25 @@ export class LangchainAgent {
  // Create OTEL callbacks for instrumentation early so we can use them during tool wrapping
  // Track iteration index across LLM calls in this invocation
  const iterationIndexRef = { current: 0 };
+ // Track actual token usage from API responses for validation
+ let lastActualTokenUsage = null;
  otelCallbacks = makeOtelCallbacks({
  provider,
  model: effectiveModel,
  parentContext: invocationContext,
  iterationIndexRef,
+ emitTokenUsage: (data) => {
+ // Store actual token usage from API response
+ lastActualTokenUsage = {
+ inputTokens: data.inputTokens,
+ outputTokens: data.outputTokens,
+ };
+ _logger.info("Actual token usage from API", {
+ inputTokens: data.inputTokens,
+ outputTokens: data.outputTokens,
+ totalTokens: data.inputTokens + data.outputTokens,
+ });
+ },
  });
  // Track todo_write tool call IDs to suppress their tool_call notifications
  const todoWriteToolCallIds = new Set();
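
Note: lastActualTokenUsage is a closure variable that the emitTokenUsage callback overwrites on every LLM call, so the generator can later yield only the most recent usage. A minimal sketch of that capture pattern, with hypothetical names:

// Sketch: capture the latest usage report from a callback for later readout.
interface TokenUsage { inputTokens: number; outputTokens: number; }

function makeUsageTracker() {
  let last: TokenUsage | null = null;
  return {
    emitTokenUsage: (data: TokenUsage) => { last = data; }, // handed to callbacks
    takeLast: (): TokenUsage | null => last,                // read at end of turn
  };
}

const tracker = makeUsageTracker();
tracker.emitTokenUsage({ inputTokens: 1200, outputTokens: 340 });
console.log(tracker.takeLast()); // { inputTokens: 1200, outputTokens: 340 }
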
@@ -332,13 +375,13 @@ export class LangchainAgent {
  const { extractToolMetadata, estimateAllToolsOverhead } = await import("../../utils/tool-overhead-calculator.js");
  // Calculate overhead for non-MCP tools (built-in, custom, filesystem)
  const nonMcpToolMetadata = enabledTools.map(extractToolMetadata);
- const nonMcpToolDefinitionsTokens = estimateAllToolsOverhead(nonMcpToolMetadata);
+ const nonMcpToolDefinitionsTokens = await estimateAllToolsOverhead(nonMcpToolMetadata);
  // Calculate TODO_WRITE_INSTRUCTIONS overhead if applicable
  // Skip for subagents since the todo_write tool is filtered out for them
  const isSubagentForTokens = req.sessionMeta?.[SUBAGENT_MODE_KEY] === true;
  const hasTodoWriteTool = builtInNames.includes("todo_write") && !isSubagentForTokens;
  const todoInstructionsTokens = hasTodoWriteTool
- ? countTokens(TODO_WRITE_INSTRUCTIONS)
+ ? await countTokens(TODO_WRITE_INSTRUCTIONS)
  : 0;
  // Total non-MCP tool overhead: tool definitions + TODO instructions
  const toolOverheadTokens = nonMcpToolDefinitionsTokens + todoInstructionsTokens;
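
Note: the recurring "+ await" changes in this version suggest countTokens and estimateAllToolsOverhead now return Promises (token counters that load a tokenizer lazily are typically async). A hedged sketch of why each former synchronous call site needs an await, with a made-up tokenizer:

// Sketch (assumption): an async token counter forces `await` at every call site.
type Encoder = { encode(text: string): number[] };
let encoder: Encoder | null = null;

async function countTokens(text: string): Promise<number> {
  if (!encoder) {
    // Stand-in for a real lazy tokenizer load (e.g. a dynamic import).
    encoder = { encode: (s) => Array.from(s, (_, i) => i) };
  }
  return encoder.encode(text).length;
}

async function main(): Promise<void> {
  const todoInstructionsTokens = await countTokens("Keep the todo list current.");
  console.log(todoInstructionsTokens); // character count in this toy encoder
}
main();
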
@@ -347,8 +390,20 @@ export class LangchainAgent {
  if ((this.definition.mcps?.length ?? 0) > 0) {
  const client = await makeMcpToolsClient(this.definition.mcps);
  const mcpTools = await client.getTools();
+ _logger.info("MCP tools loaded", {
+ mcpCount: this.definition.mcps?.length ?? 0,
+ toolCount: mcpTools.length,
+ toolNames: mcpTools.map((t) => t.name),
+ });
  const mcpToolMetadata = mcpTools.map(extractToolMetadata);
- mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
+ mcpOverheadTokens = await estimateAllToolsOverhead(mcpToolMetadata);
+ _logger.warn("MCP tool overhead calculated", {
+ mcpToolCount: mcpTools.length,
+ estimatedTokens: mcpOverheadTokens,
+ avgTokensPerTool: mcpTools.length > 0
+ ? Math.round(mcpOverheadTokens / mcpTools.length)
+ : 0,
+ });
  enabledTools.push(...mcpTools);
  }
  _logger.debug("Calculated tool overhead for context sizing", {
@@ -368,7 +423,7 @@ export class LangchainAgent {
  // Calculate accurate context size for tool response compaction decisions
  // This includes: system prompt, tool overhead, MCP overhead, and message history
  const baseSystemPromptTokens = this.definition.systemPrompt
- ? countTokens(this.definition.systemPrompt)
+ ? await countTokens(this.definition.systemPrompt)
  : 0;
  // Estimate additional injection tokens based on enabled features
  // These will be injected into the system prompt later
@@ -389,7 +444,7 @@ export class LangchainAgent {
  todoInstructionsTokens +
  injectionOverheadEstimate;
  // Calculate message history tokens from context messages
- const messageHistoryContext = calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
+ const messageHistoryContext = await calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
  0, // Don't double count tool overhead
  0);
  const messageHistoryTokens = messageHistoryContext.userMessagesTokens +
@@ -415,6 +470,101 @@ export class LangchainAgent {
  // Hook execution removed from tool wrapper - hooks are now executed only at the adapter layer
  // The adapter has proper MidTurnRestartError handling that can restart the turn
  // Executing hooks here in the runner was causing restart signals to be caught as tool failures
+ // In-flight tool-response compaction (updates what the LLM sees in this same turn).
+ // We run ONLY the tool_response_compactor callback here, and we attach metadata separately
+ // to tool_output notifications for persistence/UI, without polluting the model-visible output.
+ const hooks = this.definition.hooks ?? [];
+ const inflightToolResponseHooks = hooks
+ .filter((h) => h.type === "tool_response")
+ .map((h) => {
+ // Keep only tool_response_compactor callbacks to avoid mid-turn restart logic here.
+ if (h.callbacks && h.callbacks.length > 0) {
+ const filtered = h.callbacks.filter((c) => c.name === "tool_response_compactor");
+ return filtered.length > 0 ? { ...h, callbacks: filtered } : null;
+ }
+ if (h.callback === "tool_response_compactor") {
+ return h;
+ }
+ return null;
+ })
+ .filter((h) => h !== null);
+ const hasInflightToolCompaction = inflightToolResponseHooks.length > 0;
+ const inflightContextTokensRef = { current: baseContextTokens };
+ const toolCallIdQueuesByKey = new Map();
+ const toolCallIdWaitersByKey = new Map();
+ const inflightCompactionMetaByToolCallId = new Map();
+ const registerToolCallId = (toolName, args, id) => {
+ const key = toolCallKey(toolName, args);
+ const waiters = toolCallIdWaitersByKey.get(key);
+ if (waiters && waiters.length > 0) {
+ const resolve = waiters.shift();
+ if (resolve) {
+ resolve(id);
+ return;
+ }
+ }
+ const q = toolCallIdQueuesByKey.get(key) ?? [];
+ q.push(id);
+ toolCallIdQueuesByKey.set(key, q);
+ };
+ const consumeToolCallId = async (toolName, args) => {
+ const key = toolCallKey(toolName, args);
+ const q = toolCallIdQueuesByKey.get(key);
+ if (q && q.length > 0) {
+ const id = q.shift();
+ if (id)
+ return id;
+ }
+ // Wait briefly for the updates stream to register the id.
+ return await new Promise((resolve) => {
+ const waiters = toolCallIdWaitersByKey.get(key) ?? [];
+ waiters.push(resolve);
+ toolCallIdWaitersByKey.set(key, waiters);
+ setTimeout(() => {
+ // Fallback if never registered (should be rare)
+ const fallback = `unknown_${Date.now()}_${Math.random()
+ .toString(36)
+ .slice(2, 8)}`;
+ // Remove this resolver from the waiters list if still present
+ const remaining = toolCallIdWaitersByKey.get(key) ?? [];
+ const idx = remaining.indexOf(resolve);
+ if (idx >= 0)
+ remaining.splice(idx, 1);
+ if (remaining.length === 0) {
+ toolCallIdWaitersByKey.delete(key);
+ }
+ else {
+ toolCallIdWaitersByKey.set(key, remaining);
+ }
+ resolve(fallback);
+ }, 1500);
+ });
+ };
+ const saveToolOriginalToArtifacts = async (sessionId, toolName, toolCallId, content) => {
+ try {
+ const { artifactsDir } = getSessionContext();
+ const toolDir = path.join(artifactsDir, `tool-${toolName}`);
+ await mkdir(toolDir, { recursive: true });
+ const filePath = path.join(toolDir, `${toolCallId}.original.txt`);
+ await writeFile(filePath, content, "utf-8");
+ // Match SessionStorage.saveToolOriginal relative path format:
+ return `${sessionId}/artifacts/tool-${toolName}/${toolCallId}.original.txt`;
+ }
+ catch (e) {
+ _logger.warn("Failed to save original tool output in runner", {
+ toolName,
+ toolCallId,
+ error: e instanceof Error ? e.message : String(e),
+ });
+ return null;
+ }
+ };
+ const inflightHookExecutor = hasInflightToolCompaction
+ ? new HookExecutor(inflightToolResponseHooks, this.definition.model, (callbackRef) => loadHookCallback(callbackRef, req.agentDir), undefined, // no streaming notifications from runner
+ this.definition, {
+ getArtifactsDir: (_sid) => getSessionContext().artifactsDir,
+ }, req.sessionId, req.agentDir)
+ : null;
  // Counter for subagent calls - used to create unique source ID ranges
  // Each subagent call gets a unique offset (1000, 2000, 3000, etc.)
  // to ensure sources never conflict with parent's sources (typically < 100)
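
Note: registerToolCallId/consumeToolCallId above form a keyed rendezvous: whichever side arrives first parks either the id (producer) or a resolver (consumer) under the shared key, and a 1.5 s timeout supplies a fallback id if the stream never registers one. A self-contained sketch of the same pattern:

// Sketch: producer/consumer rendezvous with a timeout fallback.
const queues = new Map<string, string[]>();
const waiters = new Map<string, Array<(id: string) => void>>();

function register(key: string, id: string): void {
  const waiter = waiters.get(key)?.shift();
  if (waiter) { waiter(id); return; }         // a consumer was already waiting
  const q = queues.get(key) ?? [];
  q.push(id);                                 // otherwise park the id
  queues.set(key, q);
}

function consume(key: string, timeoutMs = 1500): Promise<string> {
  const queued = queues.get(key)?.shift();
  if (queued) return Promise.resolve(queued); // the producer got here first
  return new Promise((resolve) => {
    const list = waiters.get(key) ?? [];
    list.push(resolve);
    waiters.set(key, list);
    setTimeout(() => {
      const remaining = waiters.get(key) ?? [];
      const idx = remaining.indexOf(resolve);
      if (idx >= 0) remaining.splice(idx, 1); // still pending: fall back
      resolve(`unknown_${Date.now()}`);       // resolving twice is a no-op
    }, timeoutMs);
  });
}

register("get_weather:{}", "call_123");
consume("get_weather:{}").then(console.log); // "call_123"
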
@@ -423,9 +573,118 @@ export class LangchainAgent {
  // All hook execution (compaction, restart logic) happens at the adapter layer
  const wrappedTools = enabledTools.map((originalTool) => {
  const wrappedFunc = async (input) => {
- // Execute the original tool and return raw result
+ // Execute the original tool and return result.
+ // If configured, compact large tool output *before* returning it to LangChain,
+ // so the next LLM step sees the compacted output in the same in-flight turn.
+ const toolCallId = hasInflightToolCompaction
+ ? await consumeToolCallId(originalTool.name, input)
+ : `unknown_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  const result = await originalTool.invoke(input);
- return result;
+ if (!inflightHookExecutor || !hasInflightToolCompaction) {
+ return result;
+ }
+ // Build hook input in the same shape the adapter uses.
+ const toolInput = isPlainRecord(input) ? input : {};
+ const rawOutput = isPlainRecord(result)
+ ? result
+ : typeof result === "string"
+ ? { content: result }
+ : { content: JSON.stringify(result) };
+ const outputTokens = await countToolResultTokens(rawOutput);
+ // Include current prompt as the last user message for better context.
+ const nowIso = new Date().toISOString();
+ const promptBlocksForHooks = req.prompt.map((block) => {
+ if (block.type === "text" && typeof block.text === "string") {
+ return { type: "text", text: block.text };
+ }
+ if (block.type === "image") {
+ const imageBlock = { type: "image" };
+ if (block.source)
+ imageBlock.source = block.source;
+ if (block.url)
+ imageBlock.url = block.url;
+ if (block.data)
+ imageBlock.data = block.data;
+ if (block.mimeType)
+ imageBlock.mimeType = block.mimeType;
+ return imageBlock;
+ }
+ return {
+ type: "text",
+ text: JSON.stringify(block),
+ };
+ });
+ const messagesForHooks = [
+ ...(req.contextMessages ?? []),
+ {
+ role: "user",
+ content: promptBlocksForHooks,
+ timestamp: nowIso,
+ },
+ ];
+ const hookResult = await inflightHookExecutor.executeToolResponseHooks({
+ messages: messagesForHooks,
+ context: [],
+ requestParams: {},
+ }, inflightContextTokensRef.current, {
+ toolCallId,
+ toolName: originalTool.name,
+ toolInput,
+ rawOutput,
+ outputTokens,
+ });
+ // Update inflight context token estimate for subsequent tool calls.
+ const finalOutput = hookResult.modifiedOutput ?? rawOutput;
+ const finalTokens = hookResult.metadata?.finalTokens;
+ const finalOutputTokens = finalTokens ?? (await countToolResultTokens(finalOutput));
+ inflightContextTokensRef.current += finalOutputTokens;
+ // If compaction happened, persist original output to artifacts and store meta
+ // for the adapter/UI (without polluting model-visible tool output).
+ const action = hookResult.metadata?.action;
+ const originalTokens = hookResult.metadata?.originalTokens;
+ const finalTokensMeta = hookResult.metadata?.finalTokens;
+ const didActuallyCompact = action &&
+ action !== "none" &&
+ action !== "no_action_needed" &&
+ originalTokens !== undefined &&
+ finalTokensMeta !== undefined &&
+ finalTokensMeta < originalTokens;
+ if (didActuallyCompact && req.sessionId) {
+ const originalContentStr = typeof rawOutput.content === "string"
+ ? rawOutput.content
+ : JSON.stringify(rawOutput);
+ const preview = originalContentStr.slice(0, 2000);
+ const originalContentPath = await saveToolOriginalToArtifacts(req.sessionId, originalTool.name, toolCallId, originalContentStr);
+ const meta = {
+ action: action ?? "compacted",
+ originalTokens,
+ finalTokens: finalTokensMeta,
+ originalContentPreview: preview,
+ };
+ const tokensSaved = hookResult.metadata?.tokensSaved ??
+ (originalTokens !== undefined && finalTokensMeta !== undefined
+ ? originalTokens - finalTokensMeta
+ : undefined);
+ if (typeof tokensSaved === "number") {
+ meta.tokensSaved = tokensSaved;
+ }
+ const compactionMethod = hookResult.metadata?.compactionMethod;
+ if (typeof compactionMethod === "string") {
+ meta.compactionMethod = compactionMethod;
+ }
+ if (typeof originalContentPath === "string") {
+ meta.originalContentPath = originalContentPath;
+ }
+ inflightCompactionMetaByToolCallId.set(toolCallId, meta);
+ }
+ // Return compacted output to LangChain (model-visible), without metadata.
+ if (typeof result === "string") {
+ if (typeof finalOutput.content === "string") {
+ return finalOutput.content;
+ }
+ return JSON.stringify(finalOutput);
+ }
+ return finalOutput;
  };
  // Create new tool with wrapped function
  // biome-ignore lint/suspicious/noExplicitAny: Need to pass function with dynamic signature
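
Note: stripped of the compaction specifics, the wrapper above follows a simple pattern: invoke the original tool, post-process the output, and preserve the caller-visible type (string tools keep returning strings). A hedged sketch with a generic post-processor standing in for the compaction hook:

// Sketch: wrap a tool function so its output can be rewritten before the model sees it.
type ToolFn = (input: unknown) => Promise<unknown>;

function wrapTool(invoke: ToolFn, postProcess: (out: unknown) => Promise<unknown>): ToolFn {
  return async (input) => {
    const result = await invoke(input);
    const processed = await postProcess(result);
    // Preserve the shape the caller expects: string in, string out.
    if (typeof result === "string") {
      return typeof processed === "string" ? processed : JSON.stringify(processed);
    }
    return processed;
  };
}

// Usage: truncate oversized outputs (a stand-in for the real compactor).
const wrapped = wrapTool(
  async () => "x".repeat(10_000),
  async (out) => (typeof out === "string" && out.length > 1_000 ? out.slice(0, 1_000) : out),
);
wrapped({}).then((out) => console.log((out as string).length)); // 1000
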
@@ -595,6 +854,20 @@ export class LangchainAgent {
  }
  }
  }
+ // Calculate final system prompt tokens after ALL modifications/injections
+ const finalSystemPromptTokens = agentConfig.systemPrompt
+ ? await countTokens(agentConfig.systemPrompt)
+ : 0;
+ _logger.debug("Final system prompt tokens after all injections", {
+ finalSystemPromptTokens,
+ baseSystemPromptTokens: baseSystemPromptTokens,
+ injectionOverhead: finalSystemPromptTokens - baseSystemPromptTokens,
+ });
+ // Yield system prompt overhead to adapter for accurate context tracking
+ yield {
+ sessionUpdate: "system_prompt_overhead",
+ systemPromptTokens: finalSystemPromptTokens,
+ };
  const agent = createAgent(agentConfig);
  // Build messages from context history if available, otherwise use just the prompt
  // Type includes tool messages for sending tool results
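
Note: the runner now yields bookkeeping updates (system_prompt_overhead here, actual_token_usage near the end of the turn) alongside normal content. The adapter's handling is not shown in this diff; a plausible sketch is a discriminated-union dispatch that folds these into context accounting instead of forwarding them as chat output:

// Sketch (assumption): how a consumer might dispatch on sessionUpdate.
type BookkeepingUpdate =
  | { sessionUpdate: "system_prompt_overhead"; systemPromptTokens: number }
  | { sessionUpdate: "actual_token_usage"; inputTokens: number; outputTokens: number };

function handleUpdate(u: BookkeepingUpdate, ctx: { estimatedTokens: number }): void {
  switch (u.sessionUpdate) {
    case "system_prompt_overhead":
      ctx.estimatedTokens += u.systemPromptTokens; // fold into the running estimate
      break;
    case "actual_token_usage":
      // Compare the local estimate with what the API actually reported.
      console.log("estimate drift:", ctx.estimatedTokens - u.inputTokens);
      break;
  }
}
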
@@ -859,6 +1132,9 @@ export class LangchainAgent {
  if (toolCall.id == null) {
  throw new Error(`Tool call is missing id: ${JSON.stringify(toolCall)}`);
  }
+ // Register toolCall id so the tool wrapper can associate it with the invocation.
+ // This enables in-flight tool-output compaction keyed by real tool_call_id.
+ registerToolCallId(toolCall.name, toolCall.args, toolCall.id);
  telemetry.log("info", `Tool call started: ${toolCall.name}`, {
  toolCallId: toolCall.id,
  toolName: toolCall.name,
@@ -1154,6 +1430,13 @@ export class LangchainAgent {
  catch {
  // Not valid JSON, use original content
  }
+ // If we compacted the tool result in the wrapper, attach meta here for the adapter/UI.
+ // This does NOT affect what LangChain/LLM saw (it already received the compacted output).
+ const inflightMeta = inflightCompactionMetaByToolCallId.get(aiMessage.tool_call_id);
+ if (inflightMeta) {
+ compactionMeta = { ...(compactionMeta ?? {}), ...inflightMeta };
+ rawOutput = { ...rawOutput, _compactionMeta: compactionMeta };
+ }
  // For content display, use cleaned version if compaction occurred
  let displayContent = aiMessage.content;
  if (compactionMeta) {
@@ -1235,6 +1518,15 @@ export class LangchainAgent {
  sessionId: req.sessionId,
  });
  telemetry.endSpan(invocationSpan);
+ // Yield actual token usage from API for comparison with estimates
+ if (lastActualTokenUsage !== null) {
+ const actualUsage = lastActualTokenUsage;
+ yield {
+ sessionUpdate: "actual_token_usage",
+ inputTokens: actualUsage.inputTokens,
+ outputTokens: actualUsage.outputTokens,
+ };
+ }
  return {
  stopReason: "end_turn",
  _meta: {
@@ -7,6 +7,11 @@ export interface OtelCallbackOptions {
  iterationIndexRef: {
  current: number;
  };
+ emitTokenUsage?: (data: {
+ sessionUpdate: "actual_token_usage";
+ inputTokens: number;
+ outputTokens: number;
+ }) => void;
  }
  /**
  * Creates OpenTelemetry callback handlers for LangChain LLM calls.
@@ -188,6 +188,14 @@ export function makeOtelCallbacks(opts) {
  ? tokenUsage.totalTokens - inputTokens
  : 0);
  telemetry.recordTokenUsage(inputTokens, outputTokens, chatSpan);
+ // Emit token usage to adapter for validation
+ if (opts.emitTokenUsage) {
+ opts.emitTokenUsage({
+ sessionUpdate: "actual_token_usage",
+ inputTokens,
+ outputTokens,
+ });
+ }
  }
  // Serialize output and attach to span
  const serializedOutput = serializeOutput(output);
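
Note: the two otel-callbacks changes pair up: the options type declares emitTokenUsage as optional, and the handler guards before invoking it. A compact sketch of that optional-callback contract:

// Sketch: optional callback in an options object, guarded at the call site.
interface CallbackOptions {
  emitTokenUsage?: (data: {
    sessionUpdate: "actual_token_usage";
    inputTokens: number;
    outputTokens: number;
  }) => void;
}

function onLlmEnd(opts: CallbackOptions, inputTokens: number, outputTokens: number): void {
  // Telemetry is recorded first in the real code; then the listener, if any, is notified.
  opts.emitTokenUsage?.({ sessionUpdate: "actual_token_usage", inputTokens, outputTokens });
}

onLlmEnd({ emitTokenUsage: (d) => console.log(d) }, 1200, 340);
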
@@ -37,7 +37,7 @@ const documentExtract = tool(async ({ session_id, file_path, query, target_token
  parsedContent = { content };
  }
  // Count tokens in the document
- const documentTokens = countTokens(content);
+ const documentTokens = await countTokens(content);
  logger.info("Document extraction requested", {
  filePath: file_path,
  documentTokens,