npm - @poncho-ai/harness - Version diff - 0.31.0 → 0.31.2 - Mend

@poncho-ai/harness 0.31.0 → 0.31.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/index.js CHANGED

@@ -361,9 +361,11 @@ var OVERHEAD_MULTIPLIER = 1.15;
 var MIN_COMPACTABLE_MESSAGES = 4;
 var DEFAULT_COMPACTION_CONFIG = {
   enabled: true,
-  trigger: 0.8,
-  keepRecentMessages: 6
+  trigger: 0.75,
+  keepRecentMessages: 4
 };
+var SUMMARIZATION_MESSAGE_TRUNCATION_CHARS = 1200;
+var SUMMARIZATION_MAX_OUTPUT_TOKENS = 768;
 var SUMMARIZATION_PROMPT = `Summarize the following conversation into a structured working state that allows continuation without re-asking questions. Include:
 1. **User intent**: What the user originally asked for and any refinements
@@ -418,7 +420,7 @@ var buildSummarizationMessages = (messagesToCompact, instructions) => {
   const conversationLines = [];
   for (const msg of messagesToCompact) {
     const text = getTextContent(msg);
-    const truncated = text.length > 2e3 ? text.slice(0, 2e3) + "\n...[truncated]" : text;
+    const truncated = text.length > SUMMARIZATION_MESSAGE_TRUNCATION_CHARS ? text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) + "\n...[truncated]" : text;
     conversationLines.push(`[${msg.role}]: ${truncated}`);
   }
   const prompt = instructions ? `${SUMMARIZATION_PROMPT}
@@ -473,7 +475,7 @@ var compactMessages = async (model, messages, config, options) => {
     const result = await generateText({
       model,
       messages: summarizationMessages,
-      maxOutputTokens: 2048
+      maxOutputTokens: SUMMARIZATION_MAX_OUTPUT_TOKENS
     });
     const summary = result.text.trim();
     if (!summary) {
@@ -1658,7 +1660,7 @@ Logs print to console:
 [event] run:started {"type":"run:started","runId":"run_abc123","agentId":"my-agent"}
 [event] tool:started {"type":"tool:started","tool":"read_file","input":{"path":"README.md"}}
 [event] tool:completed {"type":"tool:completed","tool":"read_file","duration":45,"output":{"path":"README.md","content":"..."}}
-[event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840}}}
+[event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840,"cached":1200,"cacheWrite":300}}}
 \`\`\`
 ### Production telemetry (generic OTLP)
@@ -2110,7 +2112,7 @@ var ponchoDocsTool = defineTool({
 import { randomUUID as randomUUID3 } from "crypto";
 import { readFile as readFile9 } from "fs/promises";
 import { resolve as resolve11 } from "path";
-import { getTextContent as getTextContent2 } from "@poncho-ai/sdk";
+import { defineTool as defineTool7, getTextContent as getTextContent2 } from "@poncho-ai/sdk";
 // src/upload-store.ts
 import { createHash as createHash2 } from "crypto";
@@ -3887,6 +3889,7 @@ var MODEL_CONTEXT_WINDOWS = {
 };
 var DEFAULT_CONTEXT_WINDOW = 2e5;
 var OPENAI_CODEX_DEFAULT_INSTRUCTIONS = "You are Codex, based on GPT-5. You are running as a coding agent in Poncho.";
+var OPENAI_CODEX_RESPONSES_URL = process.env.OPENAI_CODEX_RESPONSES_URL ?? "https://chatgpt.com/backend-api/codex/responses";
 var extractSystemInstructionFromInput = (input) => {
   if (!Array.isArray(input)) return void 0;
   for (const message of input) {
@@ -3909,6 +3912,18 @@ var extractSystemInstructionFromInput = (input) => {
   }
   return void 0;
 };
+var normalizeToolParameterSchemas = (tools) => {
+  if (!Array.isArray(tools)) return;
+  for (const tool of tools) {
+    if (!tool || typeof tool !== "object") continue;
+    const entry = tool;
+    if (!entry.parameters || typeof entry.parameters !== "object") continue;
+    const schema = entry.parameters;
+    if (schema.type === "object" && (typeof schema.properties !== "object" || schema.properties === null)) {
+      schema.properties = {};
+    }
+  }
+};
 var getModelContextWindow = (modelName) => {
   if (MODEL_CONTEXT_WINDOWS[modelName] !== void 0) {
     return MODEL_CONTEXT_WINDOWS[modelName];
@@ -3938,7 +3953,7 @@ var createModelProvider = (provider, config) => {
         const originalUrl = input instanceof URL ? input.toString() : typeof input === "string" ? input : input.url;
         const parsed = new URL(originalUrl);
         const shouldRewrite = parsed.pathname.includes("/v1/responses") || parsed.pathname.includes("/chat/completions");
-        const targetUrl = shouldRewrite ? "https://chatgpt.com/backend-api/codex/responses" : originalUrl;
+        const targetUrl = shouldRewrite ? OPENAI_CODEX_RESPONSES_URL : originalUrl;
         let body = init?.body;
         if (shouldRewrite && typeof body === "string" && headers.get("Content-Type")?.includes("application/json")) {
           try {
@@ -3946,12 +3961,21 @@ var createModelProvider = (provider, config) => {
             if (typeof payload.instructions !== "string" || payload.instructions.trim() === "") {
               payload.instructions = extractSystemInstructionFromInput(payload.input) ?? OPENAI_CODEX_DEFAULT_INSTRUCTIONS;
             }
+            normalizeToolParameterSchemas(payload.tools);
             payload.store = false;
             body = JSON.stringify(payload);
           } catch {
           }
         }
-        return fetch(targetUrl, { ...init, headers, body });
+        try {
+          return await fetch(targetUrl, { ...init, headers, body });
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          if (shouldRewrite && targetUrl.includes("chatgpt.com") && message.includes("ENOTFOUND chatgpt.com")) {
+            return fetch(originalUrl, { ...init, headers, body });
+          }
+          throw error;
+        }
       }
     });
     return (modelName) => openai(modelName);
@@ -4993,6 +5017,9 @@ var TelemetryEmitter = class {
     if (otlp) {
       await this.sendOtlp(event, otlp);
     }
+    if (event.type === "model:chunk") {
+      return;
+    }
     process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}
 `);
   }
@@ -5119,8 +5146,12 @@ var ToolDispatcher = class {
 // src/harness.ts
 var now = () => Date.now();
-var FIRST_CHUNK_TIMEOUT_MS = 3e5;
-var MAX_TRANSIENT_STEP_RETRIES = 2;
+var FIRST_CHUNK_TIMEOUT_MS = 9e4;
+var MAX_TRANSIENT_STEP_RETRIES = 1;
+var COMPACTION_CHECK_INTERVAL_STEPS = 3;
+var TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
+var TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
+var TOOL_RESULT_PREVIEW_CHARS = 700;
 var FirstChunkTimeoutError = class extends Error {
   constructor(modelName, timeoutMs) {
     super(
@@ -5179,18 +5210,11 @@ var isRetryableModelError = (error) => {
   if (error instanceof FirstChunkTimeoutError) {
     return true;
   }
-  if (isNoOutputGeneratedError(error)) {
-    return true;
-  }
   const statusCode = getErrorStatusCode(error);
   if (typeof statusCode === "number") {
     return statusCode === 429 || statusCode >= 500;
   }
-  if (!error || typeof error !== "object") {
-    return false;
-  }
-  const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
-  return maybeMessage.includes("internal server error") || maybeMessage.includes("service unavailable") || maybeMessage.includes("gateway timeout") || maybeMessage.includes("rate limit");
+  return false;
 };
 var toRunError = (error) => {
   const statusCode = getErrorStatusCode(error);
@@ -5242,6 +5266,58 @@ var toProviderSafeToolName = (originalName, index, used) => {
   used.add(candidate);
   return candidate;
 };
+var isToolResultRow = (value) => {
+  if (typeof value !== "object" || value === null) return false;
+  const row = value;
+  return typeof row.tool_use_id === "string" && typeof row.tool_name === "string" && typeof row.content === "string";
+};
+var readArchiveFromParameters = (parameters) => {
+  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
+  if (typeof raw !== "object" || raw === null) return {};
+  const out = {};
+  for (const [key, value] of Object.entries(raw)) {
+    if (typeof value !== "object" || value === null) continue;
+    const row = value;
+    if (typeof row.toolResultId !== "string" || typeof row.conversationId !== "string" || typeof row.toolName !== "string" || typeof row.toolCallId !== "string" || typeof row.createdAt !== "number" || typeof row.sizeBytes !== "number" || typeof row.payload !== "string") {
+      continue;
+    }
+    out[key] = {
+      toolResultId: row.toolResultId,
+      conversationId: row.conversationId,
+      toolName: row.toolName,
+      toolCallId: row.toolCallId,
+      createdAt: row.createdAt,
+      sizeBytes: row.sizeBytes,
+      payload: row.payload
+    };
+  }
+  return out;
+};
+var makeTruncatedToolResultNotice = (toolResultId, toolName, payload) => {
+  const preview = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
+  const omittedChars = Math.max(0, payload.length - preview.length);
+  return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}
+${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
+};
+var hasUntruncatedToolResults = (messages) => {
+  for (const msg of messages) {
+    if (msg.role !== "tool" || typeof msg.content !== "string") continue;
+    let parsed;
+    try {
+      parsed = JSON.parse(msg.content);
+    } catch {
+      continue;
+    }
+    if (!Array.isArray(parsed)) continue;
+    for (const row of parsed) {
+      if (!isToolResultRow(row)) continue;
+      if (!row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
+        return true;
+      }
+    }
+  }
+  return false;
+};
 var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
 You are running locally in development mode. Treat this as an editable agent workspace.
@@ -5573,6 +5649,7 @@ var AgentHarness = class _AgentHarness {
   agentFileFingerprint = "";
   mcpBridge;
   subagentManager;
+  archivedToolResultsByConversation = /* @__PURE__ */ new Map();
   resolveToolAccess(toolName) {
     const tools = this.loadedConfig?.tools;
     if (!tools) return true;
@@ -5644,6 +5721,56 @@ var AgentHarness = class _AgentHarness {
     if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
       this.registerIfMissing(ponchoDocsTool);
     }
+    if (this.isToolEnabled("get_tool_result_by_id")) {
+      this.registerIfMissing(this.createGetToolResultByIdTool());
+    }
+  }
+  createGetToolResultByIdTool() {
+    return defineTool7({
+      name: "get_tool_result_by_id",
+      description: "Retrieve a previously archived full tool result by id for the current conversation. Use this when older tool outputs were truncated in prompt history.",
+      inputSchema: {
+        type: "object",
+        properties: {
+          toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
+          offset: { type: "number", description: "Optional character offset for paging large payloads" },
+          limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" }
+        },
+        required: ["toolResultId"],
+        additionalProperties: false
+      },
+      handler: async (input, context) => {
+        const conversationId = context.conversationId ?? "__default__";
+        const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
+        const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
+        const record = archive[toolResultId];
+        if (!record) {
+          console.info(
+            `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`
+          );
+          return {
+            error: `No archived tool result found for id "${toolResultId}" in this conversation.`
+          };
+        }
+        const offset = Math.max(0, Number(input.offset) || 0);
+        const limit = Math.min(Math.max(Number(input.limit) || 6e3, 1), 2e4);
+        const end = Math.min(record.payload.length, offset + limit);
+        const chunk = record.payload.slice(offset, end);
+        console.info(
+          `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" offset=${offset} returned=${chunk.length} total=${record.payload.length}`
+        );
+        return {
+          toolResultId: record.toolResultId,
+          toolName: record.toolName,
+          toolCallId: record.toolCallId,
+          totalChars: record.payload.length,
+          offset,
+          returnedChars: chunk.length,
+          hasMore: end < record.payload.length,
+          payload: chunk
+        };
+      }
+    });
   }
   shouldEnableWriteTool() {
     const override = process.env.PONCHO_FS_WRITE?.toLowerCase();
@@ -5668,6 +5795,116 @@ var AgentHarness = class _AgentHarness {
   get frontmatter() {
     return this.parsedAgent?.frontmatter;
   }
+  getToolResultArchive(conversationId) {
+    const archive = this.archivedToolResultsByConversation.get(conversationId);
+    return archive ? { ...archive } : {};
+  }
+  seedToolResultArchive(conversationId, parameters) {
+    const seeded = readArchiveFromParameters(parameters);
+    const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
+    const merged = { ...existing, ...seeded };
+    this.archivedToolResultsByConversation.set(conversationId, merged);
+    return merged;
+  }
+  truncateHistoricalToolResults(messages, conversationId) {
+    let latestRunId;
+    for (let i = messages.length - 1; i >= 0; i -= 1) {
+      const msg = messages[i];
+      const meta = msg.metadata;
+      const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
+      if (runId) {
+        latestRunId = runId;
+        break;
+      }
+    }
+    if (!latestRunId) {
+      return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
+    }
+    const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
+    this.archivedToolResultsByConversation.set(conversationId, archive);
+    let changed = false;
+    let truncatedCount = 0;
+    let archivedCount = 0;
+    let omittedChars = 0;
+    for (const msg of messages) {
+      if (msg.role !== "tool" || typeof msg.content !== "string") continue;
+      const meta = msg.metadata;
+      const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
+      if (runId === latestRunId) continue;
+      let parsed;
+      try {
+        parsed = JSON.parse(msg.content);
+      } catch {
+        continue;
+      }
+      if (!Array.isArray(parsed)) continue;
+      let rowChanged = false;
+      const nextRows = parsed.map((row) => {
+        if (!isToolResultRow(row)) return row;
+        if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
+        if (this.shouldPreserveSkillToolResult(row)) return row;
+        const toolResultId = row.tool_use_id;
+        if (!archive[toolResultId]) {
+          archive[toolResultId] = {
+            toolResultId,
+            conversationId,
+            toolName: row.tool_name,
+            toolCallId: row.tool_use_id,
+            createdAt: now(),
+            sizeBytes: Buffer.byteLength(row.content, "utf8"),
+            payload: row.content
+          };
+          archivedCount += 1;
+        }
+        const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
+        omittedChars += omitted;
+        truncatedCount += 1;
+        rowChanged = true;
+        return {
+          ...row,
+          content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content)
+        };
+      });
+      if (rowChanged) {
+        msg.content = JSON.stringify(nextRows);
+        if (msg.metadata && typeof msg.metadata === "object") {
+          const meta2 = msg.metadata;
+          if ("_richToolResults" in meta2) {
+            delete meta2._richToolResults;
+          }
+        }
+        changed = true;
+      }
+    }
+    return { changed, truncatedCount, archivedCount, omittedChars };
+  }
+  shouldPreserveSkillToolResult(row) {
+    if (row.tool_name.startsWith("todo_")) {
+      return true;
+    }
+    if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
+      return false;
+    }
+    const content = row.content.trim();
+    if (content.startsWith("Tool error:")) {
+      return false;
+    }
+    try {
+      const parsed = JSON.parse(content);
+      const skill = typeof parsed.skill === "string" ? parsed.skill : void 0;
+      if (skill && this.activeSkillNames.has(skill)) {
+        return true;
+      }
+      const activeSkills = Array.isArray(parsed.activeSkills) ? parsed.activeSkills.filter((v) => typeof v === "string") : [];
+      for (const name of activeSkills) {
+        if (this.activeSkillNames.has(name)) {
+          return true;
+        }
+      }
+    } catch {
+    }
+    return false;
+  }
   async getTodos(conversationId) {
     if (!this.todoStore) return [];
     return this.todoStore.get(conversationId);
@@ -6336,12 +6573,31 @@ var AgentHarness = class _AgentHarness {
     let agent = this.parsedAgent;
     const runId = `run_${randomUUID3()}`;
     const start = now();
-    const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
+    const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
     const configuredTimeout = agent.frontmatter.limits?.timeout;
     const timeoutMs = this.environment === "development" && configuredTimeout == null ? 0 : (configuredTimeout ?? 300) * 1e3;
     const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
     const softDeadlineMs = input.disableSoftDeadline || platformMaxDurationSec <= 0 ? 0 : platformMaxDurationSec * 800;
     const messages = [...input.messages ?? []];
+    const conversationId = input.conversationId ?? "__default__";
+    this.seedToolResultArchive(conversationId, input.parameters);
+    const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
+    if (truncationSummary.changed) {
+      console.info(
+        `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) (archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) for conversation="${conversationId}"`
+      );
+    }
+    const hasFullToolResults = hasUntruncatedToolResults(messages);
+    const enablePromptCache = !hasFullToolResults;
+    if (!enablePromptCache) {
+      console.info(
+        `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
+      );
+    } else {
+      console.info(
+        `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
+      );
+    }
     const inputMessageCount = messages.length;
     const events = [];
     const renderCurrentAgentPrompt = () => renderAgentPrompt(this.parsedAgent, {
@@ -6421,7 +6677,6 @@ ${this.skillFingerprint}`;
     const browserEventQueue = [];
     const browserCleanups = [];
     const browserSession = this._browserSession;
-    const conversationId = input.conversationId ?? "__default__";
     if (browserSession) {
       browserCleanups.push(
         browserSession.onFrame(conversationId, (frame) => {
@@ -6488,6 +6743,7 @@ ${this.skillFingerprint}`;
     let totalInputTokens = 0;
     let totalOutputTokens = 0;
     let totalCachedTokens = 0;
+    let totalCacheWriteTokens = 0;
     let transientStepRetryCount = 0;
     let latestContextTokens = 0;
     let toolOutputEstimateSinceModel = 0;
@@ -6516,7 +6772,12 @@ ${this.skillFingerprint}`;
             status: "completed",
             response: responseText,
             steps: step - 1,
-            tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+            tokens: {
+              input: totalInputTokens,
+              output: totalOutputTokens,
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
+            },
             duration: now() - start,
             continuation: true,
             continuationMessages: [...messages],
@@ -6529,7 +6790,6 @@ ${this.skillFingerprint}`;
         }
         const stepStart = now();
         yield pushEvent({ type: "step:started", step });
-        yield pushEvent({ type: "model:request", tokens: 0 });
         const dispatcherTools = this.dispatcher.list();
         const exposedToolNames = /* @__PURE__ */ new Map();
         const usedProviderToolNames = /* @__PURE__ */ new Set();
@@ -6548,6 +6808,15 @@ ${this.skillFingerprint}`;
             inputSchema: jsonSchemaToZod(tool.inputSchema)
           };
         }
+        const toolDefsJsonForEstimate = JSON.stringify(
+          dispatcherTools.map((t) => ({
+            name: t.name,
+            description: t.description,
+            inputSchema: t.inputSchema
+          }))
+        );
+        const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
+        yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
         const convertMessage = async (msg) => {
           if (msg.role === "tool") {
             const meta = msg.metadata;
@@ -6737,15 +7006,8 @@ ${textContent}` };
         }
         const modelInstance = this.modelProvider(modelName);
         const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
-        if (compactionConfig.enabled && step === 1) {
-          const toolDefsJson = JSON.stringify(
-            dispatcherTools.map((t) => ({
-              name: t.name,
-              description: t.description,
-              inputSchema: t.inputSchema
-            }))
-          );
-          const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
+        if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
+          const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
           const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
           const effectiveTokens = Math.max(estimated, lastReportedInput);
           if (effectiveTokens > compactionConfig.trigger * contextWindow) {
@@ -6758,11 +7020,14 @@ ${textContent}` };
             if (compactResult.compacted) {
               messages.length = 0;
               messages.push(...compactResult.messages);
-              const emittedMessages = [...compactResult.messages];
-              if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
-                emittedMessages.pop();
+              let emittedMessages;
+              if (step === 1) {
+                emittedMessages = [...compactResult.messages];
+                if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
+                  emittedMessages.pop();
+                }
               }
-              const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
+              const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
               latestContextTokens = tokensAfterCompaction;
               toolOutputEstimateSinceModel = 0;
               yield pushEvent({
@@ -6789,7 +7054,7 @@ ${textContent}` };
         const coreMessages = cachedCoreMessages;
         const temperature = agent.frontmatter.model?.temperature ?? 0.2;
         const maxTokens = agent.frontmatter.model?.maxTokens;
-        const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
+        const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
         const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
         const result = await streamText({
           model: modelInstance,
@@ -6907,7 +7172,12 @@ ${textContent}` };
             status: "completed",
             response: responseText + fullText,
             steps: step,
-            tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+            tokens: {
+              input: totalInputTokens,
+              output: totalOutputTokens,
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
+            },
             duration: now() - start,
             continuation: true,
             continuationMessages: [...messages],
@@ -6935,7 +7205,12 @@ ${textContent}` };
             status: "completed",
             response: responseText + fullText,
             steps: step,
-            tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+            tokens: {
+              input: totalInputTokens,
+              output: totalOutputTokens,
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
+            },
             duration: now() - start,
             continuation: true,
             continuationMessages: [...messages],
@@ -6975,11 +7250,14 @@ ${textContent}` };
         const fullResult = await result.response;
         const usage = await result.usage;
         const toolCallsResult = await result.toolCalls;
-        const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
+        const details = usage.inputTokenDetails ?? {};
+        const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
+        const stepCacheWriteTokens = typeof details.cacheWriteTokens === "number" ? details.cacheWriteTokens : typeof details.cacheCreationTokens === "number" ? details.cacheCreationTokens : typeof details.cacheCreationInputTokens === "number" ? details.cacheCreationInputTokens : 0;
         const stepInputTokens = usage.inputTokens ?? 0;
         totalInputTokens += stepInputTokens;
         totalOutputTokens += usage.outputTokens ?? 0;
         totalCachedTokens += stepCachedTokens;
+        totalCacheWriteTokens += stepCacheWriteTokens;
         latestContextTokens = stepInputTokens;
         toolOutputEstimateSinceModel = 0;
         yield pushEvent({
@@ -6987,9 +7265,13 @@ ${textContent}` };
           usage: {
             input: stepInputTokens,
             output: usage.outputTokens ?? 0,
-            cached: stepCachedTokens
+            cached: stepCachedTokens,
+            cacheWrite: stepCacheWriteTokens
           }
         });
+        console.info(
+          `[poncho][cost] model="${modelName}" step=${step} input=${stepInputTokens} output=${usage.outputTokens ?? 0} cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`
+        );
         const toolCalls = toolCallsResult.map((tc) => ({
           id: tc.toolCallId,
           name: tc.toolName,
@@ -7036,7 +7318,8 @@ ${textContent}` };
             tokens: {
               input: totalInputTokens,
               output: totalOutputTokens,
-              cached: totalCachedTokens
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
             },
             duration: now() - start,
             contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -7193,7 +7476,12 @@ ${textContent}` };
             status: "completed",
             response: responseText + fullText,
             steps: step,
-            tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+            tokens: {
+              input: totalInputTokens,
+              output: totalOutputTokens,
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
+            },
             duration: now() - start,
             continuation: true,
             continuationMessages: [...messages],
@@ -7224,6 +7512,20 @@ ${textContent}` };
               tool_name: result2.tool,
               content: `Tool error: ${result2.error}`
             });
+            {
+              const archive = this.archivedToolResultsByConversation.get(conversationId);
+              if (archive) {
+                archive[result2.callId] = {
+                  toolResultId: result2.callId,
+                  conversationId,
+                  toolName: result2.tool,
+                  toolCallId: result2.callId,
+                  createdAt: now(),
+                  sizeBytes: Buffer.byteLength(`Tool error: ${result2.error}`, "utf8"),
+                  payload: `Tool error: ${result2.error}`
+                };
+              }
+            }
             richToolResults.push({
               type: "tool-result",
               toolCallId: result2.callId,
@@ -7249,6 +7551,21 @@ ${textContent}` };
               tool_name: result2.tool,
               content: JSON.stringify(strippedOutput ?? null)
             });
+            {
+              const archive = this.archivedToolResultsByConversation.get(conversationId);
+              if (archive) {
+                const payload = JSON.stringify(result2.output ?? null);
+                archive[result2.callId] = {
+                  toolResultId: result2.callId,
+                  conversationId,
+                  toolName: result2.tool,
+                  toolCallId: result2.callId,
+                  createdAt: now(),
+                  sizeBytes: Buffer.byteLength(payload, "utf8"),
+                  payload
+                };
+              }
+            }
             if (mediaItems.length > 0) {
               richToolResults.push({
                 type: "tool-result",
@@ -7283,9 +7600,15 @@ ${textContent}` };
         messages.push({
           role: "assistant",
           content: assistantContent,
-          metadata: { timestamp: now(), id: randomUUID3(), step }
+          metadata: { timestamp: now(), id: randomUUID3(), step, runId }
         });
-        const toolMsgMeta = { timestamp: now(), id: randomUUID3(), step, _richToolResults: richToolResults };
+        const toolMsgMeta = {
+          timestamp: now(),
+          id: randomUUID3(),
+          step,
+          runId,
+          _richToolResults: richToolResults
+        };
         messages.push({
           role: "tool",
           content: JSON.stringify(toolResultsForModel),
@@ -7296,7 +7619,12 @@ ${textContent}` };
             status: "completed",
             response: responseText + fullText,
             steps: step,
-            tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+            tokens: {
+              input: totalInputTokens,
+              output: totalOutputTokens,
+              cached: totalCachedTokens,
+              cacheWrite: totalCacheWriteTokens
+            },
             duration: now() - start,
             continuation: true,
             continuationMessages: [...messages],
@@ -7357,7 +7685,12 @@ ${this.skillFingerprint}`;
         status: "completed",
         response: responseText,
         steps: maxSteps,
-        tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
+        tokens: {
+          input: totalInputTokens,
+          output: totalOutputTokens,
+          cached: totalCachedTokens,
+          cacheWrite: totalCacheWriteTokens
+        },
         duration: now() - start,
         continuation: true,
         continuationMessages: [...messages],
@@ -8677,7 +9010,7 @@ var createConversationStore = (config, options) => {
 };
 // src/index.ts
-import { defineTool as defineTool7 } from "@poncho-ai/sdk";
+import { defineTool as defineTool8 } from "@poncho-ai/sdk";
 export {
   AgentHarness,
   InMemoryConversationStore,
@@ -8710,7 +9043,7 @@ export {
   createSubagentTools,
   createUploadStore,
   createWriteTool,
-  defineTool7 as defineTool,
+  defineTool8 as defineTool,
   deleteOpenAICodexSession,
   deriveUploadKey,
   ensureAgentIdentity,