npm - @poncho-ai/harness - Versions diffs - 0.34.0 → 0.34.1 - Mend

@poncho-ai/harness 0.34.0 → 0.34.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7) hide show

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @poncho-ai/harness@0.34.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
+> @poncho-ai/harness@0.34.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
 > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
 [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,8 +8,8 @@
 CLI tsup v8.5.1
 CLI Target: es2022
 ESM Build start
-ESM dist/index.js 336.31 KB
-ESM ⚡️ Build success in 164ms
+ESM dist/index.js 336.65 KB
+ESM ⚡️ Build success in 155ms
 DTS Build start
-DTS ⚡️ Build success in 7760ms
-DTS dist/index.d.ts 33.99 KB
+DTS ⚡️ Build success in 7364ms
+DTS dist/index.d.ts 34.28 KB

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,14 @@
 # @poncho-ai/harness
+## 0.34.1
+### Patch Changes
+- [`59a88cc`](https://github.com/cesr/poncho-ai/commit/59a88cc52b5c3aa7432b820424bb8067174233e5) Thanks [@cesr](https://github.com/cesr)! - fix: improve token estimation accuracy and handle missing attachments
+  - Use a JSON-specific token ratio for tool definitions to avoid inflating counts with many MCP tools.
+  - Track actual context size from model responses for compaction triggers instead of cumulative input tokens.
+  - Gracefully degrade when file attachments are missing or expired instead of crashing.
 ## 0.34.0
 ### Minor Changes

package/dist/index.d.ts CHANGED Viewed

@@ -83,6 +83,11 @@ declare const resolveCompactionConfig: (explicit?: Partial<CompactionConfig>) =>
 declare const estimateTokens: (text: string) => number;
 /**
  * Estimate the total token count of a system prompt + messages + tool defs.
+ *
+ * Tool definitions are structured JSON (property names, braces, enum values)
+ * which tokenizes more efficiently than natural language — roughly 5-6
+ * chars/token vs ~4 chars/token for prose.  We estimate them separately to
+ * avoid inflating the count when there are many MCP tools (100+).
  */
 declare const estimateTotalTokens: (systemPrompt: string, messages: Message[], toolDefinitionsJson?: string) => number;
 /**

package/dist/index.js CHANGED Viewed

@@ -397,10 +397,11 @@ var estimateTotalTokens = (systemPrompt, messages, toolDefinitionsJson) => {
       return sum + 200;
     }, 0);
   }
+  let tokens = Math.ceil(chars / 4 * OVERHEAD_MULTIPLIER);
   if (toolDefinitionsJson) {
-    chars += toolDefinitionsJson.length;
+    tokens += Math.ceil(toolDefinitionsJson.length / 6);
   }
-  return Math.ceil(chars / 4 * OVERHEAD_MULTIPLIER);
+  return tokens;
 };
 var findSafeSplitPoint = (messages, keepRecentMessages) => {
   const candidateIdx = messages.length - keepRecentMessages;
@@ -7289,19 +7290,24 @@ ${textContent}` };
                   };
                 }
                 let resolvedData;
-                if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
-                  const buf = await this.uploadStore.get(part.data);
-                  resolvedData = buf.toString("base64");
-                } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
-                  if (this.uploadStore) {
+                try {
+                  if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
                     const buf = await this.uploadStore.get(part.data);
                     resolvedData = buf.toString("base64");
+                  } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
+                    if (this.uploadStore) {
+                      const buf = await this.uploadStore.get(part.data);
+                      resolvedData = buf.toString("base64");
+                    } else {
+                      const resp = await fetch(part.data);
+                      resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
+                    }
                   } else {
-                    const resp = await fetch(part.data);
-                    resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
+                    resolvedData = part.data;
                   }
-                } else {
-                  resolvedData = part.data;
+                } catch {
+                  const label = part.filename ?? part.mediaType;
+                  return { type: "text", text: `[Attached file: ${label} \u2014 file is no longer available]` };
                 }
                 if (isSupportedImage) {
                   return {
@@ -7330,8 +7336,8 @@ ${textContent}` };
         const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
         if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
           const estimated = estimateTotalTokens(systemPrompt, messages, toolDefsJsonForEstimate);
-          const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
-          const effectiveTokens = Math.max(estimated, lastReportedInput);
+          const lastReportedContext = latestContextTokens > 0 ? latestContextTokens + toolOutputEstimateSinceModel : 0;
+          const effectiveTokens = Math.max(estimated, lastReportedContext);
           if (effectiveTokens > compactionConfig.trigger * contextWindow) {
             yield pushEvent({ type: "compaction:started", estimatedTokens: effectiveTokens });
             const compactResult = await compactMessages(

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@poncho-ai/harness",
-  "version": "0.34.0",
+  "version": "0.34.1",
   "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
   "repository": {
     "type": "git",

package/src/compaction.ts CHANGED Viewed

@@ -49,6 +49,11 @@ export const estimateTokens = (text: string): number =>
 /**
  * Estimate the total token count of a system prompt + messages + tool defs.
+ *
+ * Tool definitions are structured JSON (property names, braces, enum values)
+ * which tokenizes more efficiently than natural language — roughly 5-6
+ * chars/token vs ~4 chars/token for prose.  We estimate them separately to
+ * avoid inflating the count when there are many MCP tools (100+).
  */
 export const estimateTotalTokens = (
   systemPrompt: string,
@@ -64,10 +69,13 @@ export const estimateTotalTokens = (
           return sum + 200; // rough estimate for file/image references
         }, 0);
   }
+  let tokens = Math.ceil((chars / 4) * OVERHEAD_MULTIPLIER);
   if (toolDefinitionsJson) {
-    chars += toolDefinitionsJson.length;
+    // JSON-specific ratio — no overhead multiplier (structural tokens are
+    // already accounted for by the higher chars-per-token ratio).
+    tokens += Math.ceil(toolDefinitionsJson.length / 6);
   }
-  return Math.ceil((chars / 4) * OVERHEAD_MULTIPLIER);
+  return tokens;
 };
 /**

package/src/harness.ts CHANGED Viewed

@@ -2154,19 +2154,24 @@ ${boundedMainMemory.trim()}`
                 // Always resolve to base64 so the model doesn't need to
                 // fetch URLs itself (which fails for private blob stores).
                 let resolvedData: string;
-                if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
-                  const buf = await this.uploadStore.get(part.data);
-                  resolvedData = buf.toString("base64");
-                } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
-                  if (this.uploadStore) {
+                try {
+                  if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
                     const buf = await this.uploadStore.get(part.data);
                     resolvedData = buf.toString("base64");
+                  } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
+                    if (this.uploadStore) {
+                      const buf = await this.uploadStore.get(part.data);
+                      resolvedData = buf.toString("base64");
+                    } else {
+                      const resp = await fetch(part.data);
+                      resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
+                    }
                   } else {
-                    const resp = await fetch(part.data);
-                    resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
+                    resolvedData = part.data;
                   }
-                } else {
-                  resolvedData = part.data;
+                } catch {
+                  const label = part.filename ?? part.mediaType;
+                  return { type: "text" as const, text: `[Attached file: ${label} — file is no longer available]` };
                 }
                 if (isSupportedImage) {
                   return {
@@ -2200,8 +2205,13 @@ ${boundedMainMemory.trim()}`
         const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
         if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
           const estimated = estimateTotalTokens(systemPrompt, messages, toolDefsJsonForEstimate);
-          const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
-          const effectiveTokens = Math.max(estimated, lastReportedInput);
+          // Use the actual context size from the last model response (input tokens
+          // + tool output accumulated since), not totalInputTokens which is a
+          // cumulative sum across all steps and would wildly overcount.
+          const lastReportedContext = latestContextTokens > 0
+            ? latestContextTokens + toolOutputEstimateSinceModel
+            : 0;
+          const effectiveTokens = Math.max(estimated, lastReportedContext);
           if (effectiveTokens > compactionConfig.trigger * contextWindow) {
             yield pushEvent({ type: "compaction:started", estimatedTokens: effectiveTokens });