@poncho-ai/harness 0.34.0 → 0.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.34.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.34.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,8 +8,8 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 336.31 KB
12
- ESM ⚡️ Build success in 164ms
11
+ ESM dist/index.js 336.65 KB
12
+ ESM ⚡️ Build success in 155ms
13
13
  DTS Build start
14
- DTS ⚡️ Build success in 7760ms
15
- DTS dist/index.d.ts 33.99 KB
14
+ DTS ⚡️ Build success in 7364ms
15
+ DTS dist/index.d.ts 34.28 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.34.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`59a88cc`](https://github.com/cesr/poncho-ai/commit/59a88cc52b5c3aa7432b820424bb8067174233e5) Thanks [@cesr](https://github.com/cesr)! - fix: improve token estimation accuracy and handle missing attachments
8
+ - Use a JSON-specific token ratio for tool definitions to avoid inflating counts with many MCP tools.
9
+ - Track actual context size from model responses for compaction triggers instead of cumulative input tokens.
10
+ - Gracefully degrade when file attachments are missing or expired instead of crashing.
11
+
3
12
  ## 0.34.0
4
13
 
5
14
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -83,6 +83,11 @@ declare const resolveCompactionConfig: (explicit?: Partial<CompactionConfig>) =>
83
83
  declare const estimateTokens: (text: string) => number;
84
84
  /**
85
85
  * Estimate the total token count of a system prompt + messages + tool defs.
86
+ *
87
+ * Tool definitions are structured JSON (property names, braces, enum values)
88
+ * which tokenizes more efficiently than natural language — roughly 5-6
89
+ * chars/token vs ~4 chars/token for prose. We estimate them separately to
90
+ * avoid inflating the count when there are many MCP tools (100+).
86
91
  */
87
92
  declare const estimateTotalTokens: (systemPrompt: string, messages: Message[], toolDefinitionsJson?: string) => number;
88
93
  /**
package/dist/index.js CHANGED
@@ -397,10 +397,11 @@ var estimateTotalTokens = (systemPrompt, messages, toolDefinitionsJson) => {
397
397
  return sum + 200;
398
398
  }, 0);
399
399
  }
400
+ let tokens = Math.ceil(chars / 4 * OVERHEAD_MULTIPLIER);
400
401
  if (toolDefinitionsJson) {
401
- chars += toolDefinitionsJson.length;
402
+ tokens += Math.ceil(toolDefinitionsJson.length / 6);
402
403
  }
403
- return Math.ceil(chars / 4 * OVERHEAD_MULTIPLIER);
404
+ return tokens;
404
405
  };
405
406
  var findSafeSplitPoint = (messages, keepRecentMessages) => {
406
407
  const candidateIdx = messages.length - keepRecentMessages;
@@ -7289,19 +7290,24 @@ ${textContent}` };
7289
7290
  };
7290
7291
  }
7291
7292
  let resolvedData;
7292
- if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
7293
- const buf = await this.uploadStore.get(part.data);
7294
- resolvedData = buf.toString("base64");
7295
- } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
7296
- if (this.uploadStore) {
7293
+ try {
7294
+ if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
7297
7295
  const buf = await this.uploadStore.get(part.data);
7298
7296
  resolvedData = buf.toString("base64");
7297
+ } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
7298
+ if (this.uploadStore) {
7299
+ const buf = await this.uploadStore.get(part.data);
7300
+ resolvedData = buf.toString("base64");
7301
+ } else {
7302
+ const resp = await fetch(part.data);
7303
+ resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
7304
+ }
7299
7305
  } else {
7300
- const resp = await fetch(part.data);
7301
- resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
7306
+ resolvedData = part.data;
7302
7307
  }
7303
- } else {
7304
- resolvedData = part.data;
7308
+ } catch {
7309
+ const label = part.filename ?? part.mediaType;
7310
+ return { type: "text", text: `[Attached file: ${label} \u2014 file is no longer available]` };
7305
7311
  }
7306
7312
  if (isSupportedImage) {
7307
7313
  return {
@@ -7330,8 +7336,8 @@ ${textContent}` };
7330
7336
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
7331
7337
  if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
7332
7338
  const estimated = estimateTotalTokens(systemPrompt, messages, toolDefsJsonForEstimate);
7333
- const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
7334
- const effectiveTokens = Math.max(estimated, lastReportedInput);
7339
+ const lastReportedContext = latestContextTokens > 0 ? latestContextTokens + toolOutputEstimateSinceModel : 0;
7340
+ const effectiveTokens = Math.max(estimated, lastReportedContext);
7335
7341
  if (effectiveTokens > compactionConfig.trigger * contextWindow) {
7336
7342
  yield pushEvent({ type: "compaction:started", estimatedTokens: effectiveTokens });
7337
7343
  const compactResult = await compactMessages(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.34.0",
3
+ "version": "0.34.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/compaction.ts CHANGED
@@ -49,6 +49,11 @@ export const estimateTokens = (text: string): number =>
49
49
 
50
50
  /**
51
51
  * Estimate the total token count of a system prompt + messages + tool defs.
52
+ *
53
+ * Tool definitions are structured JSON (property names, braces, enum values)
54
+ * which tokenizes more efficiently than natural language — roughly 5-6
55
+ * chars/token vs ~4 chars/token for prose. We estimate them separately to
56
+ * avoid inflating the count when there are many MCP tools (100+).
52
57
  */
53
58
  export const estimateTotalTokens = (
54
59
  systemPrompt: string,
@@ -64,10 +69,13 @@ export const estimateTotalTokens = (
64
69
  return sum + 200; // rough estimate for file/image references
65
70
  }, 0);
66
71
  }
72
+ let tokens = Math.ceil((chars / 4) * OVERHEAD_MULTIPLIER);
67
73
  if (toolDefinitionsJson) {
68
- chars += toolDefinitionsJson.length;
74
+ // JSON-specific ratio — no overhead multiplier (structural tokens are
75
+ // already accounted for by the higher chars-per-token ratio).
76
+ tokens += Math.ceil(toolDefinitionsJson.length / 6);
69
77
  }
70
- return Math.ceil((chars / 4) * OVERHEAD_MULTIPLIER);
78
+ return tokens;
71
79
  };
72
80
 
73
81
  /**
package/src/harness.ts CHANGED
@@ -2154,19 +2154,24 @@ ${boundedMainMemory.trim()}`
2154
2154
  // Always resolve to base64 so the model doesn't need to
2155
2155
  // fetch URLs itself (which fails for private blob stores).
2156
2156
  let resolvedData: string;
2157
- if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
2158
- const buf = await this.uploadStore.get(part.data);
2159
- resolvedData = buf.toString("base64");
2160
- } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
2161
- if (this.uploadStore) {
2157
+ try {
2158
+ if (part.data.startsWith(PONCHO_UPLOAD_SCHEME) && this.uploadStore) {
2162
2159
  const buf = await this.uploadStore.get(part.data);
2163
2160
  resolvedData = buf.toString("base64");
2161
+ } else if (part.data.startsWith("https://") || part.data.startsWith("http://")) {
2162
+ if (this.uploadStore) {
2163
+ const buf = await this.uploadStore.get(part.data);
2164
+ resolvedData = buf.toString("base64");
2165
+ } else {
2166
+ const resp = await fetch(part.data);
2167
+ resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
2168
+ }
2164
2169
  } else {
2165
- const resp = await fetch(part.data);
2166
- resolvedData = Buffer.from(await resp.arrayBuffer()).toString("base64");
2170
+ resolvedData = part.data;
2167
2171
  }
2168
- } else {
2169
- resolvedData = part.data;
2172
+ } catch {
2173
+ const label = part.filename ?? part.mediaType;
2174
+ return { type: "text" as const, text: `[Attached file: ${label} — file is no longer available]` };
2170
2175
  }
2171
2176
  if (isSupportedImage) {
2172
2177
  return {
@@ -2200,8 +2205,13 @@ ${boundedMainMemory.trim()}`
2200
2205
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
2201
2206
  if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
2202
2207
  const estimated = estimateTotalTokens(systemPrompt, messages, toolDefsJsonForEstimate);
2203
- const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
2204
- const effectiveTokens = Math.max(estimated, lastReportedInput);
2208
+ // Use the actual context size from the last model response (input tokens
2209
+ // + tool output accumulated since), not totalInputTokens which is a
2210
+ // cumulative sum across all steps and would wildly overcount.
2211
+ const lastReportedContext = latestContextTokens > 0
2212
+ ? latestContextTokens + toolOutputEstimateSinceModel
2213
+ : 0;
2214
+ const effectiveTokens = Math.max(estimated, lastReportedContext);
2205
2215
 
2206
2216
  if (effectiveTokens > compactionConfig.trigger * contextWindow) {
2207
2217
  yield pushEvent({ type: "compaction:started", estimatedTokens: effectiveTokens });