@poncho-ai/harness 0.59.10 → 0.59.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.10 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.11 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -9,8 +9,8 @@
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
11
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
12
- ESM dist/index.js 559.89 KB
13
- ESM ⚡️ Build success in 238ms
12
+ ESM dist/index.js 560.62 KB
13
+ ESM ⚡️ Build success in 239ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7368ms
15
+ DTS ⚡️ Build success in 7569ms
16
16
  DTS dist/index.d.ts 102.06 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.11
4
+
5
+ ### Patch Changes
6
+
7
+ - [`7464ad0`](https://github.com/cesr/poncho-ai/commit/7464ad04448095b34c4b1cbd52da559cf6bd6024) Thanks [@cesr](https://github.com/cesr)! - The user's memory file gets its own 1-hour Anthropic cache breakpoint.
8
+ It previously rode the uncached dynamic system tail (with todos + time),
9
+ which re-wrote the memory block — typically the bulk of a new
10
+ conversation's one-time cache cost — on every cold prefix, despite memory
11
+ only changing on explicit writes. The system prompt now has three tiers:
12
+ static (1h), memory (1h), volatile todos+time (uncached).
13
+
3
14
  ## 0.59.10
4
15
 
5
16
  ### Patch Changes
package/dist/index.js CHANGED
@@ -10720,11 +10720,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
10720
10720
  const timeContext = `
10721
10721
 
10722
10722
  Current UTC time (hour precision): ${hourlyTime}`;
10723
- const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
10724
- return { staticPart, dynamicPart };
10723
+ const dynamicPart = `${todoContext}${timeContext}`;
10724
+ return { staticPart, memoryPart: memoryContext, dynamicPart };
10725
10725
  };
10726
- let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10727
- let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
10726
+ let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10727
+ let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
10728
10728
  let lastPromptFingerprint = `${this.agentFileFingerprint}
10729
10729
  ${this.skillFingerprint}`;
10730
10730
  const pushEvent = (event) => {
@@ -11203,6 +11203,18 @@ ${textContent}` };
11203
11203
  anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
11204
11204
  }
11205
11205
  },
11206
+ // Memory: per-user, conversation-independent, changes only on
11207
+ // an explicit memory write — its own 1h breakpoint means a
11208
+ // memory edit busts THIS block forward but a normal turn reads
11209
+ // it (plus everything before it) from cache. Breakpoint budget:
11210
+ // Anthropic allows 4; this is #2 of 3 (static, memory, message-level tail).
11211
+ ...memorySystemPart.length > 0 ? [{
11212
+ role: "system",
11213
+ content: memorySystemPart,
11214
+ providerOptions: {
11215
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
11216
+ }
11217
+ }] : [],
11206
11218
  ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
11207
11219
  ...cachedMessages
11208
11220
  ] : cachedMessages;
@@ -11844,8 +11856,8 @@ ${textContent}` };
11844
11856
  const currentFingerprint = `${this.agentFileFingerprint}
11845
11857
  ${this.skillFingerprint}`;
11846
11858
  if (currentFingerprint !== lastPromptFingerprint) {
11847
- ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11848
- systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
11859
+ ({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11860
+ systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
11849
11861
  lastPromptFingerprint = currentFingerprint;
11850
11862
  }
11851
11863
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.10",
3
+ "version": "0.59.11",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -2331,13 +2331,17 @@ ${typeStubs}
2331
2331
  Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
2332
2332
  }
2333
2333
 
2334
- // Split the system prompt into a static portion (stable across turns
2335
- // and jobs within an hour, modulo MCP connect/skill author/memory edit)
2336
- // and a dynamic tail (memory, todos, time). The static portion gets a
2337
- // 1-hour Anthropic cache breakpoint downstream; the tail rides the
2338
- // existing 5-min message-level breakpoint. See the streamText site for
2339
- // the breakpoint wiring.
2340
- const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
2334
+ // Split the system prompt into THREE cacheability tiers (see the
2335
+ // streamText site for the breakpoint wiring):
2336
+ // 1. staticPart — agent body + skills + runtime context. Stable across
2337
+ // turns, conversations, and jobs within an hour. 1h breakpoint.
2338
+ // 2. memoryPart — the user's memory file. Per-user, shared by every
2339
+ // conversation, and only changes on an explicit memory write — so
2340
+ // it gets its own 1h breakpoint instead of riding the volatile
2341
+ // tail (where it busted the message-history cache for no reason).
2342
+ // 3. dynamicPart — todos + hour-quantized time. Genuinely volatile
2343
+ // within a conversation; uncached, kept as small as possible.
2344
+ const buildSystemPromptParts = async (): Promise<{ staticPart: string; memoryPart: string; dynamicPart: string }> => {
2341
2345
  const agentPrompt = renderCurrentAgentPrompt();
2342
2346
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
2343
2347
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
@@ -2358,13 +2362,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2358
2362
  return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
2359
2363
  })();
2360
2364
  const timeContext = `\n\nCurrent UTC time (hour precision): ${hourlyTime}`;
2361
- const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
2362
- return { staticPart, dynamicPart };
2365
+ const dynamicPart = `${todoContext}${timeContext}`;
2366
+ return { staticPart, memoryPart: memoryContext, dynamicPart };
2363
2367
  };
2364
- let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
2368
+ let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
2365
2369
  await buildSystemPromptParts();
2366
2370
  // Concatenated form for legacy consumers (token estimation, telemetry).
2367
- let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
2371
+ let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
2368
2372
  let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
2369
2373
 
2370
2374
  const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -3008,6 +3012,20 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3008
3012
  anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
3009
3013
  },
3010
3014
  },
3015
+ // Memory: per-user, conversation-independent, changes only on
3016
+ // an explicit memory write — its own 1h breakpoint means a
3017
+ // memory edit busts THIS block forward but a normal turn reads
3018
+ // it (plus everything before it) from cache. Breakpoint budget:
3019
+ // Anthropic allows 4; this is #2 of 3 (static, memory, message-level tail).
3020
+ ...(memorySystemPart.length > 0
3021
+ ? [{
3022
+ role: "system" as const,
3023
+ content: memorySystemPart,
3024
+ providerOptions: {
3025
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
3026
+ },
3027
+ }]
3028
+ : []),
3011
3029
  ...(dynamicSystemPart.length > 0
3012
3030
  ? [{ role: "system" as const, content: dynamicSystemPart }]
3013
3031
  : []),
@@ -3794,9 +3812,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3794
3812
  agent = this.parsedAgent as ParsedAgent;
3795
3813
  const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
3796
3814
  if (currentFingerprint !== lastPromptFingerprint) {
3797
- ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
3815
+ ({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
3798
3816
  await buildSystemPromptParts());
3799
- systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
3817
+ systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
3800
3818
  lastPromptFingerprint = currentFingerprint;
3801
3819
  }
3802
3820
  }