npm - @poncho-ai/harness - Versions diffs - 0.45.0 → 0.46.0 - Mend

@poncho-ai/harness 0.45.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/.turbo/turbo-build.log +5 -5
package/CHANGELOG.md +44 -0
package/dist/index.d.ts +6 -0
package/dist/index.js +32 -14
package/package.json +2 -2
package/src/harness.ts +71 -22
package/src/orchestrator/run-conversation-turn.ts +7 -0
package/src/prompt-cache.ts +1 -1

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @poncho-ai/harness@0.45.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
+> @poncho-ai/harness@0.46.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
 > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
 [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
 CLI tsup v8.5.1
 CLI Target: es2022
 ESM Build start
+ESM dist/index.js            525.40 KB
 ESM dist/isolate-VY35DGLM.js 49.43 KB
-ESM dist/index.js            524.35 KB
-ESM ⚡️ Build success in 230ms
+ESM ⚡️ Build success in 214ms
 DTS Build start
-DTS ⚡️ Build success in 7575ms
-DTS dist/index.d.ts 85.07 KB
+DTS ⚡️ Build success in 7043ms
+DTS dist/index.d.ts 85.30 KB

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,49 @@
 # @poncho-ai/harness
+## 0.46.0
+### Minor Changes
+- [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
+  Two related changes to Anthropic prompt caching:
+  **1-hour static system-prompt breakpoint.** The harness now splits the
+  assembled system prompt into a static portion (agent body + skill
+  context + browser/fs/isolate context — stable across many turns and
+  jobs within an hour) and a dynamic tail (memory, todos, time). On
+  Anthropic models, these are sent as two `role: "system"` messages with
+  `cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
+  tail breakpoint on the last user/assistant/tool message is retained.
+  This lets later turns and job runs read ~95% of the system prompt at
+  0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
+  has expired — the previous setup only cached for 5 minutes via the
+  tail breakpoint. Separate conversations for the same user, and
+  interactive and job runs alike, all share the static cache.
+  **Per-run cache kill-switch.** Added `RunInput.disablePromptCache?: boolean`
+  (also exposed on `RunConversationTurnOpts.disablePromptCache`,
+  forwarded into `runInput`). When set, the harness skips the 5-min tail
+  breakpoint for that run. The 1-hour static breakpoint is still
+  applied — the run still benefits from reading the shared static cache,
+  just doesn't write a new tail entry that won't be read before TTL.
+  Intended for one-shot programmatic invocations (cron-fired jobs,
+  subagent dispatch) where no follow-up turn is coming within the 5-min
+  TTL window, so the 1.25× write surcharge would be pure waste.
+  Non-Anthropic providers fall through to the previous single concatenated
+  `system:` string with no cache control — those providers auto-cache.
+  Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
+  for reuse at the streamText site.
+### Patch Changes
+- Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
+  - @poncho-ai/sdk@1.12.0
 ## 0.45.0
 ### Minor Changes

package/dist/index.d.ts CHANGED Viewed

@@ -2036,6 +2036,12 @@ interface RunConversationTurnOpts {
     parameters?: Record<string, unknown>;
     abortSignal?: AbortSignal;
     tenantId?: string | null;
+    /**
+     * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
+     * turns with no follow-up coming (cron-fired jobs, etc.) so the harness
+     * skips the 5-min tail cache write (the 1-hour static one still applies).
+     */
+    disablePromptCache?: boolean;
     /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
     onEvent?: (event: AgentEvent) => void | Promise<void>;
 }

package/dist/index.js CHANGED Viewed

@@ -10129,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
       );
     }
     const hasFullToolResults = hasUntruncatedToolResults(messages);
-    if (hasFullToolResults) {
-      costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
+    const skipTailCache = input.disablePromptCache === true;
+    if (skipTailCache) {
+      costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
+    } else if (hasFullToolResults) {
+      costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
     } else {
-      costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
+      costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
     }
     const inputMessageCount = messages.length;
     const events = [];
@@ -10221,11 +10224,11 @@ ${typeStubs}
 Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
     }
-    const buildSystemPrompt = async () => {
+    const buildSystemPromptParts = async () => {
       const agentPrompt = renderCurrentAgentPrompt();
       const tenantSkills = await this.getSkillsForTenant(input.tenantId);
       const skillContextWindow = buildSkillContextWindow(tenantSkills);
-      const promptWithSkills = skillContextWindow ? `${agentPrompt}${developmentContext}
+      const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
 ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
       const hourlyTime = (() => {
@@ -10237,9 +10240,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
       const timeContext = this.reminderStore ? `
 Current UTC time (hour precision): ${hourlyTime}` : "";
-      return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
+      const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
+      return { staticPart, dynamicPart };
     };
-    let systemPrompt = await buildSystemPrompt();
+    let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
+    let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
     let lastPromptFingerprint = `${this.agentFileFingerprint}
 ${this.skillFingerprint}`;
     const pushEvent = (event) => {
@@ -10673,17 +10678,28 @@ ${textContent}` };
           const coreMessages = cachedCoreMessages;
           const temperature = agent.frontmatter.model?.temperature ?? 0.2;
           const maxTokens = agent.frontmatter.model?.maxTokens;
-          const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
-          const cachedMessages = addPromptCacheBreakpoints(
+          const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
             coreMessages,
             modelInstance,
-            breakpointIndex
+            hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
           );
+          const useStaticCache = isAnthropicModel(modelInstance);
+          const finalMessages = useStaticCache ? [
+            {
+              role: "system",
+              content: staticSystemPart,
+              providerOptions: {
+                anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
+              }
+            },
+            ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
+            ...cachedMessages
+          ] : cachedMessages;
           const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
           const result = await streamText({
             model: modelInstance,
-            system: systemPrompt,
-            messages: cachedMessages,
+            ...useStaticCache ? {} : { system: systemPrompt },
+            messages: finalMessages,
             tools,
             temperature,
             abortSignal: input.abortSignal,
@@ -11308,7 +11324,8 @@ ${textContent}` };
               const currentFingerprint = `${this.agentFileFingerprint}
 ${this.skillFingerprint}`;
               if (currentFingerprint !== lastPromptFingerprint) {
-                systemPrompt = await buildSystemPrompt();
+                ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
+                systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
                 lastPromptFingerprint = currentFingerprint;
               }
             }
@@ -13577,7 +13594,8 @@ var runConversationTurn = async (opts) => {
         ),
         messages: harnessMessages,
         files: opts.files && opts.files.length > 0 ? opts.files : void 0,
-        abortSignal: opts.abortSignal
+        abortSignal: opts.abortSignal,
+        disablePromptCache: opts.disablePromptCache
       },
       initialContextTokens: conversation.contextTokens ?? 0,
       initialContextWindow: conversation.contextWindow ?? 0,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@poncho-ai/harness",
-  "version": "0.45.0",
+  "version": "0.46.0",
   "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
   "repository": {
     "type": "git",
@@ -34,7 +34,7 @@
     "mustache": "^4.2.0",
     "yaml": "^2.4.0",
     "zod": "^3.22.0",
-    "@poncho-ai/sdk": "1.11.0"
+    "@poncho-ai/sdk": "1.12.0"
   },
   "peerDependencies": {
     "esbuild": ">=0.17.0",

package/src/harness.ts CHANGED Viewed

@@ -59,7 +59,7 @@ import {
   mergeSkills,
 } from "./skill-context.js";
 import { generateText, streamText, type ModelMessage } from "ai";
-import { addPromptCacheBreakpoints } from "./prompt-cache.js";
+import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
 import { jsonSchemaToZod } from "./schema-converter.js";
 import type { SkillMetadata } from "./skill-context.js";
 import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
@@ -2104,10 +2104,17 @@ export class AgentHarness {
       );
     }
     const hasFullToolResults = hasUntruncatedToolResults(messages);
-    if (hasFullToolResults) {
-      costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
+    // The 5-min tail breakpoint is skipped only when the caller explicitly
+    // declares no follow-up is coming (jobs, programmatic one-shots). The
+    // 1-hour static breakpoint on the system prompt is always on — it
+    // amortizes across every later turn or job within the hour.
+    const skipTailCache = input.disablePromptCache === true;
+    if (skipTailCache) {
+      costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
+    } else if (hasFullToolResults) {
+      costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
     } else {
-      costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
+      costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
     }
     const inputMessageCount = messages.length;
     const events: AgentEvent[] = [];
@@ -2210,11 +2217,17 @@ ${typeStubs}
 Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
     }
-    const buildSystemPrompt = async (): Promise<string> => {
+    // Split the system prompt into a static portion (stable across turns
+    // and jobs within an hour, modulo MCP connect/skill author/memory edit)
+    // and a dynamic tail (memory, todos, time). The static portion gets a
+    // 1-hour Anthropic cache breakpoint downstream; the tail rides the
+    // existing 5-min message-level breakpoint. See the streamText site for
+    // the breakpoint wiring.
+    const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
       const agentPrompt = renderCurrentAgentPrompt();
       const tenantSkills = await this.getSkillsForTenant(input.tenantId);
       const skillContextWindow = buildSkillContextWindow(tenantSkills);
-      const promptWithSkills = skillContextWindow
+      const staticPart = skillContextWindow
         ? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
         : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
       // Quantize to the hour so the system prompt is stable across runs
@@ -2230,9 +2243,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
       const timeContext = this.reminderStore
         ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
         : "";
-      return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
+      const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
+      return { staticPart, dynamicPart };
     };
-    let systemPrompt = await buildSystemPrompt();
+    let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
+      await buildSystemPromptParts();
+    // Concatenated form: sent as `system:` to non-Anthropic models; also used by legacy consumers (token estimation, telemetry).
+    let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
     let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
     const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -2772,25 +2789,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
         const temperature = agent.frontmatter.model?.temperature ?? 0.2;
         const maxTokens = agent.frontmatter.model?.maxTokens;
-        // Place the breakpoint before any untruncated tool-result so we
-        // cache only the stable prefix when prior-run tool results are
-        // still full-fidelity. Otherwise cache at the history tail.
-        const breakpointIndex = hasFullToolResults
-          ? findLastStableCacheIndex(coreMessages)
-          : coreMessages.length - 1;
-        const cachedMessages = addPromptCacheBreakpoints(
-          coreMessages,
-          modelInstance,
-          breakpointIndex,
-        );
+        // Place the tail breakpoint before any untruncated tool-result so
+        // we cache only the stable prefix when prior-run tool results are
+        // still full-fidelity. Otherwise cache at the history tail. When
+        // `skipTailCache` is set (per-run override), don't write the tail
+        // breakpoint at all. The 1-hour static-prefix breakpoint is added
+        // separately when assembling the final messages array.
+        const cachedMessages = skipTailCache
+          ? coreMessages
+          : addPromptCacheBreakpoints(
+              coreMessages,
+              modelInstance,
+              hasFullToolResults
+                ? findLastStableCacheIndex(coreMessages)
+                : coreMessages.length - 1,
+            );
+        // Anthropic: split system into two blocks with a 1-hour cache
+        // breakpoint at the boundary between the static portion (agent
+        // body + skills + browser/fs/isolate context — stable across many
+        // turns and jobs) and the dynamic tail (memory, todos, time).
+        // The static block becomes a hot cache that every later turn and
+        // job in the hour reads at 0.1× — much bigger payoff than the
+        // 5-min tail breakpoint, which only survives active back-and-forth.
+        // For non-Anthropic models, fall back to the single concatenated
+        // string via `system:` — those providers auto-cache.
+        const useStaticCache = isAnthropicModel(modelInstance);
+        const finalMessages: ModelMessage[] = useStaticCache
+          ? [
+              {
+                role: "system",
+                content: staticSystemPart,
+                providerOptions: {
+                  anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
+                },
+              },
+              ...(dynamicSystemPart.length > 0
+                ? [{ role: "system" as const, content: dynamicSystemPart }]
+                : []),
+              ...cachedMessages,
+            ]
+          : cachedMessages;
         const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
         const result = await streamText({
           model: modelInstance,
-          system: systemPrompt,
-          messages: cachedMessages,
+          ...(useStaticCache ? {} : { system: systemPrompt }),
+          messages: finalMessages,
           tools,
           temperature,
           abortSignal: input.abortSignal,
@@ -3532,7 +3579,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
             agent = this.parsedAgent as ParsedAgent;
             const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
             if (currentFingerprint !== lastPromptFingerprint) {
-              systemPrompt = await buildSystemPrompt();
+              ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
+                await buildSystemPromptParts());
+              systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
               lastPromptFingerprint = currentFingerprint;
             }
           }

package/src/orchestrator/run-conversation-turn.ts CHANGED Viewed

@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
   parameters?: Record<string, unknown>;
   abortSignal?: AbortSignal;
   tenantId?: string | null;
+  /**
+   * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
+   * turns with no follow-up coming (cron-fired jobs, etc.) so the harness
+   * skips the 5-min tail cache write (the 1-hour static one still applies).
+   */
+  disablePromptCache?: boolean;
   /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
   onEvent?: (event: AgentEvent) => void | Promise<void>;
 }
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
         messages: harnessMessages,
         files: opts.files && opts.files.length > 0 ? opts.files : undefined,
         abortSignal: opts.abortSignal,
+        disablePromptCache: opts.disablePromptCache,
       },
       initialContextTokens: conversation.contextTokens ?? 0,
       initialContextWindow: conversation.contextWindow ?? 0,

package/src/prompt-cache.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { ModelMessage, LanguageModel } from "ai";
-function isAnthropicModel(model: LanguageModel): boolean {
+export function isAnthropicModel(model: LanguageModel): boolean {
   if (typeof model === "string") {
     return model.includes("anthropic") || model.includes("claude");
   }