@poncho-ai/harness 0.45.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.45.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.46.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
+ ESM dist/index.js 525.40 KB
11
12
  ESM dist/isolate-VY35DGLM.js 49.43 KB
12
- ESM dist/index.js 524.35 KB
13
- ESM ⚡️ Build success in 230ms
13
+ ESM ⚡️ Build success in 214ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7575ms
16
- DTS dist/index.d.ts 85.07 KB
15
+ DTS ⚡️ Build success in 7043ms
16
+ DTS dist/index.d.ts 85.30 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,49 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.46.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
8
+
9
+ Two related changes to Anthropic prompt caching:
10
+
11
+ **1-hour static system-prompt breakpoint.** The harness now splits the
12
+ assembled system prompt into a static portion (agent body + skill
13
+ context + browser/fs/isolate context — stable across many turns and
14
+ jobs within an hour) and a dynamic tail (memory, todos, time). On
15
+ Anthropic models, these are sent as two `role: "system"` messages with
16
+ `cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
17
+ tail breakpoint on the last user/assistant/tool message is retained.
18
+
19
+ This lets later turns and job runs read ~95% of the system prompt at
20
+ 0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
21
+ has expired — the previous setup only cached for 5 minutes via the
22
+ tail breakpoint. Different conversations for the same user, and
23
+ interactive and job runs alike, all share the static cache.
24
+
25
+ **Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
26
+ boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
27
+ forwarded into `runInput`). When set, the harness skips the 5-min tail
28
+ breakpoint for that run. The 1-hour static breakpoint is still
29
+ applied — the run still benefits from reading the shared static cache,
30
+ it just doesn't write a new tail entry that wouldn't be read before the TTL expires.
31
+
32
+ Intended for one-shot programmatic invocations (cron-fired jobs,
33
+ subagent dispatch) where no follow-up turn is coming within the 5-min
34
+ TTL window, so the 1.25× write surcharge would be pure waste.
35
+
36
+ Non-Anthropic providers fall through to the previous single concatenated
37
+ `system:` string with no cache control — those providers auto-cache.
38
+
39
+ Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
40
+ for reuse at the streamText site.
41
+
42
+ ### Patch Changes
43
+
44
+ - Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
45
+ - @poncho-ai/sdk@1.12.0
46
+
3
47
  ## 0.45.0
4
48
 
5
49
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -2036,6 +2036,12 @@ interface RunConversationTurnOpts {
2036
2036
  parameters?: Record<string, unknown>;
2037
2037
  abortSignal?: AbortSignal;
2038
2038
  tenantId?: string | null;
2039
+ /**
2040
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
2041
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
2042
+ * harness skips the Anthropic 5-min tail cache write (the 1-hour static breakpoint is still applied).
2043
+ */
2044
+ disablePromptCache?: boolean;
2039
2045
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
2040
2046
  onEvent?: (event: AgentEvent) => void | Promise<void>;
2041
2047
  }
package/dist/index.js CHANGED
@@ -10129,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
10129
10129
  );
10130
10130
  }
10131
10131
  const hasFullToolResults = hasUntruncatedToolResults(messages);
10132
- if (hasFullToolResults) {
10133
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10132
+ const skipTailCache = input.disablePromptCache === true;
10133
+ if (skipTailCache) {
10134
+ costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
10135
+ } else if (hasFullToolResults) {
10136
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10134
10137
  } else {
10135
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10138
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10136
10139
  }
10137
10140
  const inputMessageCount = messages.length;
10138
10141
  const events = [];
@@ -10221,11 +10224,11 @@ ${typeStubs}
10221
10224
 
10222
10225
  Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
10223
10226
  }
10224
- const buildSystemPrompt = async () => {
10227
+ const buildSystemPromptParts = async () => {
10225
10228
  const agentPrompt = renderCurrentAgentPrompt();
10226
10229
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
10227
10230
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
10228
- const promptWithSkills = skillContextWindow ? `${agentPrompt}${developmentContext}
10231
+ const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
10229
10232
 
10230
10233
  ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
10231
10234
  const hourlyTime = (() => {
@@ -10237,9 +10240,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
10237
10240
  const timeContext = this.reminderStore ? `
10238
10241
 
10239
10242
  Current UTC time (hour precision): ${hourlyTime}` : "";
10240
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
10243
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
10244
+ return { staticPart, dynamicPart };
10241
10245
  };
10242
- let systemPrompt = await buildSystemPrompt();
10246
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10247
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
10243
10248
  let lastPromptFingerprint = `${this.agentFileFingerprint}
10244
10249
  ${this.skillFingerprint}`;
10245
10250
  const pushEvent = (event) => {
@@ -10673,17 +10678,28 @@ ${textContent}` };
10673
10678
  const coreMessages = cachedCoreMessages;
10674
10679
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
10675
10680
  const maxTokens = agent.frontmatter.model?.maxTokens;
10676
- const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
10677
- const cachedMessages = addPromptCacheBreakpoints(
10681
+ const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
10678
10682
  coreMessages,
10679
10683
  modelInstance,
10680
- breakpointIndex
10684
+ hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
10681
10685
  );
10686
+ const useStaticCache = isAnthropicModel(modelInstance);
10687
+ const finalMessages = useStaticCache ? [
10688
+ {
10689
+ role: "system",
10690
+ content: staticSystemPart,
10691
+ providerOptions: {
10692
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
10693
+ }
10694
+ },
10695
+ ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
10696
+ ...cachedMessages
10697
+ ] : cachedMessages;
10682
10698
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
10683
10699
  const result = await streamText({
10684
10700
  model: modelInstance,
10685
- system: systemPrompt,
10686
- messages: cachedMessages,
10701
+ ...useStaticCache ? {} : { system: systemPrompt },
10702
+ messages: finalMessages,
10687
10703
  tools,
10688
10704
  temperature,
10689
10705
  abortSignal: input.abortSignal,
@@ -11308,7 +11324,8 @@ ${textContent}` };
11308
11324
  const currentFingerprint = `${this.agentFileFingerprint}
11309
11325
  ${this.skillFingerprint}`;
11310
11326
  if (currentFingerprint !== lastPromptFingerprint) {
11311
- systemPrompt = await buildSystemPrompt();
11327
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11328
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
11312
11329
  lastPromptFingerprint = currentFingerprint;
11313
11330
  }
11314
11331
  }
@@ -13577,7 +13594,8 @@ var runConversationTurn = async (opts) => {
13577
13594
  ),
13578
13595
  messages: harnessMessages,
13579
13596
  files: opts.files && opts.files.length > 0 ? opts.files : void 0,
13580
- abortSignal: opts.abortSignal
13597
+ abortSignal: opts.abortSignal,
13598
+ disablePromptCache: opts.disablePromptCache
13581
13599
  },
13582
13600
  initialContextTokens: conversation.contextTokens ?? 0,
13583
13601
  initialContextWindow: conversation.contextWindow ?? 0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.45.0",
3
+ "version": "0.46.0",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.11.0"
37
+ "@poncho-ai/sdk": "1.12.0"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
package/src/harness.ts CHANGED
@@ -59,7 +59,7 @@ import {
59
59
  mergeSkills,
60
60
  } from "./skill-context.js";
61
61
  import { generateText, streamText, type ModelMessage } from "ai";
62
- import { addPromptCacheBreakpoints } from "./prompt-cache.js";
62
+ import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
63
63
  import { jsonSchemaToZod } from "./schema-converter.js";
64
64
  import type { SkillMetadata } from "./skill-context.js";
65
65
  import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
@@ -2104,10 +2104,17 @@ export class AgentHarness {
2104
2104
  );
2105
2105
  }
2106
2106
  const hasFullToolResults = hasUntruncatedToolResults(messages);
2107
- if (hasFullToolResults) {
2108
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2107
+ // The 5-min tail breakpoint is skipped only when the caller explicitly
2108
+ // declares no follow-up is coming (jobs, programmatic one-shots). The
2109
+ // 1-hour static breakpoint on the system prompt is always on — it
2110
+ // amortizes across every later turn or job within the hour.
2111
+ const skipTailCache = input.disablePromptCache === true;
2112
+ if (skipTailCache) {
2113
+ costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
2114
+ } else if (hasFullToolResults) {
2115
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2109
2116
  } else {
2110
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2117
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2111
2118
  }
2112
2119
  const inputMessageCount = messages.length;
2113
2120
  const events: AgentEvent[] = [];
@@ -2210,11 +2217,17 @@ ${typeStubs}
2210
2217
  Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
2211
2218
  }
2212
2219
 
2213
- const buildSystemPrompt = async (): Promise<string> => {
2220
+ // Split the system prompt into a static portion (stable across turns
2221
+ // and jobs within an hour, modulo MCP connect/skill author/memory edit)
2222
+ // and a dynamic tail (memory, todos, time). The static portion gets a
2223
+ // 1-hour Anthropic cache breakpoint downstream; the tail rides the
2224
+ // existing 5-min message-level breakpoint. See the streamText site for
2225
+ // the breakpoint wiring.
2226
+ const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
2214
2227
  const agentPrompt = renderCurrentAgentPrompt();
2215
2228
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
2216
2229
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
2217
- const promptWithSkills = skillContextWindow
2230
+ const staticPart = skillContextWindow
2218
2231
  ? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
2219
2232
  : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
2220
2233
  // Quantize to the hour so the system prompt is stable across runs
@@ -2230,9 +2243,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2230
2243
  const timeContext = this.reminderStore
2231
2244
  ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
2232
2245
  : "";
2233
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
2246
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
2247
+ return { staticPart, dynamicPart };
2234
2248
  };
2235
- let systemPrompt = await buildSystemPrompt();
2249
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
2250
+ await buildSystemPromptParts();
2251
+ // Concatenated form for the non-Anthropic `system:` path and legacy consumers (token estimation, telemetry).
2252
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
2236
2253
  let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
2237
2254
 
2238
2255
  const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -2772,25 +2789,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2772
2789
 
2773
2790
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2774
2791
  const maxTokens = agent.frontmatter.model?.maxTokens;
2775
- // Place the breakpoint before any untruncated tool-result so we
2776
- // cache only the stable prefix when prior-run tool results are
2777
- // still full-fidelity. Otherwise cache at the history tail.
2778
- const breakpointIndex = hasFullToolResults
2779
- ? findLastStableCacheIndex(coreMessages)
2780
- : coreMessages.length - 1;
2781
- const cachedMessages = addPromptCacheBreakpoints(
2782
- coreMessages,
2783
- modelInstance,
2784
- breakpointIndex,
2785
- );
2792
+ // Place the tail breakpoint before any untruncated tool-result so
2793
+ // we cache only the stable prefix when prior-run tool results are
2794
+ // still full-fidelity. Otherwise cache at the history tail. When
2795
+ // `skipTailCache` is set (per-run override), don't write the tail
2796
+ // breakpoint at all. The 1-hour static-prefix breakpoint is added
2797
+ // separately when assembling the final messages array.
2798
+ const cachedMessages = skipTailCache
2799
+ ? coreMessages
2800
+ : addPromptCacheBreakpoints(
2801
+ coreMessages,
2802
+ modelInstance,
2803
+ hasFullToolResults
2804
+ ? findLastStableCacheIndex(coreMessages)
2805
+ : coreMessages.length - 1,
2806
+ );
2807
+
2808
+ // Anthropic: split system into two blocks with a 1-hour cache
2809
+ // breakpoint at the boundary between the static portion (agent
2810
+ // body + skills + browser/fs/isolate context — stable across many
2811
+ // turns and jobs) and the dynamic tail (memory, todos, time).
2812
+ // The static block becomes a hot cache that every later turn and
2813
+ // job in the hour reads at 0.1× — much bigger payoff than the
2814
+ // 5-min tail breakpoint, which only survives active back-and-forth.
2815
+ // For non-Anthropic models, fall back to the single concatenated
2816
+ // string via `system:` — those providers auto-cache.
2817
+ const useStaticCache = isAnthropicModel(modelInstance);
2818
+ const finalMessages: ModelMessage[] = useStaticCache
2819
+ ? [
2820
+ {
2821
+ role: "system",
2822
+ content: staticSystemPart,
2823
+ providerOptions: {
2824
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
2825
+ },
2826
+ },
2827
+ ...(dynamicSystemPart.length > 0
2828
+ ? [{ role: "system" as const, content: dynamicSystemPart }]
2829
+ : []),
2830
+ ...cachedMessages,
2831
+ ]
2832
+ : cachedMessages;
2786
2833
 
2787
2834
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2788
2835
 
2789
2836
 
2790
2837
  const result = await streamText({
2791
2838
  model: modelInstance,
2792
- system: systemPrompt,
2793
- messages: cachedMessages,
2839
+ ...(useStaticCache ? {} : { system: systemPrompt }),
2840
+ messages: finalMessages,
2794
2841
  tools,
2795
2842
  temperature,
2796
2843
  abortSignal: input.abortSignal,
@@ -3532,7 +3579,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3532
3579
  agent = this.parsedAgent as ParsedAgent;
3533
3580
  const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
3534
3581
  if (currentFingerprint !== lastPromptFingerprint) {
3535
- systemPrompt = await buildSystemPrompt();
3582
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
3583
+ await buildSystemPromptParts());
3584
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
3536
3585
  lastPromptFingerprint = currentFingerprint;
3537
3586
  }
3538
3587
  }
@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
62
62
  parameters?: Record<string, unknown>;
63
63
  abortSignal?: AbortSignal;
64
64
  tenantId?: string | null;
65
+ /**
66
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
67
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
68
+ * harness skips the Anthropic 5-min tail cache write (the 1-hour static breakpoint is still applied).
69
+ */
70
+ disablePromptCache?: boolean;
65
71
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
66
72
  onEvent?: (event: AgentEvent) => void | Promise<void>;
67
73
  }
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
203
209
  messages: harnessMessages,
204
210
  files: opts.files && opts.files.length > 0 ? opts.files : undefined,
205
211
  abortSignal: opts.abortSignal,
212
+ disablePromptCache: opts.disablePromptCache,
206
213
  },
207
214
  initialContextTokens: conversation.contextTokens ?? 0,
208
215
  initialContextWindow: conversation.contextWindow ?? 0,
@@ -1,6 +1,6 @@
1
1
  import type { ModelMessage, LanguageModel } from "ai";
2
2
 
3
- function isAnthropicModel(model: LanguageModel): boolean {
3
+ export function isAnthropicModel(model: LanguageModel): boolean {
4
4
  if (typeof model === "string") {
5
5
  return model.includes("anthropic") || model.includes("claude");
6
6
  }