@poncho-ai/harness 0.45.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.45.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.47.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -9,8 +9,8 @@
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
11
  ESM dist/isolate-VY35DGLM.js 49.43 KB
12
- ESM dist/index.js 524.35 KB
13
- ESM ⚡️ Build success in 230ms
12
+ ESM dist/index.js 525.35 KB
13
+ ESM ⚡️ Build success in 249ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7575ms
16
- DTS dist/index.d.ts 85.07 KB
15
+ DTS ⚡️ Build success in 7482ms
16
+ DTS dist/index.d.ts 85.30 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,68 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.47.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [#120](https://github.com/cesr/poncho-ai/pull/120) [`6cda4ab`](https://github.com/cesr/poncho-ai/commit/6cda4ab39865d89590f42927e281c5fb58cc99f4) Thanks [@cesr](https://github.com/cesr)! - harness: always inject the current hour into the system prompt
8
+
9
+ The dynamic system-prompt builder now emits
10
+ `Current UTC time (hour precision): Wed 2026-05-20T09Z` on every run,
11
+ not just when a `reminderStore` is configured. Knowing "what day is it"
12
+ is universally useful — drafting messages, computing relative dates,
13
+ deciding whether a stale memory still applies — and isn't specific to
14
+ reminder-firing logic.
15
+
16
+ Format also drops the zeroed-out minutes/seconds tail (`T09:00:00.000Z`
17
+ → `T09Z`) so the hour quantization is visible to the model rather than
18
+ hidden behind noise. The prompt-cache properties are unchanged: the
19
+ string is still hour-stable and lives in the dynamic prompt section, so
20
+ hourly rollovers don't bust the static cache breakpoint.
21
+
22
+ ## 0.46.0
23
+
24
+ ### Minor Changes
25
+
26
+ - [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
27
+
28
+ Two related changes to Anthropic prompt caching:
29
+
30
+ **1-hour static system-prompt breakpoint.** The harness now splits the
31
+ assembled system prompt into a static portion (agent body + skill
32
+ context + browser/fs/isolate context — stable across many turns and
33
+ jobs within an hour) and a dynamic tail (memory, todos, time). On
34
+ Anthropic models, these are sent as two `role: "system"` messages with
35
+ `cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
36
+ tail breakpoint on the last user/assistant/tool message is retained.
37
+
38
+ This lets later turns and job runs read ~95% of the system prompt at
39
+ 0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
40
+ has expired — the previous setup only cached for 5 minutes via the
41
+ tail breakpoint. A user's separate conversations, and interactive turns and job runs,
42
+ all share the static cache.
43
+
44
+ **Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
45
+ boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
46
+ forwarded into `runInput`). When set, the harness skips the 5-min tail
47
+ breakpoint for that run. The 1-hour static breakpoint is still
48
+ applied — the run still benefits from reading the shared static cache,
49
+ just doesn't write a new tail entry that won't be read before TTL.
50
+
51
+ Intended for one-shot programmatic invocations (cron-fired jobs,
52
+ subagent dispatch) where no follow-up turn is coming within the 5-min
53
+ TTL window, so the 1.25× write surcharge would be pure waste.
54
+
55
+ Non-Anthropic providers fall through to the previous single concatenated
56
+ `system:` string with no cache control — those providers auto-cache.
57
+
58
+ Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
59
+ for reuse at the streamText site.
60
+
61
+ ### Patch Changes
62
+
63
+ - Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
64
+ - @poncho-ai/sdk@1.12.0
65
+
3
66
  ## 0.45.0
4
67
 
5
68
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -2036,6 +2036,12 @@ interface RunConversationTurnOpts {
2036
2036
  parameters?: Record<string, unknown>;
2037
2037
  abortSignal?: AbortSignal;
2038
2038
  tenantId?: string | null;
2039
+ /**
2040
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
2041
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
2042
+ * harness skips the 5-min tail cache breakpoint (the 1-hour static system-prompt breakpoint still applies).
2043
+ */
2044
+ disablePromptCache?: boolean;
2039
2045
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
2040
2046
  onEvent?: (event: AgentEvent) => void | Promise<void>;
2041
2047
  }
package/dist/index.js CHANGED
@@ -10129,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
10129
10129
  );
10130
10130
  }
10131
10131
  const hasFullToolResults = hasUntruncatedToolResults(messages);
10132
- if (hasFullToolResults) {
10133
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10132
+ const skipTailCache = input.disablePromptCache === true;
10133
+ if (skipTailCache) {
10134
+ costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
10135
+ } else if (hasFullToolResults) {
10136
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
10134
10137
  } else {
10135
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10138
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
10136
10139
  }
10137
10140
  const inputMessageCount = messages.length;
10138
10141
  const events = [];
@@ -10221,25 +10224,26 @@ ${typeStubs}
10221
10224
 
10222
10225
  Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
10223
10226
  }
10224
- const buildSystemPrompt = async () => {
10227
+ const buildSystemPromptParts = async () => {
10225
10228
  const agentPrompt = renderCurrentAgentPrompt();
10226
10229
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
10227
10230
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
10228
- const promptWithSkills = skillContextWindow ? `${agentPrompt}${developmentContext}
10231
+ const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
10229
10232
 
10230
10233
  ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
10231
10234
  const hourlyTime = (() => {
10232
10235
  const d = /* @__PURE__ */ new Date();
10233
- d.setUTCMinutes(0, 0, 0);
10234
10236
  const weekday = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"][d.getUTCDay()];
10235
- return `${weekday} ${d.toISOString()}`;
10237
+ return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
10236
10238
  })();
10237
- const timeContext = this.reminderStore ? `
10239
+ const timeContext = `
10238
10240
 
10239
- Current UTC time (hour precision): ${hourlyTime}` : "";
10240
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
10241
+ Current UTC time (hour precision): ${hourlyTime}`;
10242
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
10243
+ return { staticPart, dynamicPart };
10241
10244
  };
10242
- let systemPrompt = await buildSystemPrompt();
10245
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10246
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
10243
10247
  let lastPromptFingerprint = `${this.agentFileFingerprint}
10244
10248
  ${this.skillFingerprint}`;
10245
10249
  const pushEvent = (event) => {
@@ -10673,17 +10677,28 @@ ${textContent}` };
10673
10677
  const coreMessages = cachedCoreMessages;
10674
10678
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
10675
10679
  const maxTokens = agent.frontmatter.model?.maxTokens;
10676
- const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
10677
- const cachedMessages = addPromptCacheBreakpoints(
10680
+ const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
10678
10681
  coreMessages,
10679
10682
  modelInstance,
10680
- breakpointIndex
10683
+ hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
10681
10684
  );
10685
+ const useStaticCache = isAnthropicModel(modelInstance);
10686
+ const finalMessages = useStaticCache ? [
10687
+ {
10688
+ role: "system",
10689
+ content: staticSystemPart,
10690
+ providerOptions: {
10691
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
10692
+ }
10693
+ },
10694
+ ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
10695
+ ...cachedMessages
10696
+ ] : cachedMessages;
10682
10697
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
10683
10698
  const result = await streamText({
10684
10699
  model: modelInstance,
10685
- system: systemPrompt,
10686
- messages: cachedMessages,
10700
+ ...useStaticCache ? {} : { system: systemPrompt },
10701
+ messages: finalMessages,
10687
10702
  tools,
10688
10703
  temperature,
10689
10704
  abortSignal: input.abortSignal,
@@ -11308,7 +11323,8 @@ ${textContent}` };
11308
11323
  const currentFingerprint = `${this.agentFileFingerprint}
11309
11324
  ${this.skillFingerprint}`;
11310
11325
  if (currentFingerprint !== lastPromptFingerprint) {
11311
- systemPrompt = await buildSystemPrompt();
11326
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11327
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
11312
11328
  lastPromptFingerprint = currentFingerprint;
11313
11329
  }
11314
11330
  }
@@ -13577,7 +13593,8 @@ var runConversationTurn = async (opts) => {
13577
13593
  ),
13578
13594
  messages: harnessMessages,
13579
13595
  files: opts.files && opts.files.length > 0 ? opts.files : void 0,
13580
- abortSignal: opts.abortSignal
13596
+ abortSignal: opts.abortSignal,
13597
+ disablePromptCache: opts.disablePromptCache
13581
13598
  },
13582
13599
  initialContextTokens: conversation.contextTokens ?? 0,
13583
13600
  initialContextWindow: conversation.contextWindow ?? 0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.45.0",
3
+ "version": "0.47.0",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.11.0"
37
+ "@poncho-ai/sdk": "1.12.0"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
package/src/harness.ts CHANGED
@@ -59,7 +59,7 @@ import {
59
59
  mergeSkills,
60
60
  } from "./skill-context.js";
61
61
  import { generateText, streamText, type ModelMessage } from "ai";
62
- import { addPromptCacheBreakpoints } from "./prompt-cache.js";
62
+ import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
63
63
  import { jsonSchemaToZod } from "./schema-converter.js";
64
64
  import type { SkillMetadata } from "./skill-context.js";
65
65
  import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
@@ -2104,10 +2104,17 @@ export class AgentHarness {
2104
2104
  );
2105
2105
  }
2106
2106
  const hasFullToolResults = hasUntruncatedToolResults(messages);
2107
- if (hasFullToolResults) {
2108
- costLog.debug(`cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2107
+ // The 5-min tail breakpoint is skipped only when the caller explicitly
2108
+ // declares no follow-up is coming (jobs, programmatic one-shots). The
2109
+ // 1-hour static breakpoint on the system prompt is always on — it
2110
+ // amortizes across every later turn or job within the hour.
2111
+ const skipTailCache = input.disablePromptCache === true;
2112
+ if (skipTailCache) {
2113
+ costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
2114
+ } else if (hasFullToolResults) {
2115
+ costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
2109
2116
  } else {
2110
- costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2117
+ costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
2111
2118
  }
2112
2119
  const inputMessageCount = messages.length;
2113
2120
  const events: AgentEvent[] = [];
@@ -2210,29 +2217,40 @@ ${typeStubs}
2210
2217
  Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
2211
2218
  }
2212
2219
 
2213
- const buildSystemPrompt = async (): Promise<string> => {
2220
+ // Split the system prompt into a static portion (stable across turns
2221
+ // and jobs within an hour, modulo MCP connect/skill author/memory edit)
2222
+ // and a dynamic tail (memory, todos, time). The static portion gets a
2223
+ // 1-hour Anthropic cache breakpoint downstream; the tail rides the
2224
+ // existing 5-min message-level breakpoint. See the streamText site for
2225
+ // the breakpoint wiring.
2226
+ const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
2214
2227
  const agentPrompt = renderCurrentAgentPrompt();
2215
2228
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
2216
2229
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
2217
- const promptWithSkills = skillContextWindow
2230
+ const staticPart = skillContextWindow
2218
2231
  ? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
2219
2232
  : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
2220
2233
  // Quantize to the hour so the system prompt is stable across runs
2221
2234
  // within the same hour. Including a per-millisecond timestamp would
2222
2235
  // invalidate the prompt cache on every run, since the system prompt
2223
- // is the first block the cache tries to match.
2236
+ // is the first block the cache tries to match. Format is
2237
+ // `Weekday YYYY-MM-DDTHHZ` — minutes/seconds dropped to make the
2238
+ // hour-quantization visible to the model rather than hidden behind
2239
+ // a zeroed-out tail. Always emitted: every agent needs to know
2240
+ // "what day is it" even without reminders configured.
2224
2241
  const hourlyTime = (() => {
2225
2242
  const d = new Date();
2226
- d.setUTCMinutes(0, 0, 0);
2227
2243
  const weekday = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"][d.getUTCDay()];
2228
- return `${weekday} ${d.toISOString()}`;
2244
+ return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
2229
2245
  })();
2230
- const timeContext = this.reminderStore
2231
- ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
2232
- : "";
2233
- return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
2246
+ const timeContext = `\n\nCurrent UTC time (hour precision): ${hourlyTime}`;
2247
+ const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
2248
+ return { staticPart, dynamicPart };
2234
2249
  };
2235
- let systemPrompt = await buildSystemPrompt();
2250
+ let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
2251
+ await buildSystemPromptParts();
2252
+ // Concatenated form: passed as `system:` for non-Anthropic models and kept for legacy consumers (token estimation, telemetry).
2253
+ let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
2236
2254
  let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
2237
2255
 
2238
2256
  const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -2772,25 +2790,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2772
2790
 
2773
2791
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2774
2792
  const maxTokens = agent.frontmatter.model?.maxTokens;
2775
- // Place the breakpoint before any untruncated tool-result so we
2776
- // cache only the stable prefix when prior-run tool results are
2777
- // still full-fidelity. Otherwise cache at the history tail.
2778
- const breakpointIndex = hasFullToolResults
2779
- ? findLastStableCacheIndex(coreMessages)
2780
- : coreMessages.length - 1;
2781
- const cachedMessages = addPromptCacheBreakpoints(
2782
- coreMessages,
2783
- modelInstance,
2784
- breakpointIndex,
2785
- );
2793
+ // Place the tail breakpoint before any untruncated tool-result so
2794
+ // we cache only the stable prefix when prior-run tool results are
2795
+ // still full-fidelity. Otherwise cache at the history tail. When
2796
+ // `skipTailCache` is set (per-run override), don't write the tail
2797
+ // breakpoint at all. The 1-hour static-prefix breakpoint is added
2798
+ // separately when assembling the final messages array.
2799
+ const cachedMessages = skipTailCache
2800
+ ? coreMessages
2801
+ : addPromptCacheBreakpoints(
2802
+ coreMessages,
2803
+ modelInstance,
2804
+ hasFullToolResults
2805
+ ? findLastStableCacheIndex(coreMessages)
2806
+ : coreMessages.length - 1,
2807
+ );
2808
+
2809
+ // Anthropic: split system into two blocks with a 1-hour cache
2810
+ // breakpoint at the boundary between the static portion (agent
2811
+ // body + skills + browser/fs/isolate context — stable across many
2812
+ // turns and jobs) and the dynamic tail (memory, todos, time).
2813
+ // The static block becomes a hot cache that every later turn and
2814
+ // job in the hour reads at 0.1× — much bigger payoff than the
2815
+ // 5-min tail breakpoint, which only survives active back-and-forth.
2816
+ // For non-Anthropic models, fall back to the single concatenated
2817
+ // string via `system:` — those providers auto-cache.
2818
+ const useStaticCache = isAnthropicModel(modelInstance);
2819
+ const finalMessages: ModelMessage[] = useStaticCache
2820
+ ? [
2821
+ {
2822
+ role: "system",
2823
+ content: staticSystemPart,
2824
+ providerOptions: {
2825
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
2826
+ },
2827
+ },
2828
+ ...(dynamicSystemPart.length > 0
2829
+ ? [{ role: "system" as const, content: dynamicSystemPart }]
2830
+ : []),
2831
+ ...cachedMessages,
2832
+ ]
2833
+ : cachedMessages;
2786
2834
 
2787
2835
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2788
2836
 
2789
2837
 
2790
2838
  const result = await streamText({
2791
2839
  model: modelInstance,
2792
- system: systemPrompt,
2793
- messages: cachedMessages,
2840
+ ...(useStaticCache ? {} : { system: systemPrompt }),
2841
+ messages: finalMessages,
2794
2842
  tools,
2795
2843
  temperature,
2796
2844
  abortSignal: input.abortSignal,
@@ -3532,7 +3580,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3532
3580
  agent = this.parsedAgent as ParsedAgent;
3533
3581
  const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
3534
3582
  if (currentFingerprint !== lastPromptFingerprint) {
3535
- systemPrompt = await buildSystemPrompt();
3583
+ ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
3584
+ await buildSystemPromptParts());
3585
+ systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
3536
3586
  lastPromptFingerprint = currentFingerprint;
3537
3587
  }
3538
3588
  }
@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
62
62
  parameters?: Record<string, unknown>;
63
63
  abortSignal?: AbortSignal;
64
64
  tenantId?: string | null;
65
+ /**
66
+ * Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
67
+ * turns with no follow-up coming (cron-fired jobs, etc.) so the
68
+ * harness skips the 5-min tail cache breakpoint (the 1-hour static system-prompt breakpoint still applies).
69
+ */
70
+ disablePromptCache?: boolean;
65
71
  /** Per-event hook — called for every AgentEvent yielded by the run, in order. */
66
72
  onEvent?: (event: AgentEvent) => void | Promise<void>;
67
73
  }
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
203
209
  messages: harnessMessages,
204
210
  files: opts.files && opts.files.length > 0 ? opts.files : undefined,
205
211
  abortSignal: opts.abortSignal,
212
+ disablePromptCache: opts.disablePromptCache,
206
213
  },
207
214
  initialContextTokens: conversation.contextTokens ?? 0,
208
215
  initialContextWindow: conversation.contextWindow ?? 0,
@@ -1,6 +1,6 @@
1
1
  import type { ModelMessage, LanguageModel } from "ai";
2
2
 
3
- function isAnthropicModel(model: LanguageModel): boolean {
3
+ export function isAnthropicModel(model: LanguageModel): boolean {
4
4
  if (typeof model === "string") {
5
5
  return model.includes("anthropic") || model.includes("claude");
6
6
  }