@poncho-ai/harness 0.37.1 → 0.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.37.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.37.2 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 389.90 KB
11
+ ESM dist/index.js 390.92 KB
12
12
  ESM dist/isolate-TCWTUVG4.js 47.34 KB
13
- ESM ⚡️ Build success in 211ms
13
+ ESM ⚡️ Build success in 247ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 6845ms
15
+ DTS ⚡️ Build success in 7644ms
16
16
  DTS dist/index.d.ts 56.62 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.37.2
4
+
5
+ ### Patch Changes
6
+
7
+ - [`2229f74`](https://github.com/cesr/poncho-ai/commit/2229f74ae4d02c5618c60787a7db925060cc1313) Thanks [@cesr](https://github.com/cesr)! - fix: stop invalidating the prompt cache across runs and preserve cache reads when tool results are in flight.
8
+
9
+ Two issues were degrading prompt-cache hit rates to ~0 across runs:
10
+ 1. The system prompt embedded `new Date().toISOString()` (millisecond precision) on every run when a reminder store was active, which changed the very first block of the prefix and prevented any cross-run cache match. The timestamp is now quantized to the hour, which keeps the system prompt stable across runs while still giving the agent a usable sense of time.
11
+ 2. When the message history contained untruncated tool results from the previous run, prompt caching was disabled entirely — no `cache_control` breakpoint was emitted, which also killed cache _reads_ of the stable prefix (system prompt + earlier turns). The breakpoint is now placed immediately before the first untruncated tool result instead, so the stable prefix is still cached and read while the soon-to-be-truncated tail stays out of the cache.
12
+
13
+ `addPromptCacheBreakpoints` now takes an optional `targetIndex` to support this.
14
+
3
15
  ## 0.37.1
4
16
 
5
17
  ### Patch Changes
package/dist/index.js CHANGED
@@ -6659,15 +6659,19 @@ function isAnthropicModel(model) {
6659
6659
  }
6660
6660
  return model.provider === "anthropic" || model.provider.includes("anthropic") || model.modelId.includes("anthropic") || model.modelId.includes("claude");
6661
6661
  }
6662
- function addPromptCacheBreakpoints(messages, model) {
6662
+ function addPromptCacheBreakpoints(messages, model, targetIndex) {
6663
6663
  if (messages.length === 0 || !isAnthropicModel(model)) {
6664
6664
  return messages;
6665
6665
  }
6666
+ const index = targetIndex ?? messages.length - 1;
6667
+ if (index < 0 || index >= messages.length) {
6668
+ return messages;
6669
+ }
6666
6670
  const cacheDirective = {
6667
6671
  anthropic: { cacheControl: { type: "ephemeral" } }
6668
6672
  };
6669
- return messages.map((message, index) => {
6670
- if (index === messages.length - 1) {
6673
+ return messages.map((message, i) => {
6674
+ if (i === index) {
6671
6675
  return {
6672
6676
  ...message,
6673
6677
  providerOptions: {
@@ -7800,6 +7804,25 @@ var hasUntruncatedToolResults = (messages) => {
7800
7804
  }
7801
7805
  return false;
7802
7806
  };
7807
+ var findLastStableCacheIndex = (messages) => {
7808
+ for (let i = 0; i < messages.length; i += 1) {
7809
+ const msg = messages[i];
7810
+ if (msg.role !== "tool") continue;
7811
+ if (!Array.isArray(msg.content)) continue;
7812
+ for (const part of msg.content) {
7813
+ if (!part || typeof part !== "object") continue;
7814
+ const p = part;
7815
+ if (p.type !== "tool-result" || !p.output) continue;
7816
+ if (p.output.type === "json") return i - 1;
7817
+ if (p.output.type === "text" && typeof p.output.value === "string") {
7818
+ if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
7819
+ return i - 1;
7820
+ }
7821
+ }
7822
+ }
7823
+ }
7824
+ return messages.length - 1;
7825
+ };
7803
7826
  var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
7804
7827
 
7805
7828
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -9072,14 +9095,13 @@ var AgentHarness = class _AgentHarness {
9072
9095
  );
9073
9096
  }
9074
9097
  const hasFullToolResults = hasUntruncatedToolResults(messages);
9075
- const enablePromptCache = !hasFullToolResults;
9076
- if (!enablePromptCache) {
9098
+ if (hasFullToolResults) {
9077
9099
  console.info(
9078
- `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
9100
+ `[poncho][cost] Prompt cache breakpoint will be placed before untruncated tool results for run "${runId}" (stable prefix only).`
9079
9101
  );
9080
9102
  } else {
9081
9103
  console.info(
9082
- `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
9104
+ `[poncho][cost] Prompt cache breakpoint will be placed at history tail for run "${runId}" (no untruncated tool results).`
9083
9105
  );
9084
9106
  }
9085
9107
  const inputMessageCount = messages.length;
@@ -9174,9 +9196,14 @@ Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the
9174
9196
  const promptWithSkills = this.skillContextWindow ? `${agentPrompt}${developmentContext}
9175
9197
 
9176
9198
  ${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
9199
+ const hourlyTime = (() => {
9200
+ const d = /* @__PURE__ */ new Date();
9201
+ d.setUTCMinutes(0, 0, 0);
9202
+ return d.toISOString();
9203
+ })();
9177
9204
  const timeContext = this.reminderStore ? `
9178
9205
 
9179
- Current UTC time: ${(/* @__PURE__ */ new Date()).toISOString()}` : "";
9206
+ Current UTC time (hour precision): ${hourlyTime}` : "";
9180
9207
  return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
9181
9208
  };
9182
9209
  let systemPrompt = buildSystemPrompt();
@@ -9615,7 +9642,12 @@ ${textContent}` };
9615
9642
  const coreMessages = cachedCoreMessages;
9616
9643
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
9617
9644
  const maxTokens = agent.frontmatter.model?.maxTokens;
9618
- const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
9645
+ const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
9646
+ const cachedMessages = addPromptCacheBreakpoints(
9647
+ coreMessages,
9648
+ modelInstance,
9649
+ breakpointIndex
9650
+ );
9619
9651
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
9620
9652
  const result = await streamText({
9621
9653
  model: modelInstance,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.37.1",
3
+ "version": "0.37.2",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -333,6 +333,39 @@ const hasUntruncatedToolResults = (messages: Message[]): boolean => {
333
333
  return false;
334
334
  };
335
335
 
336
+ /**
337
+ * Finds the last ModelMessage index that's safe to place a prompt cache
338
+ * breakpoint at — i.e. the last index before any untruncated tool-result.
339
+ *
340
+ * Untruncated tool-results from a prior run will be truncated on the next
341
+ * run, which would invalidate any cache write covering them. Placing the
342
+ * breakpoint just before them lets us cache only the stable prefix (system
343
+ * prompt + earlier turns) while still reading it back next turn.
344
+ *
345
+ * Returns `messages.length - 1` when there are no untruncated tool-results
346
+ * (normal tail-of-history caching).
347
+ */
348
+ const findLastStableCacheIndex = (messages: ModelMessage[]): number => {
349
+ for (let i = 0; i < messages.length; i += 1) {
350
+ const msg = messages[i]!;
351
+ if (msg.role !== "tool") continue;
352
+ if (!Array.isArray(msg.content)) continue;
353
+ for (const part of msg.content) {
354
+ if (!part || typeof part !== "object") continue;
355
+ const p = part as { type?: string; output?: { type?: string; value?: unknown } };
356
+ if (p.type !== "tool-result" || !p.output) continue;
357
+ // JSON outputs bypass truncation (only text content is truncated).
358
+ if (p.output.type === "json") return i - 1;
359
+ if (p.output.type === "text" && typeof p.output.value === "string") {
360
+ if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
361
+ return i - 1;
362
+ }
363
+ }
364
+ }
365
+ }
366
+ return messages.length - 1;
367
+ };
368
+
336
369
  const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
337
370
 
338
371
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -1799,16 +1832,15 @@ export class AgentHarness {
1799
1832
  );
1800
1833
  }
1801
1834
  const hasFullToolResults = hasUntruncatedToolResults(messages);
1802
- const enablePromptCache = !hasFullToolResults;
1803
- if (!enablePromptCache) {
1835
+ if (hasFullToolResults) {
1804
1836
  console.info(
1805
- `[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
1806
- `(untruncated tool results present in history).`,
1837
+ `[poncho][cost] Prompt cache breakpoint will be placed before untruncated ` +
1838
+ `tool results for run "${runId}" (stable prefix only).`,
1807
1839
  );
1808
1840
  } else {
1809
1841
  console.info(
1810
- `[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
1811
- `(history has no untruncated tool results).`,
1842
+ `[poncho][cost] Prompt cache breakpoint will be placed at history tail ` +
1843
+ `for run "${runId}" (no untruncated tool results).`,
1812
1844
  );
1813
1845
  }
1814
1846
  const inputMessageCount = messages.length;
@@ -1917,8 +1949,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
1917
1949
  const promptWithSkills = this.skillContextWindow
1918
1950
  ? `${agentPrompt}${developmentContext}\n\n${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}`
1919
1951
  : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
1952
+ // Quantize to the hour so the system prompt is stable across runs
1953
+ // within the same hour. Including a per-millisecond timestamp would
1954
+ // invalidate the prompt cache on every run, since the system prompt
1955
+ // is the first block the cache tries to match.
1956
+ const hourlyTime = (() => {
1957
+ const d = new Date();
1958
+ d.setUTCMinutes(0, 0, 0);
1959
+ return d.toISOString();
1960
+ })();
1920
1961
  const timeContext = this.reminderStore
1921
- ? `\n\nCurrent UTC time: ${new Date().toISOString()}`
1962
+ ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
1922
1963
  : "";
1923
1964
  return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
1924
1965
  };
@@ -2452,9 +2493,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2452
2493
 
2453
2494
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2454
2495
  const maxTokens = agent.frontmatter.model?.maxTokens;
2455
- const cachedMessages = enablePromptCache
2456
- ? addPromptCacheBreakpoints(coreMessages, modelInstance)
2457
- : coreMessages;
2496
+ // Place the breakpoint before any untruncated tool-result so we
2497
+ // cache only the stable prefix when prior-run tool results are
2498
+ // still full-fidelity. Otherwise cache at the history tail.
2499
+ const breakpointIndex = hasFullToolResults
2500
+ ? findLastStableCacheIndex(coreMessages)
2501
+ : coreMessages.length - 1;
2502
+ const cachedMessages = addPromptCacheBreakpoints(
2503
+ coreMessages,
2504
+ modelInstance,
2505
+ breakpointIndex,
2506
+ );
2458
2507
 
2459
2508
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2460
2509
 
@@ -17,23 +17,32 @@ function isAnthropicModel(model: LanguageModel): boolean {
17
17
  * explicit opt-in (Anthropic). For providers with automatic caching
18
18
  * (OpenAI), messages are returned unchanged.
19
19
  *
20
- * For Anthropic, marks the last message with ephemeral cache control so the
21
- * conversation prefix is incrementally cached across steps.
20
+ * For Anthropic, marks the target message with ephemeral cache control so
21
+ * the conversation prefix is incrementally cached across steps. When
22
+ * `targetIndex` is omitted, the last message is used (default behavior).
23
+ * Callers that want to cache only a stable prefix (e.g. skipping tool
24
+ * results that will be truncated next turn) can pass an earlier index.
22
25
  */
23
26
  export function addPromptCacheBreakpoints(
24
27
  messages: ModelMessage[],
25
28
  model: LanguageModel,
29
+ targetIndex?: number,
26
30
  ): ModelMessage[] {
27
31
  if (messages.length === 0 || !isAnthropicModel(model)) {
28
32
  return messages;
29
33
  }
30
34
 
35
+ const index = targetIndex ?? messages.length - 1;
36
+ if (index < 0 || index >= messages.length) {
37
+ return messages;
38
+ }
39
+
31
40
  const cacheDirective = {
32
41
  anthropic: { cacheControl: { type: "ephemeral" as const } },
33
42
  };
34
43
 
35
- return messages.map((message, index) => {
36
- if (index === messages.length - 1) {
44
+ return messages.map((message, i) => {
45
+ if (i === index) {
37
46
  return {
38
47
  ...message,
39
48
  providerOptions: {
@@ -617,7 +617,7 @@ description: Safe skill
617
617
  script: "../outside.ts",
618
618
  }, stubContext);
619
619
  expect(result).toMatchObject({
620
- error: expect.stringContaining("must be relative and within the allowed directory"),
620
+ error: expect.stringContaining("Expected a relative path"),
621
621
  });
622
622
  });
623
623