@poncho-ai/harness 0.37.0 → 0.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.37.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.37.2 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 389.90 KB
11
+ ESM dist/index.js 390.92 KB
12
12
  ESM dist/isolate-TCWTUVG4.js 47.34 KB
13
- ESM ⚡️ Build success in 206ms
13
+ ESM ⚡️ Build success in 247ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7213ms
15
+ DTS ⚡️ Build success in 7644ms
16
16
  DTS dist/index.d.ts 56.62 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.37.2
4
+
5
+ ### Patch Changes
6
+
7
+ - [`2229f74`](https://github.com/cesr/poncho-ai/commit/2229f74ae4d02c5618c60787a7db925060cc1313) Thanks [@cesr](https://github.com/cesr)! - fix: stop invalidating the prompt cache across runs and preserve cache reads when tool results are in flight.
8
+
9
+ Two issues were degrading prompt-cache hit rates to ~0 between turns:
10
+ 1. The system prompt embedded `new Date().toISOString()` (millisecond precision) on every run when a reminder store was active, which changed the very first block of the prefix and prevented any cross-run cache match. The timestamp is now quantized to the hour, which keeps the system prompt stable across runs within the same UTC hour while still giving the agent a usable sense of time.
11
+ 2. When the message history contained untruncated tool results from the previous run, prompt caching was disabled entirely — no `cache_control` breakpoint was emitted, which also killed cache _reads_ of the stable prefix (system prompt + earlier turns). The breakpoint is now placed immediately before the first untruncated tool result instead, so the stable prefix is still cached and read while the soon-to-be-truncated tail stays out of the cache.
12
+
13
+ `addPromptCacheBreakpoints` now takes an optional `targetIndex` to support this.
14
+
15
+ ## 0.37.1
16
+
17
+ ### Patch Changes
18
+
19
+ - [`fb61a62`](https://github.com/cesr/poncho-ai/commit/fb61a6259367f0a62d0acd7a20ef2fae93013819) Thanks [@cesr](https://github.com/cesr)! - fix: migration script now discovers and migrates all agent directories instead of only the first one
20
+
3
21
  ## 0.37.0
4
22
 
5
23
  ### Minor Changes
package/dist/index.js CHANGED
@@ -6659,15 +6659,19 @@ function isAnthropicModel(model) {
6659
6659
  }
6660
6660
  return model.provider === "anthropic" || model.provider.includes("anthropic") || model.modelId.includes("anthropic") || model.modelId.includes("claude");
6661
6661
  }
6662
- function addPromptCacheBreakpoints(messages, model) {
6662
+ function addPromptCacheBreakpoints(messages, model, targetIndex) {
6663
6663
  if (messages.length === 0 || !isAnthropicModel(model)) {
6664
6664
  return messages;
6665
6665
  }
6666
+ const index = targetIndex ?? messages.length - 1;
6667
+ if (index < 0 || index >= messages.length) {
6668
+ return messages;
6669
+ }
6666
6670
  const cacheDirective = {
6667
6671
  anthropic: { cacheControl: { type: "ephemeral" } }
6668
6672
  };
6669
- return messages.map((message, index) => {
6670
- if (index === messages.length - 1) {
6673
+ return messages.map((message, i) => {
6674
+ if (i === index) {
6671
6675
  return {
6672
6676
  ...message,
6673
6677
  providerOptions: {
@@ -7800,6 +7804,25 @@ var hasUntruncatedToolResults = (messages) => {
7800
7804
  }
7801
7805
  return false;
7802
7806
  };
7807
+ var findLastStableCacheIndex = (messages) => {
7808
+ for (let i = 0; i < messages.length; i += 1) {
7809
+ const msg = messages[i];
7810
+ if (msg.role !== "tool") continue;
7811
+ if (!Array.isArray(msg.content)) continue;
7812
+ for (const part of msg.content) {
7813
+ if (!part || typeof part !== "object") continue;
7814
+ const p = part;
7815
+ if (p.type !== "tool-result" || !p.output) continue;
7816
+ if (p.output.type === "json") return i - 1;
7817
+ if (p.output.type === "text" && typeof p.output.value === "string") {
7818
+ if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
7819
+ return i - 1;
7820
+ }
7821
+ }
7822
+ }
7823
+ }
7824
+ return messages.length - 1;
7825
+ };
7803
7826
  var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
7804
7827
 
7805
7828
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -9072,14 +9095,13 @@ var AgentHarness = class _AgentHarness {
9072
9095
  );
9073
9096
  }
9074
9097
  const hasFullToolResults = hasUntruncatedToolResults(messages);
9075
- const enablePromptCache = !hasFullToolResults;
9076
- if (!enablePromptCache) {
9098
+ if (hasFullToolResults) {
9077
9099
  console.info(
9078
- `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
9100
+ `[poncho][cost] Prompt cache breakpoint will be placed before untruncated tool results for run "${runId}" (stable prefix only).`
9079
9101
  );
9080
9102
  } else {
9081
9103
  console.info(
9082
- `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
9104
+ `[poncho][cost] Prompt cache breakpoint will be placed at history tail for run "${runId}" (no untruncated tool results).`
9083
9105
  );
9084
9106
  }
9085
9107
  const inputMessageCount = messages.length;
@@ -9174,9 +9196,14 @@ Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the
9174
9196
  const promptWithSkills = this.skillContextWindow ? `${agentPrompt}${developmentContext}
9175
9197
 
9176
9198
  ${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
9199
+ const hourlyTime = (() => {
9200
+ const d = /* @__PURE__ */ new Date();
9201
+ d.setUTCMinutes(0, 0, 0);
9202
+ return d.toISOString();
9203
+ })();
9177
9204
  const timeContext = this.reminderStore ? `
9178
9205
 
9179
- Current UTC time: ${(/* @__PURE__ */ new Date()).toISOString()}` : "";
9206
+ Current UTC time (hour precision): ${hourlyTime}` : "";
9180
9207
  return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
9181
9208
  };
9182
9209
  let systemPrompt = buildSystemPrompt();
@@ -9615,7 +9642,12 @@ ${textContent}` };
9615
9642
  const coreMessages = cachedCoreMessages;
9616
9643
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
9617
9644
  const maxTokens = agent.frontmatter.model?.maxTokens;
9618
- const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
9645
+ const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
9646
+ const cachedMessages = addPromptCacheBreakpoints(
9647
+ coreMessages,
9648
+ modelInstance,
9649
+ breakpointIndex
9650
+ );
9619
9651
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
9620
9652
  const result = await streamText({
9621
9653
  model: modelInstance,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.37.0",
3
+ "version": "0.37.2",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -106,30 +106,26 @@ async function readJsonSafe(filePath) {
106
106
  }
107
107
  }
108
108
 
109
- async function findAgentDir(workingDir) {
109
+ async function findAgentDirs(workingDir) {
110
110
  const ponchoDir = resolve(workingDir, ".poncho");
111
+ const results = [];
111
112
  try {
112
113
  const entries = await readdir(ponchoDir, { withFileTypes: true });
113
114
  for (const e of entries) {
114
115
  if (e.isDirectory() && !e.name.startsWith(".")) {
115
- return { dir: resolve(ponchoDir, e.name), id: e.name };
116
+ results.push({ dir: resolve(ponchoDir, e.name), id: e.name });
116
117
  }
117
118
  }
118
119
  } catch { /* no .poncho dir */ }
119
- return undefined;
120
+ return results;
120
121
  }
121
122
 
122
123
  // ---------------------------------------------------------------------------
123
124
  // Read from local
124
125
  // ---------------------------------------------------------------------------
125
126
 
126
- async function readLocal(workingDir) {
127
- const agent = await findAgentDir(workingDir);
128
- if (!agent) {
129
- console.error("No .poncho agent directory found in", workingDir);
130
- process.exit(1);
131
- }
132
- console.log(`Found local agent: ${agent.id} at ${agent.dir}`);
127
+ async function readLocalAgent(agent) {
128
+ console.log(` Reading agent: ${agent.id} at ${agent.dir}`);
133
129
 
134
130
  const data = { agentId: agent.id, conversations: [], memories: [], todos: [], reminders: [] };
135
131
 
@@ -189,6 +185,29 @@ async function readLocal(workingDir) {
189
185
  return data;
190
186
  }
191
187
 
188
+ async function readLocal(workingDir) {
189
+ const agents = await findAgentDirs(workingDir);
190
+ if (agents.length === 0) {
191
+ console.error("No .poncho agent directory found in", workingDir);
192
+ process.exit(1);
193
+ }
194
+
195
+ // If --agent-id is specified, filter to that agent
196
+ const filtered = AGENT_ID ? agents.filter((a) => a.id === AGENT_ID) : agents;
197
+ if (filtered.length === 0) {
198
+ console.error(`Agent "${AGENT_ID}" not found. Available agents: ${agents.map((a) => a.id).join(", ")}`);
199
+ process.exit(1);
200
+ }
201
+
202
+ console.log(`Found ${filtered.length} agent(s) to migrate`);
203
+
204
+ const results = [];
205
+ for (const agent of filtered) {
206
+ results.push(await readLocalAgent(agent));
207
+ }
208
+ return results;
209
+ }
210
+
192
211
  // ---------------------------------------------------------------------------
193
212
  // Read from Upstash
194
213
  // ---------------------------------------------------------------------------
@@ -323,9 +342,12 @@ async function readUpstash(agentId) {
323
342
  async function readFromEngine(sourceProvider, agentId) {
324
343
  if (!agentId) {
325
344
  // Try to detect from .poncho directory
326
- const agent = await findAgentDir(WORKING_DIR);
327
- if (agent) agentId = agent.id;
328
- else {
345
+ const agents = await findAgentDirs(WORKING_DIR);
346
+ if (agents.length === 1) agentId = agents[0].id;
347
+ else if (agents.length > 1) {
348
+ console.error(`Multiple agents found: ${agents.map((a) => a.id).join(", ")}. Use --agent-id to specify one.`);
349
+ process.exit(1);
350
+ } else {
329
351
  console.error("--agent-id is required for engine source (or run from a project with .poncho/)");
330
352
  process.exit(1);
331
353
  }
@@ -513,40 +535,43 @@ async function main() {
513
535
  console.log(`Working dir: ${WORKING_DIR}`);
514
536
  if (DRY_RUN) console.log("(dry run — no data will be written)\n");
515
537
 
516
- // Read source
517
- let data;
538
+ // Read source — local returns an array (one per agent), others return a single object
539
+ let dataList;
518
540
  if (SOURCE === "local") {
519
- data = await readLocal(WORKING_DIR);
541
+ dataList = await readLocal(WORKING_DIR);
520
542
  } else if (SOURCE === "upstash") {
521
- data = await readUpstash(AGENT_ID);
543
+ dataList = [await readUpstash(AGENT_ID)];
522
544
  } else if (SOURCE === "sqlite" || SOURCE === "postgresql") {
523
- data = await readFromEngine(SOURCE, AGENT_ID);
545
+ dataList = [await readFromEngine(SOURCE, AGENT_ID)];
524
546
  } else {
525
547
  console.error(`Unknown source: ${SOURCE}. Use "local", "upstash", "sqlite", or "postgresql".`);
526
548
  process.exit(1);
527
549
  }
528
550
 
529
- console.log(`\nRead from ${SOURCE}:`);
530
- console.log(` Conversations: ${data.conversations.length}`);
531
- console.log(` Memories: ${data.memories?.length ?? 0}`);
532
- console.log(` Todo lists: ${data.todos.length}`);
533
- console.log(` Reminders: ${data.reminders.length}`);
534
- if (data.vfsFiles?.length) console.log(` VFS files: ${data.vfsFiles.length}`);
551
+ for (const data of dataList) {
552
+ console.log(`\nAgent: ${data.agentId}`);
553
+ console.log(` Read from ${SOURCE}:`);
554
+ console.log(` Conversations: ${data.conversations.length}`);
555
+ console.log(` Memories: ${data.memories?.length ?? 0}`);
556
+ console.log(` Todo lists: ${data.todos.length}`);
557
+ console.log(` Reminders: ${data.reminders.length}`);
558
+ if (data.vfsFiles?.length) console.log(` VFS files: ${data.vfsFiles.length}`);
559
+
560
+ if (data.conversations.length === 0 && !data.memories?.length && data.todos.length === 0 && data.reminders.length === 0 && !data.vfsFiles?.length) {
561
+ console.log(" Nothing to migrate for this agent.");
562
+ continue;
563
+ }
535
564
 
536
- if (data.conversations.length === 0 && !data.memories?.length && data.todos.length === 0 && data.reminders.length === 0 && !data.vfsFiles?.length) {
537
- console.log("\nNothing to migrate.");
538
- process.exit(0);
539
- }
565
+ const result = await writeToEngine(data);
540
566
 
541
- // Write to target
542
- const result = await writeToEngine(data);
567
+ console.log(` ${DRY_RUN ? "Would import" : "Imported"} to ${TARGET}:`);
568
+ console.log(` Conversations: ${result.convCount}`);
569
+ console.log(` Memories: ${result.memoryCount}`);
570
+ console.log(` Todos: ${result.todoCount}`);
571
+ console.log(` Reminders: ${result.reminderCount}`);
572
+ if (result.vfsCount) console.log(` VFS files: ${result.vfsCount}`);
573
+ }
543
574
 
544
- console.log(`\n${DRY_RUN ? "Would import" : "Imported"} to ${TARGET}:`);
545
- console.log(` Conversations: ${result.convCount}`);
546
- console.log(` Memories: ${result.memoryCount}`);
547
- console.log(` Todos: ${result.todoCount}`);
548
- console.log(` Reminders: ${result.reminderCount}`);
549
- if (result.vfsCount) console.log(` VFS files: ${result.vfsCount}`);
550
575
  console.log("\nDone!");
551
576
  }
552
577
 
package/src/harness.ts CHANGED
@@ -333,6 +333,39 @@ const hasUntruncatedToolResults = (messages: Message[]): boolean => {
333
333
  return false;
334
334
  };
335
335
 
336
+ /**
337
+ * Finds the last ModelMessage index that's safe to place a prompt cache
338
+ * breakpoint at — i.e. the last index before any untruncated tool-result.
339
+ *
340
+ * Untruncated tool-results from a prior run will be truncated on the next
341
+ * run, which would invalidate any cache write covering them. Placing the
342
+ * breakpoint just before them lets us cache only the stable prefix (system
343
+ * prompt + earlier turns) while still reading it back next turn.
344
+ *
345
+ * Returns `messages.length - 1` when there are no untruncated tool-results
346
+ * (normal tail-of-history caching), or `-1` when the first message already contains one.
347
+ */
348
+ const findLastStableCacheIndex = (messages: ModelMessage[]): number => {
349
+ for (let i = 0; i < messages.length; i += 1) {
350
+ const msg = messages[i]!;
351
+ if (msg.role !== "tool") continue;
352
+ if (!Array.isArray(msg.content)) continue;
353
+ for (const part of msg.content) {
354
+ if (!part || typeof part !== "object") continue;
355
+ const p = part as { type?: string; output?: { type?: string; value?: unknown } };
356
+ if (p.type !== "tool-result" || !p.output) continue;
357
+ // JSON outputs are never truncated (only text content is), so they always count as untruncated.
358
+ if (p.output.type === "json") return i - 1;
359
+ if (p.output.type === "text" && typeof p.output.value === "string") {
360
+ if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
361
+ return i - 1;
362
+ }
363
+ }
364
+ }
365
+ }
366
+ return messages.length - 1;
367
+ };
368
+
336
369
  const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
337
370
 
338
371
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -1799,16 +1832,15 @@ export class AgentHarness {
1799
1832
  );
1800
1833
  }
1801
1834
  const hasFullToolResults = hasUntruncatedToolResults(messages);
1802
- const enablePromptCache = !hasFullToolResults;
1803
- if (!enablePromptCache) {
1835
+ if (hasFullToolResults) {
1804
1836
  console.info(
1805
- `[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
1806
- `(untruncated tool results present in history).`,
1837
+ `[poncho][cost] Prompt cache breakpoint will be placed before untruncated ` +
1838
+ `tool results for run "${runId}" (stable prefix only).`,
1807
1839
  );
1808
1840
  } else {
1809
1841
  console.info(
1810
- `[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
1811
- `(history has no untruncated tool results).`,
1842
+ `[poncho][cost] Prompt cache breakpoint will be placed at history tail ` +
1843
+ `for run "${runId}" (no untruncated tool results).`,
1812
1844
  );
1813
1845
  }
1814
1846
  const inputMessageCount = messages.length;
@@ -1917,8 +1949,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
1917
1949
  const promptWithSkills = this.skillContextWindow
1918
1950
  ? `${agentPrompt}${developmentContext}\n\n${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}`
1919
1951
  : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
1952
+ // Quantize to the hour so the system prompt is stable across runs
1953
+ // within the same hour. Including a per-millisecond timestamp would
1954
+ // invalidate the prompt cache on every run, since the system prompt
1955
+ // is the first block the cache tries to match.
1956
+ const hourlyTime = (() => {
1957
+ const d = new Date();
1958
+ d.setUTCMinutes(0, 0, 0);
1959
+ return d.toISOString();
1960
+ })();
1920
1961
  const timeContext = this.reminderStore
1921
- ? `\n\nCurrent UTC time: ${new Date().toISOString()}`
1962
+ ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
1922
1963
  : "";
1923
1964
  return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
1924
1965
  };
@@ -2452,9 +2493,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2452
2493
 
2453
2494
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2454
2495
  const maxTokens = agent.frontmatter.model?.maxTokens;
2455
- const cachedMessages = enablePromptCache
2456
- ? addPromptCacheBreakpoints(coreMessages, modelInstance)
2457
- : coreMessages;
2496
+ // Place the breakpoint before any untruncated tool-result so we
2497
+ // cache only the stable prefix when prior-run tool results are
2498
+ // still full-fidelity. Otherwise cache at the history tail.
2499
+ const breakpointIndex = hasFullToolResults
2500
+ ? findLastStableCacheIndex(coreMessages)
2501
+ : coreMessages.length - 1;
2502
+ const cachedMessages = addPromptCacheBreakpoints(
2503
+ coreMessages,
2504
+ modelInstance,
2505
+ breakpointIndex,
2506
+ );
2458
2507
 
2459
2508
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2460
2509
 
@@ -17,23 +17,32 @@ function isAnthropicModel(model: LanguageModel): boolean {
17
17
  * explicit opt-in (Anthropic). For providers with automatic caching
18
18
  * (OpenAI), messages are returned unchanged.
19
19
  *
20
- * For Anthropic, marks the last message with ephemeral cache control so the
21
- * conversation prefix is incrementally cached across steps.
20
+ * For Anthropic, marks the target message with ephemeral cache control so
21
+ * the conversation prefix is incrementally cached across steps. When
22
+ * `targetIndex` is omitted, the last message is used (default behavior).
23
+ * Callers that want to cache only a stable prefix (e.g. skipping tool
24
+ * results that will be truncated next turn) can pass an earlier index; an out-of-range index leaves the messages unchanged.
22
25
  */
23
26
  export function addPromptCacheBreakpoints(
24
27
  messages: ModelMessage[],
25
28
  model: LanguageModel,
29
+ targetIndex?: number,
26
30
  ): ModelMessage[] {
27
31
  if (messages.length === 0 || !isAnthropicModel(model)) {
28
32
  return messages;
29
33
  }
30
34
 
35
+ const index = targetIndex ?? messages.length - 1;
36
+ if (index < 0 || index >= messages.length) {
37
+ return messages;
38
+ }
39
+
31
40
  const cacheDirective = {
32
41
  anthropic: { cacheControl: { type: "ephemeral" as const } },
33
42
  };
34
43
 
35
- return messages.map((message, index) => {
36
- if (index === messages.length - 1) {
44
+ return messages.map((message, i) => {
45
+ if (i === index) {
37
46
  return {
38
47
  ...message,
39
48
  providerOptions: {
@@ -617,7 +617,7 @@ description: Safe skill
617
617
  script: "../outside.ts",
618
618
  }, stubContext);
619
619
  expect(result).toMatchObject({
620
- error: expect.stringContaining("must be relative and within the allowed directory"),
620
+ error: expect.stringContaining("Expected a relative path"),
621
621
  });
622
622
  });
623
623