@poncho-ai/harness 0.37.1 → 0.37.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -4
- package/CHANGELOG.md +12 -0
- package/dist/index.js +41 -9
- package/package.json +1 -1
- package/src/harness.ts +59 -10
- package/src/prompt-cache.ts +13 -4
- package/test/harness.test.ts +1 -1
package/.turbo/turbo-build.log
CHANGED

@@ -1,5 +1,5 @@
 
-> @poncho-ai/harness@0.37.
+> @poncho-ai/harness@0.37.2 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
 > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
 
 [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
 CLI tsup v8.5.1
 CLI Target: es2022
 ESM Build start
-ESM dist/index.js
+ESM dist/index.js 390.92 KB
 ESM dist/isolate-TCWTUVG4.js 47.34 KB
-ESM ⚡️ Build success in
+ESM ⚡️ Build success in 247ms
 DTS Build start
-DTS ⚡️ Build success in
+DTS ⚡️ Build success in 7644ms
 DTS dist/index.d.ts 56.62 KB
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,17 @@
 # @poncho-ai/harness
 
+## 0.37.2
+
+### Patch Changes
+
+- [`2229f74`](https://github.com/cesr/poncho-ai/commit/2229f74ae4d02c5618c60787a7db925060cc1313) Thanks [@cesr](https://github.com/cesr)! - fix: stop invalidating the prompt cache across runs and preserve cache reads when tool results are in flight.
+
+  Two issues were degrading prompt-cache hit rates to ~0 between turns:
+  1. The system prompt embedded `new Date().toISOString()` (millisecond precision) on every run when a reminder store was active, which changed the very first block of the prefix and prevented any cross-run cache match. The timestamp is now quantized to the hour, which keeps the system prompt stable across runs while still giving the agent a usable sense of time.
+  2. When the message history contained untruncated tool results from the previous run, prompt caching was disabled entirely — no `cache_control` breakpoint was emitted, which also killed cache _reads_ of the stable prefix (system prompt + earlier turns). The breakpoint is now placed immediately before the first untruncated tool result instead, so the stable prefix is still cached and read while the soon-to-be-truncated tail stays out of the cache.
+
+  `addPromptCacheBreakpoints` now takes an optional `targetIndex` to support this.
+
 ## 0.37.1
 
 ### Patch Changes
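A minimal sketch of the hour-quantization described in point 1 of the 0.37.2 entry above; `buildTimeContext` is an illustrative helper name, not code from the package:

```ts
// Sketch only: quantize the timestamp to the hour so the system prompt stays
// byte-identical across runs within the same hour.
function buildTimeContext(now: Date = new Date()): string {
  const hourly = new Date(now); // copy so the caller's Date is untouched
  hourly.setUTCMinutes(0, 0, 0); // drop minutes, seconds, milliseconds
  return `\n\nCurrent UTC time (hour precision): ${hourly.toISOString()}`;
}

// Two runs inside the same hour now emit an identical system-prompt suffix,
// so the cached prefix can be matched across runs:
buildTimeContext(new Date("2024-05-01T10:03:27.123Z"));
// -> "\n\nCurrent UTC time (hour precision): 2024-05-01T10:00:00.000Z"
buildTimeContext(new Date("2024-05-01T10:58:01.999Z"));
// -> same string
```

Finer precision (minutes or seconds) would change the prefix more often and shorten the window in which the cached system prompt stays matchable.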
package/dist/index.js
CHANGED

@@ -6659,15 +6659,19 @@ function isAnthropicModel(model) {
   }
   return model.provider === "anthropic" || model.provider.includes("anthropic") || model.modelId.includes("anthropic") || model.modelId.includes("claude");
 }
-function addPromptCacheBreakpoints(messages, model) {
+function addPromptCacheBreakpoints(messages, model, targetIndex) {
   if (messages.length === 0 || !isAnthropicModel(model)) {
     return messages;
   }
+  const index = targetIndex ?? messages.length - 1;
+  if (index < 0 || index >= messages.length) {
+    return messages;
+  }
   const cacheDirective = {
     anthropic: { cacheControl: { type: "ephemeral" } }
   };
-  return messages.map((message,
-    if (
+  return messages.map((message, i) => {
+    if (i === index) {
       return {
         ...message,
         providerOptions: {
@@ -7800,6 +7804,25 @@ var hasUntruncatedToolResults = (messages) => {
   }
   return false;
 };
+var findLastStableCacheIndex = (messages) => {
+  for (let i = 0; i < messages.length; i += 1) {
+    const msg = messages[i];
+    if (msg.role !== "tool") continue;
+    if (!Array.isArray(msg.content)) continue;
+    for (const part of msg.content) {
+      if (!part || typeof part !== "object") continue;
+      const p = part;
+      if (p.type !== "tool-result" || !p.output) continue;
+      if (p.output.type === "json") return i - 1;
+      if (p.output.type === "text" && typeof p.output.value === "string") {
+        if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
+          return i - 1;
+        }
+      }
+    }
+  }
+  return messages.length - 1;
+};
 var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
 
 You are running locally in development mode. Treat this as an editable agent workspace.
@@ -9072,14 +9095,13 @@ var AgentHarness = class _AgentHarness {
       );
     }
     const hasFullToolResults = hasUntruncatedToolResults(messages);
-
-    if (!enablePromptCache) {
+    if (hasFullToolResults) {
       console.info(
-        `[poncho][cost] Prompt cache
+        `[poncho][cost] Prompt cache breakpoint will be placed before untruncated tool results for run "${runId}" (stable prefix only).`
       );
     } else {
       console.info(
-        `[poncho][cost] Prompt cache
+        `[poncho][cost] Prompt cache breakpoint will be placed at history tail for run "${runId}" (no untruncated tool results).`
       );
     }
     const inputMessageCount = messages.length;
@@ -9174,9 +9196,14 @@ Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the
     const promptWithSkills = this.skillContextWindow ? `${agentPrompt}${developmentContext}
 
 ${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
+    const hourlyTime = (() => {
+      const d = /* @__PURE__ */ new Date();
+      d.setUTCMinutes(0, 0, 0);
+      return d.toISOString();
+    })();
     const timeContext = this.reminderStore ? `
 
-Current UTC time: ${
+Current UTC time (hour precision): ${hourlyTime}` : "";
     return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
   };
   let systemPrompt = buildSystemPrompt();
@@ -9615,7 +9642,12 @@ ${textContent}` };
     const coreMessages = cachedCoreMessages;
     const temperature = agent.frontmatter.model?.temperature ?? 0.2;
     const maxTokens = agent.frontmatter.model?.maxTokens;
-    const
+    const breakpointIndex = hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1;
+    const cachedMessages = addPromptCacheBreakpoints(
+      coreMessages,
+      modelInstance,
+      breakpointIndex
+    );
     const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
     const result = await streamText({
       model: modelInstance,
package/package.json
CHANGED
package/src/harness.ts
CHANGED

@@ -333,6 +333,39 @@ const hasUntruncatedToolResults = (messages: Message[]): boolean => {
   return false;
 };
 
+/**
+ * Finds the last ModelMessage index that's safe to place a prompt cache
+ * breakpoint at — i.e. the last index before any untruncated tool-result.
+ *
+ * Untruncated tool-results from a prior run will be truncated on the next
+ * run, which would invalidate any cache write covering them. Placing the
+ * breakpoint just before them lets us cache only the stable prefix (system
+ * prompt + earlier turns) while still reading it back next turn.
+ *
+ * Returns `messages.length - 1` when there are no untruncated tool-results
+ * (normal tail-of-history caching).
+ */
+const findLastStableCacheIndex = (messages: ModelMessage[]): number => {
+  for (let i = 0; i < messages.length; i += 1) {
+    const msg = messages[i]!;
+    if (msg.role !== "tool") continue;
+    if (!Array.isArray(msg.content)) continue;
+    for (const part of msg.content) {
+      if (!part || typeof part !== "object") continue;
+      const p = part as { type?: string; output?: { type?: string; value?: unknown } };
+      if (p.type !== "tool-result" || !p.output) continue;
+      // JSON outputs bypass truncation (only text content is truncated).
+      if (p.output.type === "json") return i - 1;
+      if (p.output.type === "text" && typeof p.output.value === "string") {
+        if (!p.output.value.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
+          return i - 1;
+        }
+      }
+    }
+  }
+  return messages.length - 1;
+};
+
 const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
 
 You are running locally in development mode. Treat this as an editable agent workspace.
@@ -1799,16 +1832,15 @@ export class AgentHarness {
       );
     }
     const hasFullToolResults = hasUntruncatedToolResults(messages);
-
-    if (!enablePromptCache) {
+    if (hasFullToolResults) {
       console.info(
-        `[poncho][cost] Prompt cache
-        `
+        `[poncho][cost] Prompt cache breakpoint will be placed before untruncated ` +
+          `tool results for run "${runId}" (stable prefix only).`,
       );
     } else {
       console.info(
-        `[poncho][cost] Prompt cache
-        `
+        `[poncho][cost] Prompt cache breakpoint will be placed at history tail ` +
+          `for run "${runId}" (no untruncated tool results).`,
      );
    }
    const inputMessageCount = messages.length;
@@ -1917,8 +1949,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
     const promptWithSkills = this.skillContextWindow
       ? `${agentPrompt}${developmentContext}\n\n${this.skillContextWindow}${browserContext}${fsContext}${isolateContext}`
       : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
+    // Quantize to the hour so the system prompt is stable across runs
+    // within the same hour. Including a per-millisecond timestamp would
+    // invalidate the prompt cache on every run, since the system prompt
+    // is the first block the cache tries to match.
+    const hourlyTime = (() => {
+      const d = new Date();
+      d.setUTCMinutes(0, 0, 0);
+      return d.toISOString();
+    })();
     const timeContext = this.reminderStore
-      ? `\n\nCurrent UTC time: ${
+      ? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
       : "";
     return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
   };
@@ -2452,9 +2493,17 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
 
     const temperature = agent.frontmatter.model?.temperature ?? 0.2;
     const maxTokens = agent.frontmatter.model?.maxTokens;
-
-
-
+    // Place the breakpoint before any untruncated tool-result so we
+    // cache only the stable prefix when prior-run tool results are
+    // still full-fidelity. Otherwise cache at the history tail.
+    const breakpointIndex = hasFullToolResults
+      ? findLastStableCacheIndex(coreMessages)
+      : coreMessages.length - 1;
+    const cachedMessages = addPromptCacheBreakpoints(
+      coreMessages,
+      modelInstance,
+      breakpointIndex,
+    );
 
     const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
 
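A usage sketch of the `findLastStableCacheIndex` helper added above. It is illustrative only: the message shapes are simplified, and `TRUNCATED_PREFIX` is a stand-in for the package's `TOOL_RESULT_TRUNCATED_PREFIX` constant, whose actual value is not shown in this diff:

```ts
// Assumed placeholder value; the real prefix constant lives elsewhere in the package.
const TRUNCATED_PREFIX = "[truncated]";

const history = [
  { role: "user", content: "find flights" },                 // 0
  { role: "assistant", content: "calling the search tool" }, // 1
  {
    role: "tool",                                             // 2: already truncated last run
    content: [{ type: "tool-result", output: { type: "text", value: `${TRUNCATED_PREFIX} ...` } }],
  },
  { role: "assistant", content: "here are the results" },    // 3
  {
    role: "tool",                                             // 4: still full-fidelity
    content: [{ type: "tool-result", output: { type: "text", value: "20 KB of raw search output" } }],
  },
  { role: "user", content: "book the first one" },           // 5
];

// The first untruncated tool-result sits at index 4, so the stable cache
// index is 3: the breakpoint covers messages 0-3 and keeps the
// soon-to-be-truncated tail (4-5) out of the cache write.
// findLastStableCacheIndex(history) === 3
// With no untruncated tool-results it falls back to history.length - 1 === 5.
```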
package/src/prompt-cache.ts
CHANGED

@@ -17,23 +17,32 @@ function isAnthropicModel(model: LanguageModel): boolean {
  * explicit opt-in (Anthropic). For providers with automatic caching
  * (OpenAI), messages are returned unchanged.
  *
- * For Anthropic, marks the
- * conversation prefix is incrementally cached across steps.
+ * For Anthropic, marks the target message with ephemeral cache control so
+ * the conversation prefix is incrementally cached across steps. When
+ * `targetIndex` is omitted, the last message is used (default behavior).
+ * Callers that want to cache only a stable prefix (e.g. skipping tool
+ * results that will be truncated next turn) can pass an earlier index.
  */
 export function addPromptCacheBreakpoints(
   messages: ModelMessage[],
   model: LanguageModel,
+  targetIndex?: number,
 ): ModelMessage[] {
   if (messages.length === 0 || !isAnthropicModel(model)) {
     return messages;
   }
 
+  const index = targetIndex ?? messages.length - 1;
+  if (index < 0 || index >= messages.length) {
+    return messages;
+  }
+
   const cacheDirective = {
     anthropic: { cacheControl: { type: "ephemeral" as const } },
   };
 
-  return messages.map((message,
-  if (
+  return messages.map((message, i) => {
+    if (i === index) {
       return {
         ...message,
         providerOptions: {
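A hedged usage sketch of the extended `addPromptCacheBreakpoints` signature. The `ai` type imports are from the AI SDK; importing the function from the package root is an assumption (the diff only shows `src/prompt-cache.ts`):

```ts
import type { LanguageModel, ModelMessage } from "ai";
import { addPromptCacheBreakpoints } from "@poncho-ai/harness"; // assumed export path

declare const model: LanguageModel;     // an Anthropic model instance
declare const messages: ModelMessage[]; // prior conversation history

// Default behavior (unchanged): breakpoint on the last message.
const tailCached = addPromptCacheBreakpoints(messages, model);

// New in 0.37.2: breakpoint at a stable prefix boundary, e.g. just before
// the first untruncated tool result. Out-of-range indexes and non-Anthropic
// models return the messages unchanged.
const stableIndex = 3; // e.g. the result of findLastStableCacheIndex(messages)
const prefixCached = addPromptCacheBreakpoints(messages, model, stableIndex);

// The marked message now carries the ephemeral cache directive:
// providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }
```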
package/test/harness.test.ts
CHANGED

@@ -617,7 +617,7 @@ description: Safe skill
       script: "../outside.ts",
     }, stubContext);
     expect(result).toMatchObject({
-      error: expect.stringContaining("
+      error: expect.stringContaining("Expected a relative path"),
     });
   });
 