@poncho-ai/harness 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +63 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +35 -18
- package/package.json +2 -2
- package/src/harness.ts +78 -28
- package/src/orchestrator/run-conversation-turn.ts +7 -0
- package/src/prompt-cache.ts +1 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.45.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.47.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[embed-docs] Generated poncho-docs.ts with 4 topics
|
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
[34mCLI[39m Target: es2022
|
|
10
10
|
[34mESM[39m Build start
|
|
11
11
|
[32mESM[39m [1mdist/isolate-VY35DGLM.js [22m[32m49.43 KB[39m
|
|
12
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
13
|
-
[32mESM[39m ⚡️ Build success in
|
|
12
|
+
[32mESM[39m [1mdist/index.js [22m[32m525.35 KB[39m
|
|
13
|
+
[32mESM[39m ⚡️ Build success in 249ms
|
|
14
14
|
[34mDTS[39m Build start
|
|
15
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[32m85.
|
|
15
|
+
[32mDTS[39m ⚡️ Build success in 7482ms
|
|
16
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m85.30 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,68 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.47.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [#120](https://github.com/cesr/poncho-ai/pull/120) [`6cda4ab`](https://github.com/cesr/poncho-ai/commit/6cda4ab39865d89590f42927e281c5fb58cc99f4) Thanks [@cesr](https://github.com/cesr)! - harness: always inject the current hour into the system prompt
|
|
8
|
+
|
|
9
|
+
The dynamic system-prompt builder now emits
|
|
10
|
+
`Current UTC time (hour precision): Mon 2026-05-20T09Z` on every run,
|
|
11
|
+
not just when a `reminderStore` is configured. Knowing "what day is it"
|
|
12
|
+
is universally useful — drafting messages, computing relative dates,
|
|
13
|
+
deciding whether a stale memory still applies — and isn't specific to
|
|
14
|
+
reminder-firing logic.
|
|
15
|
+
|
|
16
|
+
Format also drops the zeroed-out minutes/seconds tail (`T09:00:00.000Z`
|
|
17
|
+
→ `T09Z`) so the hour quantization is visible to the model rather than
|
|
18
|
+
hidden behind noise. The prompt-cache properties are unchanged: the
|
|
19
|
+
string is still hour-stable and lives in the dynamic prompt section, so
|
|
20
|
+
hourly rollovers don't bust the static cache breakpoint.
|
|
21
|
+
|
|
22
|
+
## 0.46.0
|
|
23
|
+
|
|
24
|
+
### Minor Changes
|
|
25
|
+
|
|
26
|
+
- [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
|
|
27
|
+
|
|
28
|
+
Two related changes to Anthropic prompt caching:
|
|
29
|
+
|
|
30
|
+
**1-hour static system-prompt breakpoint.** The harness now splits the
|
|
31
|
+
assembled system prompt into a static portion (agent body + skill
|
|
32
|
+
context + browser/fs/isolate context — stable across many turns and
|
|
33
|
+
jobs within an hour) and a dynamic tail (memory, todos, time). On
|
|
34
|
+
Anthropic models, these are sent as two `role: "system"` messages with
|
|
35
|
+
`cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
|
|
36
|
+
tail breakpoint on the last user/assistant/tool message is retained.
|
|
37
|
+
|
|
38
|
+
This lets later turns and job runs read ~95% of the system prompt at
|
|
39
|
+
0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
|
|
40
|
+
has expired — the previous setup only cached for 5 minutes via the
|
|
41
|
+
tail breakpoint. For a given user, separate conversations and interactive-vs-job runs
|
|
42
|
+
all share the static cache.
|
|
43
|
+
|
|
44
|
+
**Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
|
|
45
|
+
boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
|
|
46
|
+
forwarded into `runInput`). When set, the harness skips the 5-min tail
|
|
47
|
+
breakpoint for that run. The 1-hour static breakpoint is still
|
|
48
|
+
applied — the run still benefits from reading the shared static cache,
|
|
49
|
+
just doesn't write a new tail entry that won't be read before TTL.
|
|
50
|
+
|
|
51
|
+
Intended for one-shot programmatic invocations (cron-fired jobs,
|
|
52
|
+
subagent dispatch) where no follow-up turn is coming within the 5-min
|
|
53
|
+
TTL window, so the 1.25× write surcharge would be pure waste.
|
|
54
|
+
|
|
55
|
+
Non-Anthropic providers fall through to the previous single concatenated
|
|
56
|
+
`system:` string with no cache control — those providers auto-cache.
|
|
57
|
+
|
|
58
|
+
Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
|
|
59
|
+
for reuse at the streamText site.
|
|
60
|
+
|
|
61
|
+
### Patch Changes
|
|
62
|
+
|
|
63
|
+
- Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
|
|
64
|
+
- @poncho-ai/sdk@1.12.0
|
|
65
|
+
|
|
3
66
|
## 0.45.0
|
|
4
67
|
|
|
5
68
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -2036,6 +2036,12 @@ interface RunConversationTurnOpts {
|
|
|
2036
2036
|
parameters?: Record<string, unknown>;
|
|
2037
2037
|
abortSignal?: AbortSignal;
|
|
2038
2038
|
tenantId?: string | null;
|
|
2039
|
+
/**
|
|
2040
|
+
* Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
|
|
2041
|
+
* turns with no follow-up coming (cron-fired jobs, etc.) so the
|
|
2042
|
+
* harness skips the 5-min Anthropic tail cache write (the 1-hour static breakpoint still applies).
|
|
2043
|
+
*/
|
|
2044
|
+
disablePromptCache?: boolean;
|
|
2039
2045
|
/** Per-event hook — called for every AgentEvent yielded by the run, in order. */
|
|
2040
2046
|
onEvent?: (event: AgentEvent) => void | Promise<void>;
|
|
2041
2047
|
}
|
package/dist/index.js
CHANGED
|
@@ -10129,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
|
|
|
10129
10129
|
);
|
|
10130
10130
|
}
|
|
10131
10131
|
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
10132
|
-
|
|
10133
|
-
|
|
10132
|
+
const skipTailCache = input.disablePromptCache === true;
|
|
10133
|
+
if (skipTailCache) {
|
|
10134
|
+
costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
|
|
10135
|
+
} else if (hasFullToolResults) {
|
|
10136
|
+
costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
|
|
10134
10137
|
} else {
|
|
10135
|
-
costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
10138
|
+
costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
10136
10139
|
}
|
|
10137
10140
|
const inputMessageCount = messages.length;
|
|
10138
10141
|
const events = [];
|
|
@@ -10221,25 +10224,26 @@ ${typeStubs}
|
|
|
10221
10224
|
|
|
10222
10225
|
Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
|
|
10223
10226
|
}
|
|
10224
|
-
const
|
|
10227
|
+
const buildSystemPromptParts = async () => {
|
|
10225
10228
|
const agentPrompt = renderCurrentAgentPrompt();
|
|
10226
10229
|
const tenantSkills = await this.getSkillsForTenant(input.tenantId);
|
|
10227
10230
|
const skillContextWindow = buildSkillContextWindow(tenantSkills);
|
|
10228
|
-
const
|
|
10231
|
+
const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
|
|
10229
10232
|
|
|
10230
10233
|
${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
|
|
10231
10234
|
const hourlyTime = (() => {
|
|
10232
10235
|
const d = /* @__PURE__ */ new Date();
|
|
10233
|
-
d.setUTCMinutes(0, 0, 0);
|
|
10234
10236
|
const weekday = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"][d.getUTCDay()];
|
|
10235
|
-
return `${weekday} ${d.toISOString()}`;
|
|
10237
|
+
return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
|
|
10236
10238
|
})();
|
|
10237
|
-
const timeContext =
|
|
10239
|
+
const timeContext = `
|
|
10238
10240
|
|
|
10239
|
-
Current UTC time (hour precision): ${hourlyTime}
|
|
10240
|
-
|
|
10241
|
+
Current UTC time (hour precision): ${hourlyTime}`;
|
|
10242
|
+
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
10243
|
+
return { staticPart, dynamicPart };
|
|
10241
10244
|
};
|
|
10242
|
-
let
|
|
10245
|
+
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
|
|
10246
|
+
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
10243
10247
|
let lastPromptFingerprint = `${this.agentFileFingerprint}
|
|
10244
10248
|
${this.skillFingerprint}`;
|
|
10245
10249
|
const pushEvent = (event) => {
|
|
@@ -10673,17 +10677,28 @@ ${textContent}` };
|
|
|
10673
10677
|
const coreMessages = cachedCoreMessages;
|
|
10674
10678
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
10675
10679
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
10676
|
-
const
|
|
10677
|
-
const cachedMessages = addPromptCacheBreakpoints(
|
|
10680
|
+
const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
|
|
10678
10681
|
coreMessages,
|
|
10679
10682
|
modelInstance,
|
|
10680
|
-
|
|
10683
|
+
hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
|
|
10681
10684
|
);
|
|
10685
|
+
const useStaticCache = isAnthropicModel(modelInstance);
|
|
10686
|
+
const finalMessages = useStaticCache ? [
|
|
10687
|
+
{
|
|
10688
|
+
role: "system",
|
|
10689
|
+
content: staticSystemPart,
|
|
10690
|
+
providerOptions: {
|
|
10691
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
|
|
10692
|
+
}
|
|
10693
|
+
},
|
|
10694
|
+
...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
|
|
10695
|
+
...cachedMessages
|
|
10696
|
+
] : cachedMessages;
|
|
10682
10697
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
10683
10698
|
const result = await streamText({
|
|
10684
10699
|
model: modelInstance,
|
|
10685
|
-
system: systemPrompt,
|
|
10686
|
-
messages:
|
|
10700
|
+
...useStaticCache ? {} : { system: systemPrompt },
|
|
10701
|
+
messages: finalMessages,
|
|
10687
10702
|
tools,
|
|
10688
10703
|
temperature,
|
|
10689
10704
|
abortSignal: input.abortSignal,
|
|
@@ -11308,7 +11323,8 @@ ${textContent}` };
|
|
|
11308
11323
|
const currentFingerprint = `${this.agentFileFingerprint}
|
|
11309
11324
|
${this.skillFingerprint}`;
|
|
11310
11325
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
11311
|
-
|
|
11326
|
+
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
|
|
11327
|
+
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
11312
11328
|
lastPromptFingerprint = currentFingerprint;
|
|
11313
11329
|
}
|
|
11314
11330
|
}
|
|
@@ -13577,7 +13593,8 @@ var runConversationTurn = async (opts) => {
|
|
|
13577
13593
|
),
|
|
13578
13594
|
messages: harnessMessages,
|
|
13579
13595
|
files: opts.files && opts.files.length > 0 ? opts.files : void 0,
|
|
13580
|
-
abortSignal: opts.abortSignal
|
|
13596
|
+
abortSignal: opts.abortSignal,
|
|
13597
|
+
disablePromptCache: opts.disablePromptCache
|
|
13581
13598
|
},
|
|
13582
13599
|
initialContextTokens: conversation.contextTokens ?? 0,
|
|
13583
13600
|
initialContextWindow: conversation.contextWindow ?? 0,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@poncho-ai/harness",
|
|
3
|
-
"version": "0.45.0",
|
|
3
|
+
"version": "0.47.0",
|
|
4
4
|
"description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"mustache": "^4.2.0",
|
|
35
35
|
"yaml": "^2.4.0",
|
|
36
36
|
"zod": "^3.22.0",
|
|
37
|
-
"@poncho-ai/sdk": "1.
|
|
37
|
+
"@poncho-ai/sdk": "1.12.0"
|
|
38
38
|
},
|
|
39
39
|
"peerDependencies": {
|
|
40
40
|
"esbuild": ">=0.17.0",
|
package/src/harness.ts
CHANGED
|
@@ -59,7 +59,7 @@ import {
|
|
|
59
59
|
mergeSkills,
|
|
60
60
|
} from "./skill-context.js";
|
|
61
61
|
import { generateText, streamText, type ModelMessage } from "ai";
|
|
62
|
-
import { addPromptCacheBreakpoints } from "./prompt-cache.js";
|
|
62
|
+
import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
|
|
63
63
|
import { jsonSchemaToZod } from "./schema-converter.js";
|
|
64
64
|
import type { SkillMetadata } from "./skill-context.js";
|
|
65
65
|
import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
|
|
@@ -2104,10 +2104,17 @@ export class AgentHarness {
|
|
|
2104
2104
|
);
|
|
2105
2105
|
}
|
|
2106
2106
|
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
2107
|
-
|
|
2108
|
-
|
|
2107
|
+
// The 5-min tail breakpoint is skipped only when the caller explicitly
|
|
2108
|
+
// declares no follow-up is coming (jobs, programmatic one-shots). The
|
|
2109
|
+
// 1-hour static breakpoint on the system prompt is always on — it
|
|
2110
|
+
// amortizes across every later turn or job within the hour.
|
|
2111
|
+
const skipTailCache = input.disablePromptCache === true;
|
|
2112
|
+
if (skipTailCache) {
|
|
2113
|
+
costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
|
|
2114
|
+
} else if (hasFullToolResults) {
|
|
2115
|
+
costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
|
|
2109
2116
|
} else {
|
|
2110
|
-
costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
2117
|
+
costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
2111
2118
|
}
|
|
2112
2119
|
const inputMessageCount = messages.length;
|
|
2113
2120
|
const events: AgentEvent[] = [];
|
|
@@ -2210,29 +2217,40 @@ ${typeStubs}
|
|
|
2210
2217
|
Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
|
|
2211
2218
|
}
|
|
2212
2219
|
|
|
2213
|
-
|
|
2220
|
+
// Split the system prompt into a static portion (stable across turns
|
|
2221
|
+
// and jobs within an hour, modulo MCP connect/skill author/memory edit)
|
|
2222
|
+
// and a dynamic tail (memory, todos, time). The static portion gets a
|
|
2223
|
+
// 1-hour Anthropic cache breakpoint downstream; the tail rides the
|
|
2224
|
+
// existing 5-min message-level breakpoint. See the streamText site for
|
|
2225
|
+
// the breakpoint wiring.
|
|
2226
|
+
const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
|
|
2214
2227
|
const agentPrompt = renderCurrentAgentPrompt();
|
|
2215
2228
|
const tenantSkills = await this.getSkillsForTenant(input.tenantId);
|
|
2216
2229
|
const skillContextWindow = buildSkillContextWindow(tenantSkills);
|
|
2217
|
-
const
|
|
2230
|
+
const staticPart = skillContextWindow
|
|
2218
2231
|
? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
|
|
2219
2232
|
: `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
|
|
2220
2233
|
// Quantize to the hour so the system prompt is stable across runs
|
|
2221
2234
|
// within the same hour. Including a per-millisecond timestamp would
|
|
2222
2235
|
// invalidate the prompt cache on every run, since the system prompt
|
|
2223
|
-
// is the first block the cache tries to match.
|
|
2236
|
+
// is the first block the cache tries to match. Format is
|
|
2237
|
+
// `Weekday YYYY-MM-DDTHHZ` — minutes/seconds dropped to make the
|
|
2238
|
+
// hour-quantization visible to the model rather than hidden behind
|
|
2239
|
+
// a zeroed-out tail. Always emitted: every agent needs to know
|
|
2240
|
+
// "what day is it" even without reminders configured.
|
|
2224
2241
|
const hourlyTime = (() => {
|
|
2225
2242
|
const d = new Date();
|
|
2226
|
-
d.setUTCMinutes(0, 0, 0);
|
|
2227
2243
|
const weekday = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"][d.getUTCDay()];
|
|
2228
|
-
return `${weekday} ${d.toISOString()}`;
|
|
2244
|
+
return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
|
|
2229
2245
|
})();
|
|
2230
|
-
const timeContext =
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
return `${promptWithSkills}${memoryContext}${todoContext}${timeContext}`;
|
|
2246
|
+
const timeContext = `\n\nCurrent UTC time (hour precision): ${hourlyTime}`;
|
|
2247
|
+
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
2248
|
+
return { staticPart, dynamicPart };
|
|
2234
2249
|
};
|
|
2235
|
-
let
|
|
2250
|
+
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
2251
|
+
await buildSystemPromptParts();
|
|
2252
|
+
// Concatenated form for legacy consumers (token estimation, telemetry).
|
|
2253
|
+
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
2236
2254
|
let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
2237
2255
|
|
|
2238
2256
|
const pushEvent = (event: AgentEvent): AgentEvent => {
|
|
@@ -2772,25 +2790,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2772
2790
|
|
|
2773
2791
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
2774
2792
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
2775
|
-
// Place the breakpoint before any untruncated tool-result so
|
|
2776
|
-
// cache only the stable prefix when prior-run tool results are
|
|
2777
|
-
// still full-fidelity. Otherwise cache at the history tail.
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
const cachedMessages =
|
|
2782
|
-
coreMessages
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2793
|
+
// Place the tail breakpoint before any untruncated tool-result so
|
|
2794
|
+
// we cache only the stable prefix when prior-run tool results are
|
|
2795
|
+
// still full-fidelity. Otherwise cache at the history tail. When
|
|
2796
|
+
// `skipTailCache` is set (per-run override), don't write the tail
|
|
2797
|
+
// breakpoint at all. The 1-hour static-prefix breakpoint is added
|
|
2798
|
+
// separately when assembling the final messages array.
|
|
2799
|
+
const cachedMessages = skipTailCache
|
|
2800
|
+
? coreMessages
|
|
2801
|
+
: addPromptCacheBreakpoints(
|
|
2802
|
+
coreMessages,
|
|
2803
|
+
modelInstance,
|
|
2804
|
+
hasFullToolResults
|
|
2805
|
+
? findLastStableCacheIndex(coreMessages)
|
|
2806
|
+
: coreMessages.length - 1,
|
|
2807
|
+
);
|
|
2808
|
+
|
|
2809
|
+
// Anthropic: split system into two blocks with a 1-hour cache
|
|
2810
|
+
// breakpoint at the boundary between the static portion (agent
|
|
2811
|
+
// body + skills + browser/fs/isolate context — stable across many
|
|
2812
|
+
// turns and jobs) and the dynamic tail (memory, todos, time).
|
|
2813
|
+
// The static block becomes a hot cache that every later turn and
|
|
2814
|
+
// job in the hour reads at 0.1× — much bigger payoff than the
|
|
2815
|
+
// 5-min tail breakpoint, which only survives active back-and-forth.
|
|
2816
|
+
// For non-Anthropic models, fall back to the single concatenated
|
|
2817
|
+
// string via `system:` — those providers auto-cache.
|
|
2818
|
+
const useStaticCache = isAnthropicModel(modelInstance);
|
|
2819
|
+
const finalMessages: ModelMessage[] = useStaticCache
|
|
2820
|
+
? [
|
|
2821
|
+
{
|
|
2822
|
+
role: "system",
|
|
2823
|
+
content: staticSystemPart,
|
|
2824
|
+
providerOptions: {
|
|
2825
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
|
|
2826
|
+
},
|
|
2827
|
+
},
|
|
2828
|
+
...(dynamicSystemPart.length > 0
|
|
2829
|
+
? [{ role: "system" as const, content: dynamicSystemPart }]
|
|
2830
|
+
: []),
|
|
2831
|
+
...cachedMessages,
|
|
2832
|
+
]
|
|
2833
|
+
: cachedMessages;
|
|
2786
2834
|
|
|
2787
2835
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
2788
2836
|
|
|
2789
2837
|
|
|
2790
2838
|
const result = await streamText({
|
|
2791
2839
|
model: modelInstance,
|
|
2792
|
-
system: systemPrompt,
|
|
2793
|
-
messages:
|
|
2840
|
+
...(useStaticCache ? {} : { system: systemPrompt }),
|
|
2841
|
+
messages: finalMessages,
|
|
2794
2842
|
tools,
|
|
2795
2843
|
temperature,
|
|
2796
2844
|
abortSignal: input.abortSignal,
|
|
@@ -3532,7 +3580,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3532
3580
|
agent = this.parsedAgent as ParsedAgent;
|
|
3533
3581
|
const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
3534
3582
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
3535
|
-
|
|
3583
|
+
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
3584
|
+
await buildSystemPromptParts());
|
|
3585
|
+
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
3536
3586
|
lastPromptFingerprint = currentFingerprint;
|
|
3537
3587
|
}
|
|
3538
3588
|
}
|
|
@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
|
|
|
62
62
|
parameters?: Record<string, unknown>;
|
|
63
63
|
abortSignal?: AbortSignal;
|
|
64
64
|
tenantId?: string | null;
|
|
65
|
+
/**
|
|
66
|
+
* Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
|
|
67
|
+
* turns with no follow-up coming (cron-fired jobs, etc.) so the
|
|
68
|
+
* harness skips the 5-min Anthropic tail cache write (the 1-hour static breakpoint still applies).
|
|
69
|
+
*/
|
|
70
|
+
disablePromptCache?: boolean;
|
|
65
71
|
/** Per-event hook — called for every AgentEvent yielded by the run, in order. */
|
|
66
72
|
onEvent?: (event: AgentEvent) => void | Promise<void>;
|
|
67
73
|
}
|
|
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
|
|
|
203
209
|
messages: harnessMessages,
|
|
204
210
|
files: opts.files && opts.files.length > 0 ? opts.files : undefined,
|
|
205
211
|
abortSignal: opts.abortSignal,
|
|
212
|
+
disablePromptCache: opts.disablePromptCache,
|
|
206
213
|
},
|
|
207
214
|
initialContextTokens: conversation.contextTokens ?? 0,
|
|
208
215
|
initialContextWindow: conversation.contextWindow ?? 0,
|
package/src/prompt-cache.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ModelMessage, LanguageModel } from "ai";
|
|
2
2
|
|
|
3
|
-
function isAnthropicModel(model: LanguageModel): boolean {
|
|
3
|
+
export function isAnthropicModel(model: LanguageModel): boolean {
|
|
4
4
|
if (typeof model === "string") {
|
|
5
5
|
return model.includes("anthropic") || model.includes("claude");
|
|
6
6
|
}
|