@poncho-ai/harness 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +44 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +32 -14
- package/package.json +2 -2
- package/src/harness.ts +71 -22
- package/src/orchestrator/run-conversation-turn.ts +7 -0
- package/src/prompt-cache.ts +1 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.45.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.46.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[embed-docs] Generated poncho-docs.ts with 4 topics
|
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
[34mCLI[39m tsup v8.5.1
|
|
9
9
|
[34mCLI[39m Target: es2022
|
|
10
10
|
[34mESM[39m Build start
|
|
11
|
+
[32mESM[39m [1mdist/index.js [22m[32m525.40 KB[39m
|
|
11
12
|
[32mESM[39m [1mdist/isolate-VY35DGLM.js [22m[32m49.43 KB[39m
|
|
12
|
-
[32mESM[39m
|
|
13
|
-
[32mESM[39m ⚡️ Build success in 230ms
|
|
13
|
+
[32mESM[39m ⚡️ Build success in 214ms
|
|
14
14
|
[34mDTS[39m Build start
|
|
15
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[32m85.
|
|
15
|
+
[32mDTS[39m ⚡️ Build success in 7043ms
|
|
16
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m85.30 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,49 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.46.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [#118](https://github.com/cesr/poncho-ai/pull/118) [`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8) Thanks [@cesr](https://github.com/cesr)! - harness: 1h static system-prompt cache breakpoint + per-run cache kill-switch
|
|
8
|
+
|
|
9
|
+
Two related changes to Anthropic prompt caching:
|
|
10
|
+
|
|
11
|
+
**1-hour static system-prompt breakpoint.** The harness now splits the
|
|
12
|
+
assembled system prompt into a static portion (agent body + skill
|
|
13
|
+
context + browser/fs/isolate context — stable across many turns and
|
|
14
|
+
jobs within an hour) and a dynamic tail (memory, todos, time). On
|
|
15
|
+
Anthropic models, these are sent as two `role: "system"` messages with
|
|
16
|
+
`cacheControl: { ttl: "1h" }` on the static block. The existing 5-min
|
|
17
|
+
tail breakpoint on the last user/assistant/tool message is retained.
|
|
18
|
+
|
|
19
|
+
This lets later turns and job runs read ~95% of the system prompt at
|
|
20
|
+
0.1× (cache read) instead of paying 1× whenever the 5-min tail cache
|
|
21
|
+
has expired — the previous setup only cached for 5 minutes via the
|
|
22
|
+
tail breakpoint. Within a single user, separate conversations and
|
|
23
|
+
interactive and job runs all share the static cache.
|
|
24
|
+
|
|
25
|
+
**Per-run cache kill-switch.** Added `RunInput.disablePromptCache?:
|
|
26
|
+
boolean` (also exposed on `RunConversationTurnOpts.disablePromptCache`,
|
|
27
|
+
forwarded into `runInput`). When set, the harness skips the 5-min tail
|
|
28
|
+
breakpoint for that run. The 1-hour static breakpoint is still
|
|
29
|
+
applied — the run still benefits from reading the shared static cache,
|
|
30
|
+
just doesn't write a new tail entry that won't be read before TTL.
|
|
31
|
+
|
|
32
|
+
Intended for one-shot programmatic invocations (cron-fired jobs,
|
|
33
|
+
subagent dispatch) where no follow-up turn is coming within the 5-min
|
|
34
|
+
TTL window, so the 1.25× write surcharge would be pure waste.
|
|
35
|
+
|
|
36
|
+
Non-Anthropic providers fall through to the previous single concatenated
|
|
37
|
+
`system:` string with no cache control — those providers auto-cache.
|
|
38
|
+
|
|
39
|
+
Internal: `isAnthropicModel` is now exported from `prompt-cache.ts`
|
|
40
|
+
for reuse at the streamText site.
|
|
41
|
+
|
|
42
|
+
### Patch Changes
|
|
43
|
+
|
|
44
|
+
- Updated dependencies [[`e8df464`](https://github.com/cesr/poncho-ai/commit/e8df4649618cba0b408a6c143f923f0dcb2046c8)]:
|
|
45
|
+
- @poncho-ai/sdk@1.12.0
|
|
46
|
+
|
|
3
47
|
## 0.45.0
|
|
4
48
|
|
|
5
49
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -2036,6 +2036,12 @@ interface RunConversationTurnOpts {
|
|
|
2036
2036
|
parameters?: Record<string, unknown>;
|
|
2037
2037
|
abortSignal?: AbortSignal;
|
|
2038
2038
|
tenantId?: string | null;
|
|
2039
|
+
/**
|
|
2040
|
+
* Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
|
|
2041
|
+
* turns with no follow-up coming (cron-fired jobs, etc.) so the
|
|
2042
|
+
* harness skips the 5-min tail cache breakpoint (the 1-hour static system-prompt breakpoint is still applied).
|
|
2043
|
+
*/
|
|
2044
|
+
disablePromptCache?: boolean;
|
|
2039
2045
|
/** Per-event hook — called for every AgentEvent yielded by the run, in order. */
|
|
2040
2046
|
onEvent?: (event: AgentEvent) => void | Promise<void>;
|
|
2041
2047
|
}
|
package/dist/index.js
CHANGED
|
@@ -10129,10 +10129,13 @@ var AgentHarness = class _AgentHarness {
|
|
|
10129
10129
|
);
|
|
10130
10130
|
}
|
|
10131
10131
|
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
10132
|
-
|
|
10133
|
-
|
|
10132
|
+
const skipTailCache = input.disablePromptCache === true;
|
|
10133
|
+
if (skipTailCache) {
|
|
10134
|
+
costLog.debug(`tail cache breakpoint skipped \u2014 disablePromptCache (run=${runId.slice(0, 12)})`);
|
|
10135
|
+
} else if (hasFullToolResults) {
|
|
10136
|
+
costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
|
|
10134
10137
|
} else {
|
|
10135
|
-
costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
10138
|
+
costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
10136
10139
|
}
|
|
10137
10140
|
const inputMessageCount = messages.length;
|
|
10138
10141
|
const events = [];
|
|
@@ -10221,11 +10224,11 @@ ${typeStubs}
|
|
|
10221
10224
|
|
|
10222
10225
|
Code is wrapped in an async IIFE \u2014 use \`return\` to return a value to the tool result.`;
|
|
10223
10226
|
}
|
|
10224
|
-
const
|
|
10227
|
+
const buildSystemPromptParts = async () => {
|
|
10225
10228
|
const agentPrompt = renderCurrentAgentPrompt();
|
|
10226
10229
|
const tenantSkills = await this.getSkillsForTenant(input.tenantId);
|
|
10227
10230
|
const skillContextWindow = buildSkillContextWindow(tenantSkills);
|
|
10228
|
-
const
|
|
10231
|
+
const staticPart = skillContextWindow ? `${agentPrompt}${developmentContext}
|
|
10229
10232
|
|
|
10230
10233
|
${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
|
|
10231
10234
|
const hourlyTime = (() => {
|
|
@@ -10237,9 +10240,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
|
|
|
10237
10240
|
const timeContext = this.reminderStore ? `
|
|
10238
10241
|
|
|
10239
10242
|
Current UTC time (hour precision): ${hourlyTime}` : "";
|
|
10240
|
-
|
|
10243
|
+
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
10244
|
+
return { staticPart, dynamicPart };
|
|
10241
10245
|
};
|
|
10242
|
-
let
|
|
10246
|
+
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
|
|
10247
|
+
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
10243
10248
|
let lastPromptFingerprint = `${this.agentFileFingerprint}
|
|
10244
10249
|
${this.skillFingerprint}`;
|
|
10245
10250
|
const pushEvent = (event) => {
|
|
@@ -10673,17 +10678,28 @@ ${textContent}` };
|
|
|
10673
10678
|
const coreMessages = cachedCoreMessages;
|
|
10674
10679
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
10675
10680
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
10676
|
-
const
|
|
10677
|
-
const cachedMessages = addPromptCacheBreakpoints(
|
|
10681
|
+
const cachedMessages = skipTailCache ? coreMessages : addPromptCacheBreakpoints(
|
|
10678
10682
|
coreMessages,
|
|
10679
10683
|
modelInstance,
|
|
10680
|
-
|
|
10684
|
+
hasFullToolResults ? findLastStableCacheIndex(coreMessages) : coreMessages.length - 1
|
|
10681
10685
|
);
|
|
10686
|
+
const useStaticCache = isAnthropicModel(modelInstance);
|
|
10687
|
+
const finalMessages = useStaticCache ? [
|
|
10688
|
+
{
|
|
10689
|
+
role: "system",
|
|
10690
|
+
content: staticSystemPart,
|
|
10691
|
+
providerOptions: {
|
|
10692
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
|
|
10693
|
+
}
|
|
10694
|
+
},
|
|
10695
|
+
...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
|
|
10696
|
+
...cachedMessages
|
|
10697
|
+
] : cachedMessages;
|
|
10682
10698
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
10683
10699
|
const result = await streamText({
|
|
10684
10700
|
model: modelInstance,
|
|
10685
|
-
system: systemPrompt,
|
|
10686
|
-
messages:
|
|
10701
|
+
...useStaticCache ? {} : { system: systemPrompt },
|
|
10702
|
+
messages: finalMessages,
|
|
10687
10703
|
tools,
|
|
10688
10704
|
temperature,
|
|
10689
10705
|
abortSignal: input.abortSignal,
|
|
@@ -11308,7 +11324,8 @@ ${textContent}` };
|
|
|
11308
11324
|
const currentFingerprint = `${this.agentFileFingerprint}
|
|
11309
11325
|
${this.skillFingerprint}`;
|
|
11310
11326
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
11311
|
-
|
|
11327
|
+
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
|
|
11328
|
+
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
11312
11329
|
lastPromptFingerprint = currentFingerprint;
|
|
11313
11330
|
}
|
|
11314
11331
|
}
|
|
@@ -13577,7 +13594,8 @@ var runConversationTurn = async (opts) => {
|
|
|
13577
13594
|
),
|
|
13578
13595
|
messages: harnessMessages,
|
|
13579
13596
|
files: opts.files && opts.files.length > 0 ? opts.files : void 0,
|
|
13580
|
-
abortSignal: opts.abortSignal
|
|
13597
|
+
abortSignal: opts.abortSignal,
|
|
13598
|
+
disablePromptCache: opts.disablePromptCache
|
|
13581
13599
|
},
|
|
13582
13600
|
initialContextTokens: conversation.contextTokens ?? 0,
|
|
13583
13601
|
initialContextWindow: conversation.contextWindow ?? 0,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@poncho-ai/harness",
|
|
3
|
-
"version": "0.45.0",
|
|
3
|
+
"version": "0.46.0",
|
|
4
4
|
"description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"mustache": "^4.2.0",
|
|
35
35
|
"yaml": "^2.4.0",
|
|
36
36
|
"zod": "^3.22.0",
|
|
37
|
-
"@poncho-ai/sdk": "1.
|
|
37
|
+
"@poncho-ai/sdk": "1.12.0"
|
|
38
38
|
},
|
|
39
39
|
"peerDependencies": {
|
|
40
40
|
"esbuild": ">=0.17.0",
|
package/src/harness.ts
CHANGED
|
@@ -59,7 +59,7 @@ import {
|
|
|
59
59
|
mergeSkills,
|
|
60
60
|
} from "./skill-context.js";
|
|
61
61
|
import { generateText, streamText, type ModelMessage } from "ai";
|
|
62
|
-
import { addPromptCacheBreakpoints } from "./prompt-cache.js";
|
|
62
|
+
import { addPromptCacheBreakpoints, isAnthropicModel } from "./prompt-cache.js";
|
|
63
63
|
import { jsonSchemaToZod } from "./schema-converter.js";
|
|
64
64
|
import type { SkillMetadata } from "./skill-context.js";
|
|
65
65
|
import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
|
|
@@ -2104,10 +2104,17 @@ export class AgentHarness {
|
|
|
2104
2104
|
);
|
|
2105
2105
|
}
|
|
2106
2106
|
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
2107
|
-
|
|
2108
|
-
|
|
2107
|
+
// The 5-min tail breakpoint is skipped only when the caller explicitly
|
|
2108
|
+
// declares no follow-up is coming (jobs, programmatic one-shots). The
|
|
2109
|
+
// 1-hour static breakpoint on the system prompt is always on — it
|
|
2110
|
+
// amortizes across every later turn or job within the hour.
|
|
2111
|
+
const skipTailCache = input.disablePromptCache === true;
|
|
2112
|
+
if (skipTailCache) {
|
|
2113
|
+
costLog.debug(`tail cache breakpoint skipped — disablePromptCache (run=${runId.slice(0, 12)})`);
|
|
2114
|
+
} else if (hasFullToolResults) {
|
|
2115
|
+
costLog.debug(`tail cache breakpoint before untruncated tool results (run=${runId.slice(0, 12)})`);
|
|
2109
2116
|
} else {
|
|
2110
|
-
costLog.debug(`cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
2117
|
+
costLog.debug(`tail cache breakpoint at history tail (run=${runId.slice(0, 12)})`);
|
|
2111
2118
|
}
|
|
2112
2119
|
const inputMessageCount = messages.length;
|
|
2113
2120
|
const events: AgentEvent[] = [];
|
|
@@ -2210,11 +2217,17 @@ ${typeStubs}
|
|
|
2210
2217
|
Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
|
|
2211
2218
|
}
|
|
2212
2219
|
|
|
2213
|
-
|
|
2220
|
+
// Split the system prompt into a static portion (stable across turns
|
|
2221
|
+
// and jobs within an hour, modulo MCP connect/skill author/memory edit)
|
|
2222
|
+
// and a dynamic tail (memory, todos, time). The static portion gets a
|
|
2223
|
+
// 1-hour Anthropic cache breakpoint downstream; the tail rides the
|
|
2224
|
+
// existing 5-min message-level breakpoint. See the streamText site for
|
|
2225
|
+
// the breakpoint wiring.
|
|
2226
|
+
const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
|
|
2214
2227
|
const agentPrompt = renderCurrentAgentPrompt();
|
|
2215
2228
|
const tenantSkills = await this.getSkillsForTenant(input.tenantId);
|
|
2216
2229
|
const skillContextWindow = buildSkillContextWindow(tenantSkills);
|
|
2217
|
-
const
|
|
2230
|
+
const staticPart = skillContextWindow
|
|
2218
2231
|
? `${agentPrompt}${developmentContext}\n\n${skillContextWindow}${browserContext}${fsContext}${isolateContext}`
|
|
2219
2232
|
: `${agentPrompt}${developmentContext}${browserContext}${fsContext}${isolateContext}`;
|
|
2220
2233
|
// Quantize to the hour so the system prompt is stable across runs
|
|
@@ -2230,9 +2243,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2230
2243
|
const timeContext = this.reminderStore
|
|
2231
2244
|
? `\n\nCurrent UTC time (hour precision): ${hourlyTime}`
|
|
2232
2245
|
: "";
|
|
2233
|
-
|
|
2246
|
+
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
2247
|
+
return { staticPart, dynamicPart };
|
|
2234
2248
|
};
|
|
2235
|
-
let
|
|
2249
|
+
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
2250
|
+
await buildSystemPromptParts();
|
|
2251
|
+
// Concatenated form for consumers that need a single string: the non-Anthropic `system:` fallback, plus token estimation and telemetry.
|
|
2252
|
+
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
2236
2253
|
let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
2237
2254
|
|
|
2238
2255
|
const pushEvent = (event: AgentEvent): AgentEvent => {
|
|
@@ -2772,25 +2789,55 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2772
2789
|
|
|
2773
2790
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
2774
2791
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
2775
|
-
// Place the breakpoint before any untruncated tool-result so
|
|
2776
|
-
// cache only the stable prefix when prior-run tool results are
|
|
2777
|
-
// still full-fidelity. Otherwise cache at the history tail.
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
const cachedMessages =
|
|
2782
|
-
coreMessages
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2792
|
+
// Place the tail breakpoint before any untruncated tool-result so
|
|
2793
|
+
// we cache only the stable prefix when prior-run tool results are
|
|
2794
|
+
// still full-fidelity. Otherwise cache at the history tail. When
|
|
2795
|
+
// `skipTailCache` is set (per-run override), don't write the tail
|
|
2796
|
+
// breakpoint at all. The 1-hour static-prefix breakpoint is added
|
|
2797
|
+
// separately when assembling the final messages array.
|
|
2798
|
+
const cachedMessages = skipTailCache
|
|
2799
|
+
? coreMessages
|
|
2800
|
+
: addPromptCacheBreakpoints(
|
|
2801
|
+
coreMessages,
|
|
2802
|
+
modelInstance,
|
|
2803
|
+
hasFullToolResults
|
|
2804
|
+
? findLastStableCacheIndex(coreMessages)
|
|
2805
|
+
: coreMessages.length - 1,
|
|
2806
|
+
);
|
|
2807
|
+
|
|
2808
|
+
// Anthropic: split system into two blocks with a 1-hour cache
|
|
2809
|
+
// breakpoint at the boundary between the static portion (agent
|
|
2810
|
+
// body + skills + browser/fs/isolate context — stable across many
|
|
2811
|
+
// turns and jobs) and the dynamic tail (memory, todos, time).
|
|
2812
|
+
// The static block becomes a hot cache that every later turn and
|
|
2813
|
+
// job in the hour reads at 0.1× — much bigger payoff than the
|
|
2814
|
+
// 5-min tail breakpoint, which only survives active back-and-forth.
|
|
2815
|
+
// For non-Anthropic models, fall back to the single concatenated
|
|
2816
|
+
// string via `system:` — those providers auto-cache.
|
|
2817
|
+
const useStaticCache = isAnthropicModel(modelInstance);
|
|
2818
|
+
const finalMessages: ModelMessage[] = useStaticCache
|
|
2819
|
+
? [
|
|
2820
|
+
{
|
|
2821
|
+
role: "system",
|
|
2822
|
+
content: staticSystemPart,
|
|
2823
|
+
providerOptions: {
|
|
2824
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
|
|
2825
|
+
},
|
|
2826
|
+
},
|
|
2827
|
+
...(dynamicSystemPart.length > 0
|
|
2828
|
+
? [{ role: "system" as const, content: dynamicSystemPart }]
|
|
2829
|
+
: []),
|
|
2830
|
+
...cachedMessages,
|
|
2831
|
+
]
|
|
2832
|
+
: cachedMessages;
|
|
2786
2833
|
|
|
2787
2834
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
2788
2835
|
|
|
2789
2836
|
|
|
2790
2837
|
const result = await streamText({
|
|
2791
2838
|
model: modelInstance,
|
|
2792
|
-
system: systemPrompt,
|
|
2793
|
-
messages:
|
|
2839
|
+
...(useStaticCache ? {} : { system: systemPrompt }),
|
|
2840
|
+
messages: finalMessages,
|
|
2794
2841
|
tools,
|
|
2795
2842
|
temperature,
|
|
2796
2843
|
abortSignal: input.abortSignal,
|
|
@@ -3532,7 +3579,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3532
3579
|
agent = this.parsedAgent as ParsedAgent;
|
|
3533
3580
|
const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
3534
3581
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
3535
|
-
|
|
3582
|
+
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
3583
|
+
await buildSystemPromptParts());
|
|
3584
|
+
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
3536
3585
|
lastPromptFingerprint = currentFingerprint;
|
|
3537
3586
|
}
|
|
3538
3587
|
}
|
|
@@ -62,6 +62,12 @@ export interface RunConversationTurnOpts {
|
|
|
62
62
|
parameters?: Record<string, unknown>;
|
|
63
63
|
abortSignal?: AbortSignal;
|
|
64
64
|
tenantId?: string | null;
|
|
65
|
+
/**
|
|
66
|
+
* Forwarded to `RunInput.disablePromptCache`. Set true for one-shot
|
|
67
|
+
* turns with no follow-up coming (cron-fired jobs, etc.) so the
|
|
68
|
+
* harness skips the 5-min tail cache breakpoint (the 1-hour static system-prompt breakpoint is still applied).
|
|
69
|
+
*/
|
|
70
|
+
disablePromptCache?: boolean;
|
|
65
71
|
/** Per-event hook — called for every AgentEvent yielded by the run, in order. */
|
|
66
72
|
onEvent?: (event: AgentEvent) => void | Promise<void>;
|
|
67
73
|
}
|
|
@@ -203,6 +209,7 @@ export const runConversationTurn = async (
|
|
|
203
209
|
messages: harnessMessages,
|
|
204
210
|
files: opts.files && opts.files.length > 0 ? opts.files : undefined,
|
|
205
211
|
abortSignal: opts.abortSignal,
|
|
212
|
+
disablePromptCache: opts.disablePromptCache,
|
|
206
213
|
},
|
|
207
214
|
initialContextTokens: conversation.contextTokens ?? 0,
|
|
208
215
|
initialContextWindow: conversation.contextWindow ?? 0,
|
package/src/prompt-cache.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ModelMessage, LanguageModel } from "ai";
|
|
2
2
|
|
|
3
|
-
function isAnthropicModel(model: LanguageModel): boolean {
|
|
3
|
+
export function isAnthropicModel(model: LanguageModel): boolean {
|
|
4
4
|
if (typeof model === "string") {
|
|
5
5
|
return model.includes("anthropic") || model.includes("claude");
|
|
6
6
|
}
|