@dungle-scrubs/tallow 0.8.13 → 0.8.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/interactive-mode-patch.d.ts +14 -4
- package/dist/interactive-mode-patch.d.ts.map +1 -1
- package/dist/interactive-mode-patch.js +103 -2
- package/dist/interactive-mode-patch.js.map +1 -1
- package/dist/sdk.d.ts +80 -0
- package/dist/sdk.d.ts.map +1 -1
- package/dist/sdk.js +481 -31
- package/dist/sdk.js.map +1 -1
- package/extensions/__integration__/context-budget-guard.test.ts +236 -0
- package/extensions/_shared/context-budget-interop.ts +162 -0
- package/extensions/ask-user-question-tool/__tests__/render-regression.test.ts +203 -0
- package/extensions/ask-user-question-tool/index.ts +70 -9
- package/extensions/background-task-tool/index.ts +10 -2
- package/extensions/bash-tool-enhanced/index.ts +10 -2
- package/extensions/plan-mode-tool/__tests__/utils.test.ts +180 -0
- package/extensions/plan-mode-tool/extension.json +1 -0
- package/extensions/plan-mode-tool/index.ts +33 -0
- package/extensions/plan-mode-tool/utils.ts +60 -0
- package/extensions/web-fetch-tool/__tests__/adaptive-cap.test.ts +148 -0
- package/extensions/web-fetch-tool/index.ts +140 -9
- package/extensions/wezterm-pane-control/__tests__/index.test.ts +23 -2
- package/extensions/wezterm-pane-control/index.ts +65 -1
- package/package.json +4 -4
- package/skills/tallow-expert/SKILL.md +1 -0
package/dist/sdk.js
CHANGED
|
@@ -17,6 +17,8 @@ import { normalizeStartupProfile } from "./startup-profile.js";
|
|
|
17
17
|
import { emitStartupTiming, isStartupTimingEnabled } from "./startup-timing.js";
|
|
18
18
|
/**
 * Marker key used on summarized historical tool results.
 */
export const TOOL_RESULT_RETENTION_MARKER = "__tallow_summarized_tool_result__";

/**
 * Marker key used on ingestion-time budget-guarded tool results.
 * Guard metadata is stored under this key inside the result's `details`.
 */
export const TOOL_RESULT_BUDGET_GUARD_MARKER = "__tallow_budget_guard__";
|
|
20
22
|
/** Default retention policy for historical tool-result payloads. */
|
|
21
23
|
const DEFAULT_TOOL_RESULT_RETENTION_POLICY = {
|
|
22
24
|
enabled: true,
|
|
@@ -24,6 +26,23 @@ const DEFAULT_TOOL_RESULT_RETENTION_POLICY = {
|
|
|
24
26
|
maxRetainedBytesPerResult: 48 * 1024,
|
|
25
27
|
previewChars: 600,
|
|
26
28
|
};
|
|
29
|
+
// ─── Context Budget Policy ───────────────────────────────────────────────────

/**
 * Compiled-in context-budget defaults, used for any field that no settings
 * layer provides.
 */
const DEFAULT_CONTEXT_BUDGET_POLICY = {
    // Usage percentage at which per-tool budgets start being halved.
    softThresholdPercent: 75,
    // Usage percentage at which per-tool budgets drop to the minimum.
    hardThresholdPercent: 90,
    // Floor for a single tool result: 4 KiB.
    minPerToolBytes: 4096,
    // Ceiling for a single tool result: 512 KiB.
    maxPerToolBytes: 524288,
    // Tokens held back from the remaining-context estimate.
    perTurnReserveTokens: 8000,
    // Cap applied while context usage is unknown: 32 KiB.
    unknownUsageFallbackCapBytes: 32768,
};

/**
 * Event-bus channel names for the planner ↔ tool handshake:
 * `budgetApi` carries the published API object, `budgetApiRequest` asks
 * for it to be (re)published.
 */
const CONTEXT_BUDGET_API_CHANNELS = {
    budgetApi: "interop.api.v1.context-budget.api",
    budgetApiRequest: "interop.api.v1.context-budget.api-request",
};

/** Lifetime of a per-tool context-budget envelope, in milliseconds (30 s). */
const CONTEXT_BUDGET_ENVELOPE_TTL_MS = 30 * 1000;
|
|
27
46
|
/** Map of tool name → tool object for --tools flag resolution. */
|
|
28
47
|
const TOOL_MAP = {
|
|
29
48
|
read: readTool,
|
|
@@ -108,6 +127,116 @@ export function resolveToolResultRetentionPolicy(params) {
|
|
|
108
127
|
previewChars: toNonNegativeInt(merged.previewChars, DEFAULT_TOOL_RESULT_RETENTION_POLICY.previewChars, 10_000),
|
|
109
128
|
};
|
|
110
129
|
}
|
|
130
|
+
/**
 * Resolve the effective context-budget policy from layered settings.
 *
 * Precedence: global settings < project settings < runtime overrides.
 * Any unset field falls back to the compiled default.
 *
 * Each field is clamped individually, then the pairs are made mutually
 * consistent: the soft threshold never exceeds the hard threshold and the
 * per-tool minimum never exceeds the per-tool maximum. Without this, an
 * inverted min/max pair would hand out a larger budget under hard pressure
 * (which returns the minimum) than under normal operation (which is capped
 * at the maximum).
 *
 * @param params - Layered settings inputs (`globalSettings`,
 *   `projectSettings`, `runtimeSettings`)
 * @returns Resolved context-budget policy with validated numeric bounds
 */
export function resolveContextBudgetPolicy(params) {
    const globalConfig = readContextBudgetConfig(params.globalSettings);
    const projectConfig = readContextBudgetConfig(params.projectSettings);
    const runtimeConfig = readContextBudgetConfig(params.runtimeSettings);
    // Later spreads win, which implements the documented precedence.
    const merged = {
        ...globalConfig,
        ...projectConfig,
        ...runtimeConfig,
    };
    const hardThresholdPercent = toNonNegativeInt(merged.hardThresholdPercent, DEFAULT_CONTEXT_BUDGET_POLICY.hardThresholdPercent, 100);
    const softThresholdPercent = Math.min(hardThresholdPercent, toNonNegativeInt(merged.softThresholdPercent, DEFAULT_CONTEXT_BUDGET_POLICY.softThresholdPercent, 100));
    const maxPerToolBytes = toNonNegativeInt(merged.maxPerToolBytes, DEFAULT_CONTEXT_BUDGET_POLICY.maxPerToolBytes, 10 * 1024 * 1024);
    const minPerToolBytes = Math.min(maxPerToolBytes, toNonNegativeInt(merged.minPerToolBytes, DEFAULT_CONTEXT_BUDGET_POLICY.minPerToolBytes, 10 * 1024 * 1024));
    return {
        softThresholdPercent,
        hardThresholdPercent,
        minPerToolBytes,
        maxPerToolBytes,
        perTurnReserveTokens: toNonNegativeInt(merged.perTurnReserveTokens, DEFAULT_CONTEXT_BUDGET_POLICY.perTurnReserveTokens, 200_000),
        unknownUsageFallbackCapBytes: toNonNegativeInt(merged.unknownUsageFallbackCapBytes, DEFAULT_CONTEXT_BUDGET_POLICY.unknownUsageFallbackCapBytes, 10 * 1024 * 1024),
    };
}
|
|
157
|
+
/**
 * Estimate remaining tokens available for tool output.
 *
 * When usage is unknown — `tokens` is null (e.g. right after compaction),
 * or `tokens` / `contextWindow` is missing or not a finite number — returns 0
 * to signal that callers should use the unknown-usage fallback path.
 *
 * @param usage - Context usage snapshot from the framework
 * @param reserveTokens - Tokens to hold back for the model response; a
 *   missing or non-finite value is treated as 0
 * @returns Non-negative remaining token count, or 0 when unknown
 */
export function estimateRemainingTokens(usage, reserveTokens) {
    const tokens = usage?.tokens;
    const contextWindow = usage?.contextWindow;
    // Number.isFinite rejects null, undefined, NaN and ±Infinity without
    // coercion, so a partial snapshot can never leak NaN to callers.
    if (!Number.isFinite(tokens) || !Number.isFinite(contextWindow)) {
        return 0;
    }
    const reserve = Number.isFinite(reserveTokens) ? reserveTokens : 0;
    return Math.max(0, contextWindow - tokens - reserve);
}
|
|
172
|
+
/**
 * Convert a token count to an approximate byte budget.
 *
 * Uses a fixed 4-bytes-per-token heuristic. The real ratio depends on the
 * script and tokenizer in use, so callers should treat the result as an
 * estimate rather than an exact figure.
 *
 * @param tokens - Token count to convert
 * @returns Approximate byte budget; 0 for negative or non-numeric input
 */
export function tokensToBytes(tokens) {
    const bytes = Math.floor(tokens * 4);
    // Math.max(0, NaN) is NaN, so a non-numeric input must be handled
    // explicitly or it would poison every downstream min/max clamp.
    if (Number.isNaN(bytes)) {
        return 0;
    }
    return Math.max(0, bytes);
}
|
|
185
|
+
/**
 * Build a compact one-line budget status string for system prompt injection.
 *
 * Format when known: `Context budget: 67% used, ~66k tokens remaining`
 * Format when unknown: `Context budget: unknown (waiting for fresh usage sample)`
 *
 * The percentage is always rounded to a whole number, whether it comes from
 * `usage.percent` or is derived from `tokens / contextWindow`, so the line
 * keeps the same shape regardless of the precision the framework reports.
 *
 * @param usage - Context usage snapshot
 * @param policy - Resolved context-budget policy (reads `perTurnReserveTokens`)
 * @returns Deterministic single-line status string
 */
export function formatBudgetStatusLine(usage, policy) {
    if (usage.tokens === null || usage.contextWindow <= 0) {
        return "Context budget: unknown (waiting for fresh usage sample)";
    }
    // Prefer the reported percentage; derive it when absent or unusable.
    const hasReportedPercent = typeof usage.percent === "number" && Number.isFinite(usage.percent);
    const rawPercent = hasReportedPercent ? usage.percent : (usage.tokens / usage.contextWindow) * 100;
    const pct = Math.round(rawPercent);
    const remaining = estimateRemainingTokens(usage, policy.perTurnReserveTokens);
    const remainingK = Math.max(0, Math.round(remaining / 1000));
    return `Context budget: ${pct}% used, ~${remainingK}k tokens remaining`;
}
|
|
204
|
+
/**
 * Look up the byte cap to apply while context usage is unknown.
 *
 * Callers in this file use it when the usage snapshot has `tokens === null`
 * or a non-positive context window.
 *
 * @param policy - Resolved context-budget policy
 * @returns The policy's `unknownUsageFallbackCapBytes` value
 */
export function unknownUsageFallbackBudget(policy) {
    const { unknownUsageFallbackCapBytes: fallbackCap } = policy;
    return fallbackCap;
}
|
|
216
|
+
/**
 * Coerce whatever `ctx.getContextUsage()` returned into a stable snapshot.
 *
 * A non-record input yields the fully-unknown snapshot. Otherwise each field
 * is kept only when it is a finite number: `contextWindow` falls back to 0,
 * while `tokens` and `percent` fall back to null.
 *
 * @param usage - Raw usage object returned by `ctx.getContextUsage()`
 * @returns Normalized `{ contextWindow, percent, tokens }` snapshot
 */
function normalizeContextUsageSnapshot(usage) {
    if (!isObjectRecord(usage)) {
        return { contextWindow: 0, percent: null, tokens: null };
    }
    /** Keep `value` when it is a finite number, otherwise use `fallback`. */
    const finiteOr = (value, fallback) => {
        if (typeof value === "number" && Number.isFinite(value)) {
            return value;
        }
        return fallback;
    };
    return {
        contextWindow: finiteOr(usage.contextWindow, 0),
        percent: finiteOr(usage.percent, null),
        tokens: finiteOr(usage.tokens, null),
    };
}
|
|
111
240
|
/**
|
|
112
241
|
* Summarize older oversized tool results in-place while keeping the newest N full.
|
|
113
242
|
*
|
|
@@ -162,6 +291,18 @@ function readToolResultRetentionConfig(settings) {
|
|
|
162
291
|
const config = settings.toolResultRetention;
|
|
163
292
|
return isObjectRecord(config) ? config : {};
|
|
164
293
|
}
|
|
294
|
+
/**
 * Extract the `contextBudget` section from an arbitrary settings object.
 *
 * @param settings - Settings record that may include `contextBudget`
 * @returns The `contextBudget` value when it is an object record, otherwise
 *   a fresh empty object (also returned when `settings` is falsy)
 */
function readContextBudgetConfig(settings) {
    if (!settings) {
        return {};
    }
    const { contextBudget } = settings;
    if (isObjectRecord(contextBudget)) {
        return contextBudget;
    }
    return {};
}
|
|
165
306
|
/**
|
|
166
307
|
* Clamp a numeric setting to a safe non-negative integer range.
|
|
167
308
|
*
|
|
@@ -710,6 +851,11 @@ export async function createTallowSession(options = {}) {
|
|
|
710
851
|
projectSettings: settingsManager.getProjectSettings(),
|
|
711
852
|
runtimeSettings: options.settings,
|
|
712
853
|
});
|
|
854
|
+
const contextBudgetPolicy = resolveContextBudgetPolicy({
|
|
855
|
+
globalSettings: settingsManager.getGlobalSettings(),
|
|
856
|
+
projectSettings: settingsManager.getProjectSettings(),
|
|
857
|
+
runtimeSettings: options.settings,
|
|
858
|
+
});
|
|
713
859
|
// ── Resource Loader ──────────────────────────────────────────────────────
|
|
714
860
|
const additionalExtensionPaths = [];
|
|
715
861
|
const additionalSkillPaths = [];
|
|
@@ -823,9 +969,10 @@ export async function createTallowSession(options = {}) {
|
|
|
823
969
|
additionalPromptTemplatePaths: additionalPromptPaths,
|
|
824
970
|
additionalThemePaths,
|
|
825
971
|
extensionFactories: [
|
|
826
|
-
|
|
972
|
+
createRebrandSystemPromptExtension(contextBudgetPolicy),
|
|
827
973
|
injectImageFilePaths,
|
|
828
|
-
createToolResultRetentionExtension(toolResultRetentionPolicy),
|
|
974
|
+
createToolResultRetentionExtension(toolResultRetentionPolicy, contextBudgetPolicy),
|
|
975
|
+
createContextBudgetPlannerExtension(contextBudgetPolicy),
|
|
829
976
|
detectOutputTruncation,
|
|
830
977
|
createProjectTrustExtension(cwd, projectTrust),
|
|
831
978
|
...(options.extensionFactories ?? []),
|
|
@@ -1162,35 +1309,46 @@ function createProjectTrustExtension(cwd, initialTrust) {
|
|
|
1162
1309
|
};
|
|
1163
1310
|
}
|
|
1164
1311
|
/**
 * Build the built-in extension factory that rebrands the pi system prompt for
 * tallow, appends tallow's standing guidelines, and adds a compact
 * context-budget status line whenever `before_agent_start` fires.
 *
 * Registered as a factory so it cannot be overridden or removed by users.
 *
 * @param budgetPolicy - Resolved context-budget policy for status line generation
 * @returns Extension factory
 */
function createRebrandSystemPromptExtension(budgetPolicy) {
    // [pattern, replacement] pairs applied in order. The first pattern is a
    // plain string, so only its first occurrence is replaced.
    const brandRewrites = [
        [
            "You are an expert coding assistant operating inside pi, a coding agent harness.",
            "You are an expert coding assistant operating inside tallow, a coding agent harness.",
        ],
        [/Pi documentation/g, "Tallow documentation"],
        [/When working on pi topics/g, "When working on tallow topics"],
        [/read pi \.md files/g, "read tallow .md files"],
        [/the user asks about pi itself/g, "the user asks about tallow itself"],
    ];
    // Paragraphs appended to every prompt, in this order.
    const standingGuidelines = [
        // Core guidelines baked into every tallow session
        "\n\nLLM intelligence is not always the answer. When a well-designed algorithm, heuristic, or deterministic approach can solve the problem reliably, prefer that over reaching for another LLM call. Reserve model inference for tasks that genuinely require reasoning, creativity, or natural-language understanding.",
        // Communicate strategy changes proactively
        "\n\nIf you hit an internal limit (thinking budget, output length, or planning complexity) that forces you to change approach — say so immediately. Never silently pivot from planning to execution, or drop planned items, without telling the user what happened and why.",
        // Detect unexpected workspace changes
        "\n\nWhile you are working, if you notice unexpected changes in the workspace that you didn't make — STOP IMMEDIATELY and tell the user what you found. Do not attempt to revert, overwrite, or work around them. Ask the user how they would like to proceed.",
        // Review mindset
        "\n\nWhen the user asks for a review, default to a code-review mindset. Prioritize identifying bugs, risks, behavioral regressions, and missing tests. Present findings first, ordered by severity, with file and line references where possible. State explicitly if no issues were found and call out any residual risks or test gaps.",
    ];
    return (pi) => {
        pi.on("before_agent_start", async (event, ctx) => {
            const rebranded = brandRewrites.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), event.systemPrompt);
            const sections = [rebranded, ...standingGuidelines];
            // Inject model identity so non-Claude models don't confabulate their identity
            if (ctx.model) {
                sections.push(`\n\nYou are running as ${ctx.model.name} (${ctx.model.provider}/${ctx.model.id}).`);
            }
            // Append compact budget status line for context awareness
            const snapshot = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
            const statusLine = formatBudgetStatusLine(snapshot, budgetPolicy);
            sections.push(`\n\n[${statusLine}]`);
            return { systemPrompt: sections.join("") };
        });
    };
}
|
|
1195
1353
|
/**
|
|
1196
1354
|
* Injects file paths into Image components for clickable OSC 8 links.
|
|
@@ -1224,10 +1382,57 @@ function injectImageFilePaths(pi) {
|
|
|
1224
1382
|
* @param policy - Resolved retention policy
|
|
1225
1383
|
* @returns Extension factory
|
|
1226
1384
|
*/
|
|
1227
|
-
function createToolResultRetentionExtension(policy) {
|
|
1385
|
+
function createToolResultRetentionExtension(policy, budgetPolicy) {
|
|
1228
1386
|
return (pi) => {
|
|
1229
|
-
|
|
1387
|
+
// ── Ingestion-time guard on tool_result ──────────────────────────────
|
|
1388
|
+
// Truncate oversized textual payloads before persistence when context
|
|
1389
|
+
// budget is tight or usage is unknown. Compatibility invariants:
|
|
1390
|
+
// - Never change toolCallId, toolName, or isError
|
|
1391
|
+
// - Preserve existing details fields
|
|
1392
|
+
// - Add guard metadata only under namespaced key
|
|
1393
|
+
// - Leave non-text blocks structurally unchanged
|
|
1394
|
+
pi.on("tool_result", async (event, ctx) => {
|
|
1395
|
+
const usage = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
|
|
1396
|
+
const usageUnknown = usage.tokens === null || usage.contextWindow <= 0;
|
|
1397
|
+
const usagePercent = usage.percent !== null
|
|
1398
|
+
? usage.percent
|
|
1399
|
+
: usage.tokens === null || usage.contextWindow <= 0
|
|
1400
|
+
? null
|
|
1401
|
+
: Math.round((usage.tokens / usage.contextWindow) * 100);
|
|
1402
|
+
const safeBudgetBytes = (() => {
|
|
1403
|
+
if (usageUnknown) {
|
|
1404
|
+
return unknownUsageFallbackBudget(budgetPolicy);
|
|
1405
|
+
}
|
|
1406
|
+
const remainingTokens = estimateRemainingTokens(usage, budgetPolicy.perTurnReserveTokens);
|
|
1407
|
+
const baseline = Math.min(budgetPolicy.maxPerToolBytes, Math.max(budgetPolicy.minPerToolBytes, tokensToBytes(remainingTokens)));
|
|
1408
|
+
if (usagePercent !== null && usagePercent >= budgetPolicy.hardThresholdPercent) {
|
|
1409
|
+
return budgetPolicy.minPerToolBytes;
|
|
1410
|
+
}
|
|
1411
|
+
if (usagePercent !== null && usagePercent >= budgetPolicy.softThresholdPercent) {
|
|
1412
|
+
return Math.max(budgetPolicy.minPerToolBytes, Math.floor(baseline * 0.5));
|
|
1413
|
+
}
|
|
1414
|
+
return baseline;
|
|
1415
|
+
})();
|
|
1416
|
+
const guarded = guardToolResultContent(event.content, safeBudgetBytes);
|
|
1417
|
+
if (!guarded.wasGuarded) {
|
|
1418
|
+
return;
|
|
1419
|
+
}
|
|
1420
|
+
const guardMeta = {
|
|
1421
|
+
guardedAt: new Date().toISOString(),
|
|
1422
|
+
originalContentBytes: guarded.originalTextBytes,
|
|
1423
|
+
truncatedToBytes: guarded.truncatedToBytes,
|
|
1424
|
+
reason: usageUnknown ? "unknown_usage" : "over_budget",
|
|
1425
|
+
};
|
|
1426
|
+
const existingDetails = isObjectRecord(event.details) ? event.details : {};
|
|
1427
|
+
return {
|
|
1428
|
+
content: guarded.content,
|
|
1429
|
+
details: { ...existingDetails, [TOOL_RESULT_BUDGET_GUARD_MARKER]: guardMeta },
|
|
1430
|
+
};
|
|
1431
|
+
});
|
|
1432
|
+
// ── Historical turn_end retention (unchanged) ────────────────────────
|
|
1433
|
+
if (!policy.enabled) {
|
|
1230
1434
|
return;
|
|
1435
|
+
}
|
|
1231
1436
|
pi.on("turn_end", async (_event, ctx) => {
|
|
1232
1437
|
const messages = [];
|
|
1233
1438
|
for (const entry of ctx.sessionManager.getBranch()) {
|
|
@@ -1243,6 +1448,251 @@ function createToolResultRetentionExtension(policy) {
|
|
|
1243
1448
|
});
|
|
1244
1449
|
};
|
|
1245
1450
|
}
|
|
1451
|
+
/**
 * Apply ingestion-time guardrails to a tool-result content array.
 *
 * Only textual blocks are truncated. Non-text blocks are preserved in place
 * to avoid breaking renderer contracts.
 *
 * Behavior by case:
 * - `content` is not an array: returned untouched and unguarded.
 * - Text present and within budget: returned untouched.
 * - No text at all: untouched if the serialized blocks fit the budget;
 *   otherwise a short explanatory text block is prepended and the original
 *   blocks are kept as-is.
 * - Text over budget: text blocks are kept in order until the budget is
 *   exhausted, the overflowing block is cut and suffixed with a truncation
 *   marker, and any later text blocks are dropped.
 *
 * @param content - Tool-result content blocks
 * @param maxTextBytes - Maximum allowed bytes across all text blocks
 * @returns `{ content, originalTextBytes, truncatedToBytes, wasGuarded }`;
 *   `truncatedToBytes` is the nominal cap applied and does not count the
 *   appended marker text
 */
function guardToolResultContent(content, maxTextBytes) {
    // A malformed payload cannot be inspected; pass it through rather than
    // throwing from inside the tool_result pipeline.
    if (!Array.isArray(content)) {
        return {
            content,
            originalTextBytes: 0,
            truncatedToBytes: 0,
            wasGuarded: false,
        };
    }
    const isTextBlock = (block) => block?.type === "text";
    // Missing or non-string text counts as empty so byte math never throws.
    const textOf = (block) => (typeof block.text === "string" ? block.text : "");
    let originalTextBytes = 0;
    let totalContentBytes = 0;
    for (const block of content) {
        if (isTextBlock(block)) {
            const textBytes = Buffer.byteLength(textOf(block), "utf-8");
            originalTextBytes += textBytes;
            totalContentBytes += textBytes;
            continue;
        }
        // JSON.stringify yields undefined for unserializable values such as
        // `undefined`; count those as zero bytes.
        totalContentBytes += Buffer.byteLength(JSON.stringify(block) ?? "", "utf-8");
    }
    if (originalTextBytes > 0 && originalTextBytes <= maxTextBytes) {
        return {
            content,
            originalTextBytes,
            truncatedToBytes: originalTextBytes,
            wasGuarded: false,
        };
    }
    if (originalTextBytes === 0) {
        if (totalContentBytes <= maxTextBytes) {
            return {
                content,
                originalTextBytes,
                truncatedToBytes: 0,
                wasGuarded: false,
            };
        }
        const fallbackText = "[non-text tool output exceeds context budget; payload preserved without structural rewrite]";
        return {
            content: [{ type: "text", text: fallbackText }, ...content],
            originalTextBytes,
            truncatedToBytes: 0,
            wasGuarded: true,
        };
    }
    // From here on the combined text exceeds the budget, so the loop below is
    // guaranteed to hit the truncation branch on some text block.
    const nextContent = [];
    let bytesUsed = 0;
    let truncated = false;
    for (const block of content) {
        if (!isTextBlock(block)) {
            nextContent.push(block);
            continue;
        }
        if (truncated) {
            // Text after the cut point is dropped entirely.
            continue;
        }
        const text = textOf(block);
        const blockBytes = Buffer.byteLength(text, "utf-8");
        if (bytesUsed + blockBytes <= maxTextBytes) {
            nextContent.push({ ...block, text });
            bytesUsed += blockBytes;
            continue;
        }
        const remaining = Math.max(0, maxTextBytes - bytesUsed);
        const truncatedText = remaining > 0
            ? truncateTextToBytes(text, remaining)
            : "[output truncated by context-budget guard]";
        const marker = remaining > 0
            ? `\n\n[output truncated by context-budget guard — ${formatBytesForSummary(originalTextBytes)} → ${formatBytesForSummary(maxTextBytes)}]`
            : `\n\n[output truncated by context-budget guard — ${formatBytesForSummary(originalTextBytes)} original]`;
        nextContent.push({ type: "text", text: `${truncatedText}${marker}` });
        bytesUsed = maxTextBytes;
        truncated = true;
    }
    return {
        content: nextContent,
        originalTextBytes,
        truncatedToBytes: Math.min(originalTextBytes, maxTextBytes),
        wasGuarded: true,
    };
}
|
|
1543
|
+
/**
 * Truncate a string to fit within a byte budget (UTF-8 safe).
 *
 * The text is encoded once, then the cut point is moved backward past any
 * UTF-8 continuation bytes so a multi-byte character is never split. Working
 * on the encoded bytes (rather than slicing UTF-16 code units) also means a
 * surrogate pair is either kept whole or dropped whole, and the cost stays
 * linear in the input size.
 *
 * @param text - Source text to truncate
 * @param maxBytes - Maximum UTF-8 byte length; non-positive or NaN yields ""
 *   when the text does not already fit
 * @returns Truncated string guaranteed to be at most maxBytes
 */
function truncateTextToBytes(text, maxBytes) {
    const encoded = Buffer.from(text, "utf-8");
    if (encoded.length <= maxBytes) {
        return text;
    }
    // Covers zero, negative and NaN budgets, none of which can hold any text.
    if (!(maxBytes > 0)) {
        return "";
    }
    // encoded.length > maxBytes here, so `cut` is always a valid index.
    let cut = Math.floor(maxBytes);
    // Continuation bytes match 10xxxxxx; landing on one means the cut would
    // fall inside a character, so back up to that character's lead byte.
    while (cut > 0 && (encoded[cut] & 0xc0) === 0x80) {
        cut -= 1;
    }
    return encoded.toString("utf-8", 0, cut);
}
|
|
1563
|
+
/**
 * Build the batch-planner extension: it assigns each tool call in an
 * assistant message a byte envelope and exposes those envelopes to tool
 * extensions through an API object published on the event bus.
 *
 * On `message_end` for assistant messages, the tool calls in the message
 * content are counted and every call receives the same per-tool budget,
 * stored under its toolCallId. An envelope can be taken once, expires after
 * its TTL, and is only valid for the turn it was created in. All envelopes
 * are discarded on turn_end, agent_end, session_before_switch, and
 * session_switch.
 *
 * @param budgetPolicy - Resolved context-budget policy
 * @returns Extension factory
 */
function createContextBudgetPlannerExtension(budgetPolicy) {
    return (pi) => {
        // toolCallId → { envelope, metadata }
        const pendingEnvelopes = new Map();
        let activeTurnIndex = 0;

        /** Keep a per-tool byte count inside the policy's min/max bounds. */
        const clampToPolicy = (bytes) => {
            const floored = Math.max(budgetPolicy.minPerToolBytes, bytes);
            return Math.min(budgetPolicy.maxPerToolBytes, floored);
        };

        /** True when an entry has outlived its TTL or belongs to another turn. */
        const isStale = (entry, nowMs) => {
            const { createdAtMs, ttlMs, turnIndex } = entry.metadata;
            const expired = nowMs - createdAtMs > ttlMs;
            const wrongTurn = turnIndex !== activeTurnIndex;
            return expired || wrongTurn;
        };

        /** Remove every stale entry so old budgets cannot be reused. */
        const evictStale = (nowMs) => {
            for (const [toolCallId, entry] of pendingEnvelopes) {
                if (isStale(entry, nowMs)) {
                    pendingEnvelopes.delete(toolCallId);
                }
            }
        };

        /** Compute the byte budget each tool call in a batch receives. */
        const budgetForBatch = (usage, batchSize) => {
            if (usage.tokens === null || usage.contextWindow <= 0) {
                return clampToPolicy(unknownUsageFallbackBudget(budgetPolicy));
            }
            let percentUsed;
            if (usage.percent !== null) {
                percentUsed = usage.percent;
            }
            else {
                percentUsed = Math.round((usage.tokens / usage.contextWindow) * 100);
            }
            const tokensLeft = estimateRemainingTokens(usage, budgetPolicy.perTurnReserveTokens);
            const evenShare = Math.floor(tokensToBytes(tokensLeft) / Math.max(1, batchSize));
            let candidate = evenShare;
            if (percentUsed >= budgetPolicy.hardThresholdPercent) {
                // Near the hard threshold: never hand out more than the minimum.
                candidate = Math.min(evenShare, budgetPolicy.minPerToolBytes);
            }
            else if (percentUsed >= budgetPolicy.softThresholdPercent) {
                // Past the soft threshold: halve the even share.
                candidate = Math.floor(evenShare * 0.5);
            }
            return clampToPolicy(candidate);
        };

        /** API handed to tool extensions; `take` consumes an envelope. */
        const budgetApi = {
            take(toolCallId) {
                const nowMs = Date.now();
                evictStale(nowMs);
                const entry = pendingEnvelopes.get(toolCallId);
                if (!entry) {
                    return undefined;
                }
                // Single use: remove the entry whether or not it is still valid.
                const stale = isStale(entry, nowMs);
                pendingEnvelopes.delete(toolCallId);
                return stale ? undefined : entry.envelope;
            },
        };

        /** Announce the planner API on the event bus. */
        const announceApi = () => {
            pi.events.emit(CONTEXT_BUDGET_API_CHANNELS.budgetApi, { api: budgetApi });
        };

        // Track turn index and evict stale envelopes at turn boundaries.
        pi.on("turn_start", async (event) => {
            activeTurnIndex = event.turnIndex;
            evictStale(Date.now());
        });

        // Compute envelopes on assistant message_end.
        pi.on("message_end", async (event, ctx) => {
            const message = event.message;
            if (!message || message.role !== "assistant") {
                return;
            }
            const blocks = message.content;
            if (!Array.isArray(blocks)) {
                return;
            }
            const toolCalls = blocks.filter((block) => isObjectRecord(block) &&
                block.type === "toolCall" &&
                typeof block.id === "string" &&
                typeof block.name === "string" &&
                isObjectRecord(block.arguments));
            const batchSize = toolCalls.length;
            if (batchSize === 0) {
                return;
            }
            const nowMs = Date.now();
            evictStale(nowMs);
            const usage = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
            const perToolBytes = budgetForBatch(usage, batchSize);
            for (const { id } of toolCalls) {
                pendingEnvelopes.set(id, {
                    envelope: { batchSize, maxBytes: perToolBytes },
                    metadata: {
                        createdAtMs: nowMs,
                        ttlMs: CONTEXT_BUDGET_ENVELOPE_TTL_MS,
                        turnIndex: activeTurnIndex,
                    },
                });
            }
        });

        pi.on("session_start", async () => {
            announceApi();
        });
        pi.events.on(CONTEXT_BUDGET_API_CHANNELS.budgetApiRequest, () => {
            announceApi();
        });

        // Every one of these lifecycle events invalidates all envelopes.
        for (const lifecycleEvent of ["turn_end", "agent_end", "session_before_switch", "session_switch"]) {
            pi.on(lifecycleEvent, async () => {
                pendingEnvelopes.clear();
            });
        }
    };
}
|
|
1246
1696
|
/**
|
|
1247
1697
|
* Detects when a model response was truncated due to max_tokens and notifies
|
|
1248
1698
|
* the user. Without this, truncated responses silently stop — the model may
|