@dungle-scrubs/tallow 0.8.13 → 0.8.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/sdk.js CHANGED
@@ -17,6 +17,8 @@ import { normalizeStartupProfile } from "./startup-profile.js";
17
17
  import { emitStartupTiming, isStartupTimingEnabled } from "./startup-timing.js";
18
18
  /** Marker key used on summarized historical tool results. */
19
19
  export const TOOL_RESULT_RETENTION_MARKER = "__tallow_summarized_tool_result__";
20
+ /** Namespaced `details` key under which the ingestion-time budget guard stores its truncation metadata on a tool result. */
21
+ export const TOOL_RESULT_BUDGET_GUARD_MARKER = "__tallow_budget_guard__";
20
22
  /** Default retention policy for historical tool-result payloads. */
21
23
  const DEFAULT_TOOL_RESULT_RETENTION_POLICY = {
22
24
  enabled: true,
@@ -24,6 +26,23 @@ const DEFAULT_TOOL_RESULT_RETENTION_POLICY = {
24
26
  maxRetainedBytesPerResult: 48 * 1024,
25
27
  previewChars: 600,
26
28
  };
29
// ─── Context Budget Policy ───────────────────────────────────────────────────
/**
 * Default context-budget thresholds and caps.
 *
 * Frozen so the shared defaults cannot be mutated by accident; the visible
 * code only ever reads from this object.
 */
const DEFAULT_CONTEXT_BUDGET_POLICY = Object.freeze({
    /** Usage percentage at or above which per-tool budgets are halved. */
    softThresholdPercent: 75,
    /** Usage percentage at or above which per-tool budgets drop to the minimum. */
    hardThresholdPercent: 90,
    /** Lower clamp for a single tool result's byte budget. */
    minPerToolBytes: 4 * 1024,
    /** Upper clamp for a single tool result's byte budget. */
    maxPerToolBytes: 512 * 1024,
    /** Tokens held back for the model response when estimating headroom. */
    perTurnReserveTokens: 8_000,
    /** Byte cap applied while context usage is unknown. */
    unknownUsageFallbackCapBytes: 32 * 1024,
});
/** Event channels for context-budget planner ↔ tool API handshake. */
const CONTEXT_BUDGET_API_CHANNELS = Object.freeze({
    /** Channel on which the planner publishes its budget API object. */
    budgetApi: "interop.api.v1.context-budget.api",
    /** Channel on which consumers ask the planner to (re)publish the API. */
    budgetApiRequest: "interop.api.v1.context-budget.api-request",
});
/** Default TTL for per-tool context-budget envelopes. */
const CONTEXT_BUDGET_ENVELOPE_TTL_MS = 30_000;
27
46
  /** Map of tool name → tool object for --tools flag resolution. */
28
47
  const TOOL_MAP = {
29
48
  read: readTool,
@@ -108,6 +127,116 @@ export function resolveToolResultRetentionPolicy(params) {
108
127
  previewChars: toNonNegativeInt(merged.previewChars, DEFAULT_TOOL_RESULT_RETENTION_POLICY.previewChars, 10_000),
109
128
  };
110
129
  }
130
/**
 * Resolve the effective context-budget policy from layered settings.
 *
 * Precedence: global settings < project settings < runtime overrides.
 * Any unset field falls back to the compiled default.
 *
 * Each field is first normalized on its own via `toNonNegativeInt`, then two
 * cross-field invariants are enforced so a contradictory config cannot
 * invert the budget tiers:
 * - `softThresholdPercent` never exceeds `hardThresholdPercent`
 * - `minPerToolBytes` never exceeds `maxPerToolBytes`
 * In both cases the lower-priority value is pulled down to the cap, which is
 * the conservative (smaller budget / earlier pressure) choice.
 *
 * @param params - Layered settings inputs (`globalSettings`, `projectSettings`, `runtimeSettings`)
 * @returns Resolved context-budget policy with validated numeric bounds
 */
export function resolveContextBudgetPolicy(params) {
    // Later spreads win, which yields the documented precedence.
    const merged = {
        ...readContextBudgetConfig(params.globalSettings),
        ...readContextBudgetConfig(params.projectSettings),
        ...readContextBudgetConfig(params.runtimeSettings),
    };
    const defaults = DEFAULT_CONTEXT_BUDGET_POLICY;
    const byteSettingCeiling = 10 * 1024 * 1024;
    const hardThresholdPercent = toNonNegativeInt(merged.hardThresholdPercent, defaults.hardThresholdPercent, 100);
    const maxPerToolBytes = toNonNegativeInt(merged.maxPerToolBytes, defaults.maxPerToolBytes, byteSettingCeiling);
    // Without these caps a config with soft > hard makes the soft tier
    // unreachable, and min > max makes the hard-threshold budget larger than
    // the normal one.
    const softThresholdPercent = Math.min(toNonNegativeInt(merged.softThresholdPercent, defaults.softThresholdPercent, 100), hardThresholdPercent);
    const minPerToolBytes = Math.min(toNonNegativeInt(merged.minPerToolBytes, defaults.minPerToolBytes, byteSettingCeiling), maxPerToolBytes);
    return {
        softThresholdPercent,
        hardThresholdPercent,
        minPerToolBytes,
        maxPerToolBytes,
        perTurnReserveTokens: toNonNegativeInt(merged.perTurnReserveTokens, defaults.perTurnReserveTokens, 200_000),
        unknownUsageFallbackCapBytes: toNonNegativeInt(merged.unknownUsageFallbackCapBytes, defaults.unknownUsageFallbackCapBytes, byteSettingCeiling),
    };
}
157
/**
 * Estimate remaining tokens available for tool output.
 *
 * When usage.tokens is null (e.g. right after compaction), returns 0
 * to signal that callers should use the unknown-usage fallback path.
 * The same applies when `tokens` or `contextWindow` is missing or not a
 * finite number, so the function never returns NaN.
 *
 * @param usage - Context usage snapshot from the framework
 * @param reserveTokens - Tokens to hold back for the model response; a
 *   missing or non-finite value is treated as no reserve
 * @returns Non-negative remaining token count, or 0 when unknown
 */
export function estimateRemainingTokens(usage, reserveTokens) {
    const { tokens, contextWindow } = usage;
    // Number.isFinite rejects null, undefined, NaN and ±Infinity without coercion.
    if (!Number.isFinite(tokens) || !Number.isFinite(contextWindow)) {
        return 0;
    }
    const reserve = Number.isFinite(reserveTokens) ? reserveTokens : 0;
    return Math.max(0, contextWindow - tokens - reserve);
}
172
/**
 * Convert a token count to an approximate byte budget.
 *
 * Uses a fixed 4-bytes-per-token heuristic. Text that tokenizes less
 * efficiently than that fits fewer bytes into the same number of tokens,
 * so callers should treat the result as an upper-bound estimate.
 *
 * A count that is not a number (NaN, undefined) yields 0 rather than NaN so
 * the result is always safe to feed into min/max clamps.
 *
 * @param tokens - Token count to convert
 * @returns Approximate byte budget (never negative, never NaN)
 */
export function tokensToBytes(tokens) {
    const bytesPerToken = 4;
    const bytes = Math.floor(tokens * bytesPerToken);
    // Math.max(0, NaN) is NaN, so NaN has to be handled explicitly.
    return Number.isNaN(bytes) ? 0 : Math.max(0, bytes);
}
185
/**
 * Build a compact one-line budget status string for system prompt injection.
 *
 * Format when known: `Context budget: 67% used, ~66k tokens remaining`
 * Format when unknown: `Context budget: unknown (waiting for fresh usage sample)`
 *
 * The percentage is always rendered as a whole number: a framework-supplied
 * `usage.percent` may be fractional, and a missing one is derived from
 * `tokens / contextWindow`.
 *
 * @param usage - Context usage snapshot
 * @param policy - Resolved context-budget policy
 * @returns Deterministic single-line status string
 */
export function formatBudgetStatusLine(usage, policy) {
    if (usage.tokens === null || usage.contextWindow <= 0) {
        return "Context budget: unknown (waiting for fresh usage sample)";
    }
    // Fall back to the derived ratio for null *and* undefined/non-finite percent,
    // so the line can never read "undefined% used".
    const rawPercent = Number.isFinite(usage.percent)
        ? usage.percent
        : (usage.tokens / usage.contextWindow) * 100;
    const pct = Math.round(rawPercent);
    const remaining = estimateRemainingTokens(usage, policy.perTurnReserveTokens);
    const remainingK = Math.max(0, Math.round(remaining / 1000));
    return `Context budget: ${pct}% used, ~${remainingK}k tokens remaining`;
}
204
/**
 * Look up the byte cap that applies while context usage is unknown.
 *
 * Usage counts as unknown when the framework reports `tokens` as null,
 * e.g. after compaction or before the first LLM response.
 *
 * @param policy - Resolved context-budget policy
 * @returns The policy's `unknownUsageFallbackCapBytes` value
 */
export function unknownUsageFallbackBudget(policy) {
    const { unknownUsageFallbackCapBytes: fallbackCapBytes } = policy;
    return fallbackCapBytes;
}
216
/**
 * Turn whatever the framework returned for context usage into a snapshot
 * with a fixed shape: `{ contextWindow, percent, tokens }`.
 *
 * A field that is absent or not a finite number is reported as unknown
 * (`0` for `contextWindow`, `null` for `percent` and `tokens`), which makes
 * budget planning/guarding take its conservative fallback path.
 *
 * @param usage - Raw usage object returned by `ctx.getContextUsage()`
 * @returns Normalized snapshot
 */
function normalizeContextUsageSnapshot(usage) {
    // Accept only real, finite numbers; anything else becomes the given fallback.
    const finiteOr = (candidate, fallback) => typeof candidate === "number" && Number.isFinite(candidate) ? candidate : fallback;
    if (!isObjectRecord(usage)) {
        return { contextWindow: 0, percent: null, tokens: null };
    }
    const contextWindow = finiteOr(usage.contextWindow, 0);
    const tokens = finiteOr(usage.tokens, null);
    const percent = finiteOr(usage.percent, null);
    return { contextWindow, percent, tokens };
}
111
240
  /**
112
241
  * Summarize older oversized tool results in-place while keeping the newest N full.
113
242
  *
@@ -162,6 +291,18 @@ function readToolResultRetentionConfig(settings) {
162
291
  const config = settings.toolResultRetention;
163
292
  return isObjectRecord(config) ? config : {};
164
293
  }
294
/**
 * Pull the `contextBudget` section out of a settings object.
 *
 * @param settings - Settings record that may include `contextBudget`; may be absent
 * @returns The `contextBudget` record when present, otherwise an empty object
 */
function readContextBudgetConfig(settings) {
    if (settings) {
        const { contextBudget } = settings;
        if (isObjectRecord(contextBudget)) {
            return contextBudget;
        }
    }
    return {};
}
165
306
  /**
166
307
  * Clamp a numeric setting to a safe non-negative integer range.
167
308
  *
@@ -710,6 +851,11 @@ export async function createTallowSession(options = {}) {
710
851
  projectSettings: settingsManager.getProjectSettings(),
711
852
  runtimeSettings: options.settings,
712
853
  });
854
+ const contextBudgetPolicy = resolveContextBudgetPolicy({
855
+ globalSettings: settingsManager.getGlobalSettings(),
856
+ projectSettings: settingsManager.getProjectSettings(),
857
+ runtimeSettings: options.settings,
858
+ });
713
859
  // ── Resource Loader ──────────────────────────────────────────────────────
714
860
  const additionalExtensionPaths = [];
715
861
  const additionalSkillPaths = [];
@@ -823,9 +969,10 @@ export async function createTallowSession(options = {}) {
823
969
  additionalPromptTemplatePaths: additionalPromptPaths,
824
970
  additionalThemePaths,
825
971
  extensionFactories: [
826
- rebrandSystemPrompt,
972
+ createRebrandSystemPromptExtension(contextBudgetPolicy),
827
973
  injectImageFilePaths,
828
- createToolResultRetentionExtension(toolResultRetentionPolicy),
974
+ createToolResultRetentionExtension(toolResultRetentionPolicy, contextBudgetPolicy),
975
+ createContextBudgetPlannerExtension(contextBudgetPolicy),
829
976
  detectOutputTruncation,
830
977
  createProjectTrustExtension(cwd, projectTrust),
831
978
  ...(options.extensionFactories ?? []),
@@ -1162,35 +1309,46 @@ function createProjectTrustExtension(cwd, initialTrust) {
1162
1309
  };
1163
1310
  }
1164
1311
/**
 * Build the built-in extension that rebrands the pi system prompt for tallow,
 * appends tallow's core guidelines and model identity, and finishes with a
 * compact context-budget status line whenever `before_agent_start` fires.
 *
 * Registered as a factory so it cannot be overridden or removed by users.
 *
 * @param budgetPolicy - Resolved context-budget policy for status line generation
 * @returns Extension factory
 */
function createRebrandSystemPromptExtension(budgetPolicy) {
    // Ordered pi → tallow substitutions applied to the incoming prompt.
    const rebrandRules = [
        [
            "You are an expert coding assistant operating inside pi, a coding agent harness.",
            "You are an expert coding assistant operating inside tallow, a coding agent harness.",
        ],
        [/Pi documentation/g, "Tallow documentation"],
        [/When working on pi topics/g, "When working on tallow topics"],
        [/read pi \.md files/g, "read tallow .md files"],
        [/the user asks about pi itself/g, "the user asks about tallow itself"],
    ];
    // Paragraphs appended to every tallow session prompt, in this order.
    const coreGuidelines = [
        // Core guidelines baked into every tallow session
        "\n\nLLM intelligence is not always the answer. When a well-designed algorithm, heuristic, or deterministic approach can solve the problem reliably, prefer that over reaching for another LLM call. Reserve model inference for tasks that genuinely require reasoning, creativity, or natural-language understanding.",
        // Communicate strategy changes proactively
        "\n\nIf you hit an internal limit (thinking budget, output length, or planning complexity) that forces you to change approach — say so immediately. Never silently pivot from planning to execution, or drop planned items, without telling the user what happened and why.",
        // Detect unexpected workspace changes
        "\n\nWhile you are working, if you notice unexpected changes in the workspace that you didn't make — STOP IMMEDIATELY and tell the user what you found. Do not attempt to revert, overwrite, or work around them. Ask the user how they would like to proceed.",
        // Review mindset
        "\n\nWhen the user asks for a review, default to a code-review mindset. Prioritize identifying bugs, risks, behavioral regressions, and missing tests. Present findings first, ordered by severity, with file and line references where possible. State explicitly if no issues were found and call out any residual risks or test gaps.",
    ];
    return (pi) => {
        pi.on("before_agent_start", async (event, ctx) => {
            const rebranded = rebrandRules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), event.systemPrompt);
            const sections = [rebranded, ...coreGuidelines];
            // Inject model identity so non-Claude models don't confabulate their identity
            if (ctx.model) {
                sections.push(`\n\nYou are running as ${ctx.model.name} (${ctx.model.provider}/${ctx.model.id}).`);
            }
            // Compact budget status line for context awareness goes last
            const usageSnapshot = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
            sections.push(`\n\n[${formatBudgetStatusLine(usageSnapshot, budgetPolicy)}]`);
            return { systemPrompt: sections.join("") };
        });
    };
}
1195
1353
  /**
1196
1354
  * Injects file paths into Image components for clickable OSC 8 links.
@@ -1224,10 +1382,57 @@ function injectImageFilePaths(pi) {
1224
1382
  * @param policy - Resolved retention policy
1225
1383
  * @returns Extension factory
1226
1384
  */
1227
- function createToolResultRetentionExtension(policy) {
1385
+ function createToolResultRetentionExtension(policy, budgetPolicy) {
1228
1386
  return (pi) => {
1229
- if (!policy.enabled)
1387
+ // ── Ingestion-time guard on tool_result ──────────────────────────────
1388
+ // Truncate oversized textual payloads before persistence when context
1389
+ // budget is tight or usage is unknown. Compatibility invariants:
1390
+ // - Never change toolCallId, toolName, or isError
1391
+ // - Preserve existing details fields
1392
+ // - Add guard metadata only under namespaced key
1393
+ // - Leave non-text blocks structurally unchanged
1394
+ pi.on("tool_result", async (event, ctx) => {
1395
+ const usage = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
1396
+ const usageUnknown = usage.tokens === null || usage.contextWindow <= 0;
1397
+ const usagePercent = usage.percent !== null
1398
+ ? usage.percent
1399
+ : usage.tokens === null || usage.contextWindow <= 0
1400
+ ? null
1401
+ : Math.round((usage.tokens / usage.contextWindow) * 100);
1402
+ const safeBudgetBytes = (() => {
1403
+ if (usageUnknown) {
1404
+ return unknownUsageFallbackBudget(budgetPolicy);
1405
+ }
1406
+ const remainingTokens = estimateRemainingTokens(usage, budgetPolicy.perTurnReserveTokens);
1407
+ const baseline = Math.min(budgetPolicy.maxPerToolBytes, Math.max(budgetPolicy.minPerToolBytes, tokensToBytes(remainingTokens)));
1408
+ if (usagePercent !== null && usagePercent >= budgetPolicy.hardThresholdPercent) {
1409
+ return budgetPolicy.minPerToolBytes;
1410
+ }
1411
+ if (usagePercent !== null && usagePercent >= budgetPolicy.softThresholdPercent) {
1412
+ return Math.max(budgetPolicy.minPerToolBytes, Math.floor(baseline * 0.5));
1413
+ }
1414
+ return baseline;
1415
+ })();
1416
+ const guarded = guardToolResultContent(event.content, safeBudgetBytes);
1417
+ if (!guarded.wasGuarded) {
1418
+ return;
1419
+ }
1420
+ const guardMeta = {
1421
+ guardedAt: new Date().toISOString(),
1422
+ originalContentBytes: guarded.originalTextBytes,
1423
+ truncatedToBytes: guarded.truncatedToBytes,
1424
+ reason: usageUnknown ? "unknown_usage" : "over_budget",
1425
+ };
1426
+ const existingDetails = isObjectRecord(event.details) ? event.details : {};
1427
+ return {
1428
+ content: guarded.content,
1429
+ details: { ...existingDetails, [TOOL_RESULT_BUDGET_GUARD_MARKER]: guardMeta },
1430
+ };
1431
+ });
1432
+ // ── Historical turn_end retention (unchanged) ────────────────────────
1433
+ if (!policy.enabled) {
1230
1434
  return;
1435
+ }
1231
1436
  pi.on("turn_end", async (_event, ctx) => {
1232
1437
  const messages = [];
1233
1438
  for (const entry of ctx.sessionManager.getBranch()) {
@@ -1243,6 +1448,251 @@ function createToolResultRetentionExtension(policy) {
1243
1448
  });
1244
1449
  };
1245
1450
  }
1451
/**
 * Apply ingestion-time guardrails to a tool-result content array.
 *
 * Only textual blocks are truncated. Non-text blocks are preserved in place
 * to avoid breaking renderer contracts. Once the text budget is exhausted,
 * the overflowing text block is cut and marked, and any later text blocks
 * are dropped.
 *
 * Malformed input is tolerated instead of throwing: a non-array `content`
 * is returned untouched, null/non-object entries are treated as non-text
 * blocks, and a text block whose `text` is not a string counts as empty.
 *
 * @param content - Tool-result content blocks
 * @param maxTextBytes - Maximum allowed bytes across all text blocks
 * @returns `{ content, originalTextBytes, truncatedToBytes, wasGuarded }`
 */
function guardToolResultContent(content, maxTextBytes) {
    if (!Array.isArray(content)) {
        // Nothing we can measure or rewrite safely; report it as unguarded.
        return { content, originalTextBytes: 0, truncatedToBytes: 0, wasGuarded: false };
    }
    const isTextBlock = (block) => block !== null && typeof block === "object" && block.type === "text";
    const textOf = (block) => (typeof block.text === "string" ? block.text : "");
    const utf8Bytes = (value) => Buffer.byteLength(value, "utf-8");

    // Pass 1: measure text bytes and the overall payload size.
    let originalTextBytes = 0;
    let totalContentBytes = 0;
    for (const block of content) {
        if (isTextBlock(block)) {
            const textBytes = utf8Bytes(textOf(block));
            originalTextBytes += textBytes;
            totalContentBytes += textBytes;
            continue;
        }
        // JSON.stringify yields undefined for e.g. `undefined` entries.
        totalContentBytes += utf8Bytes(JSON.stringify(block) ?? "");
    }
    if (originalTextBytes > 0 && originalTextBytes <= maxTextBytes) {
        return {
            content,
            originalTextBytes,
            truncatedToBytes: originalTextBytes,
            wasGuarded: false,
        };
    }
    if (originalTextBytes === 0) {
        if (totalContentBytes <= maxTextBytes) {
            return {
                content,
                originalTextBytes,
                truncatedToBytes: 0,
                wasGuarded: false,
            };
        }
        // Oversized non-text payload: keep it intact and prepend a notice.
        const fallbackText = "[non-text tool output exceeds context budget; payload preserved without structural rewrite]";
        return {
            content: [{ type: "text", text: fallbackText }, ...content],
            originalTextBytes,
            truncatedToBytes: 0,
            wasGuarded: true,
        };
    }

    // Pass 2: copy blocks until the text budget runs out, then cut once.
    const nextContent = [];
    let bytesUsed = 0;
    let truncated = false;
    for (const block of content) {
        if (!isTextBlock(block)) {
            nextContent.push(block);
            continue;
        }
        if (truncated) {
            continue;
        }
        const text = textOf(block);
        const blockBytes = utf8Bytes(text);
        if (bytesUsed + blockBytes <= maxTextBytes) {
            nextContent.push({ ...block, text });
            bytesUsed += blockBytes;
            continue;
        }
        const remaining = Math.max(0, maxTextBytes - bytesUsed);
        const truncatedText = remaining > 0
            ? truncateTextToBytes(text, remaining)
            : "[output truncated by context-budget guard]";
        const marker = remaining > 0
            ? `\n\n[output truncated by context-budget guard — ${formatBytesForSummary(originalTextBytes)} → ${formatBytesForSummary(maxTextBytes)}]`
            : `\n\n[output truncated by context-budget guard — ${formatBytesForSummary(originalTextBytes)} original]`;
        nextContent.push({ type: "text", text: `${truncatedText}${marker}` });
        bytesUsed = maxTextBytes;
        truncated = true;
    }
    if (!truncated) {
        // Defensive: the total text already exceeded the budget above, so a
        // cut is expected to have happened by now.
        return {
            content,
            originalTextBytes,
            truncatedToBytes: originalTextBytes,
            wasGuarded: false,
        };
    }
    return {
        content: nextContent,
        originalTextBytes,
        truncatedToBytes: Math.min(originalTextBytes, maxTextBytes),
        wasGuarded: true,
    };
}
1543
/**
 * Truncate a string to fit within a byte budget (UTF-8 safe).
 *
 * Encodes into a scratch buffer of exactly `maxBytes` bytes; the encoder
 * stops before any character that would not fit completely, so the cut
 * never lands inside a multi-byte sequence or between the two halves of a
 * surrogate pair. Work is proportional to the budget, not to the length of
 * the text being cut.
 *
 * @param text - Source text to truncate
 * @param maxBytes - Maximum UTF-8 byte length; values that are negative or
 *   not a number are treated as a zero budget
 * @returns Truncated string guaranteed to be at most maxBytes
 */
function truncateTextToBytes(text, maxBytes) {
    if (Buffer.byteLength(text, "utf-8") <= maxBytes) {
        return text;
    }
    const byteBudget = Number.isFinite(maxBytes) ? Math.max(0, Math.floor(maxBytes)) : 0;
    if (byteBudget === 0) {
        return "";
    }
    // `read` is the number of UTF-16 code units that were fully encoded.
    const { read } = new TextEncoder().encodeInto(text, new Uint8Array(byteBudget));
    return text.slice(0, read);
}
1563
/**
 * Build the batch-planner extension: it assigns each tool call in an
 * assistant message a byte envelope and exposes those envelopes to tool
 * extensions through an API object published on the event bus.
 *
 * On `message_end` for an assistant message, the tool calls found in the
 * message content each receive the same per-tool budget, stored under their
 * toolCallId. An envelope can be taken once through the API, is ignored once
 * it is older than its TTL or belongs to a different turn, and all envelopes
 * are dropped on turn_end, agent_end, session_before_switch, and
 * session_switch.
 *
 * @param budgetPolicy - Resolved context-budget policy
 * @returns Extension factory
 */
function createContextBudgetPlannerExtension(budgetPolicy) {
    return (pi) => {
        /** toolCallId → { envelope, metadata } awaiting pickup. */
        const pendingEnvelopes = new Map();
        let activeTurnIndex = 0;

        /** True when an entry has outlived its TTL or belongs to another turn. */
        const isStale = (entry, nowMs) => {
            const { createdAtMs, ttlMs, turnIndex } = entry.metadata;
            return nowMs - createdAtMs > ttlMs || turnIndex !== activeTurnIndex;
        };
        /** Remove every stale entry so old budgets cannot be reused. */
        const evictStale = (nowMs) => {
            for (const [id, entry] of pendingEnvelopes) {
                if (isStale(entry, nowMs)) {
                    pendingEnvelopes.delete(id);
                }
            }
        };
        /** Keep a per-tool byte count inside the policy's min/max bounds. */
        const withinPolicyBounds = (bytes) => {
            const atLeastMin = Math.max(budgetPolicy.minPerToolBytes, bytes);
            return Math.min(budgetPolicy.maxPerToolBytes, atLeastMin);
        };
        /** Work out the byte budget each tool call in a batch receives. */
        const planPerToolBytes = (usage, batchSize) => {
            if (usage.tokens === null || usage.contextWindow <= 0) {
                return withinPolicyBounds(unknownUsageFallbackBudget(budgetPolicy));
            }
            const percentUsed = usage.percent === null
                ? Math.round((usage.tokens / usage.contextWindow) * 100)
                : usage.percent;
            const headroomBytes = tokensToBytes(estimateRemainingTokens(usage, budgetPolicy.perTurnReserveTokens));
            const evenShare = Math.floor(headroomBytes / Math.max(1, batchSize));
            let proposed = evenShare;
            if (percentUsed >= budgetPolicy.hardThresholdPercent) {
                // Near the hard threshold: never hand out more than the minimum.
                proposed = Math.min(evenShare, budgetPolicy.minPerToolBytes);
            }
            else if (percentUsed >= budgetPolicy.softThresholdPercent) {
                // Past the soft threshold: hand out half of the even share.
                proposed = Math.floor(evenShare * 0.5);
            }
            return withinPolicyBounds(proposed);
        };
        /** API handed to tool extensions; `take` consumes an envelope at most once. */
        const budgetApi = {
            take(toolCallId) {
                const nowMs = Date.now();
                evictStale(nowMs);
                const entry = pendingEnvelopes.get(toolCallId);
                if (!entry) {
                    return undefined;
                }
                pendingEnvelopes.delete(toolCallId);
                return isStale(entry, nowMs) ? undefined : entry.envelope;
            },
        };
        /** Announce the planner API on the event bus. */
        const announceApi = () => {
            pi.events.emit(CONTEXT_BUDGET_API_CHANNELS.budgetApi, { api: budgetApi });
        };
        /** Shape check for a tool-call block inside assistant message content. */
        const isToolCallBlock = (block) => isObjectRecord(block) &&
            block.type === "toolCall" &&
            typeof block.id === "string" &&
            typeof block.name === "string" &&
            isObjectRecord(block.arguments);

        // Follow the turn counter and evict leftovers at each turn boundary.
        pi.on("turn_start", async (event) => {
            activeTurnIndex = event.turnIndex;
            evictStale(Date.now());
        });
        // Allocate envelopes once an assistant message has finished.
        pi.on("message_end", async (event, ctx) => {
            const message = event.message;
            if (!message || message.role !== "assistant" || !Array.isArray(message.content)) {
                return;
            }
            const toolCalls = message.content.filter(isToolCallBlock);
            const batchSize = toolCalls.length;
            if (batchSize === 0) {
                return;
            }
            const nowMs = Date.now();
            evictStale(nowMs);
            const usage = normalizeContextUsageSnapshot(ctx.getContextUsage?.());
            const maxBytes = planPerToolBytes(usage, batchSize);
            for (const { id } of toolCalls) {
                pendingEnvelopes.set(id, {
                    envelope: { batchSize, maxBytes },
                    metadata: {
                        createdAtMs: nowMs,
                        ttlMs: CONTEXT_BUDGET_ENVELOPE_TTL_MS,
                        turnIndex: activeTurnIndex,
                    },
                });
            }
        });
        pi.on("session_start", async () => {
            announceApi();
        });
        pi.events.on(CONTEXT_BUDGET_API_CHANNELS.budgetApiRequest, () => {
            announceApi();
        });
        // Every one of these lifecycle events discards all pending envelopes.
        for (const lifecycleEvent of ["turn_end", "agent_end", "session_before_switch", "session_switch"]) {
            pi.on(lifecycleEvent, async () => {
                pendingEnvelopes.clear();
            });
        }
    };
}
1246
1696
  /**
1247
1697
  * Detects when a model response was truncated due to max_tokens and notifies
1248
1698
  * the user. Without this, truncated responses silently stop — the model may