@bubblebrain-ai/bubble 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/agent/categories.d.ts +34 -0
  2. package/dist/agent/categories.js +98 -0
  3. package/dist/agent/profiles.d.ts +4 -0
  4. package/dist/agent/profiles.js +2 -3
  5. package/dist/agent/subagent-control.d.ts +5 -0
  6. package/dist/agent/subagent-control.js +4 -0
  7. package/dist/agent/subagent-lifecycle-reminder.d.ts +3 -0
  8. package/dist/agent/subagent-lifecycle-reminder.js +102 -0
  9. package/dist/agent/subagent-route-format.d.ts +8 -0
  10. package/dist/agent/subagent-route-format.js +18 -0
  11. package/dist/agent/subtask-policy.d.ts +0 -1
  12. package/dist/agent/subtask-policy.js +0 -4
  13. package/dist/agent.d.ts +12 -0
  14. package/dist/agent.js +152 -13
  15. package/dist/config.d.ts +23 -3
  16. package/dist/config.js +59 -6
  17. package/dist/context/budget.d.ts +3 -3
  18. package/dist/context/budget.js +29 -15
  19. package/dist/context/compact.d.ts +23 -0
  20. package/dist/context/compact.js +129 -0
  21. package/dist/context/llm-compactor.d.ts +19 -0
  22. package/dist/context/llm-compactor.js +200 -0
  23. package/dist/context/projector.js +28 -12
  24. package/dist/context/token-estimator.d.ts +14 -0
  25. package/dist/context/token-estimator.js +106 -0
  26. package/dist/context/tool-output-truncate.d.ts +8 -0
  27. package/dist/context/tool-output-truncate.js +59 -0
  28. package/dist/context/usage.js +9 -9
  29. package/dist/main.js +43 -6
  30. package/dist/model-catalog.d.ts +9 -0
  31. package/dist/model-catalog.js +16 -0
  32. package/dist/orchestrator/default-hooks.js +18 -0
  33. package/dist/provider-openai-codex.d.ts +13 -2
  34. package/dist/provider-openai-codex.js +81 -32
  35. package/dist/provider-registry.js +20 -4
  36. package/dist/slash-commands/commands.js +24 -0
  37. package/dist/slash-commands/types.d.ts +7 -0
  38. package/dist/tools/agent-lifecycle.js +22 -4
  39. package/dist/tools/edit.js +2 -2
  40. package/dist/tools/glob.js +2 -1
  41. package/dist/tools/grep.js +2 -2
  42. package/dist/tools/lsp.js +2 -2
  43. package/dist/tools/path-utils.d.ts +2 -0
  44. package/dist/tools/path-utils.js +16 -0
  45. package/dist/tools/read.js +117 -5
  46. package/dist/tools/write.js +3 -2
  47. package/dist/tui-ink/app.d.ts +11 -2
  48. package/dist/tui-ink/app.js +191 -78
  49. package/dist/tui-ink/approval/approval-dialog.js +4 -1
  50. package/dist/tui-ink/approval/diff-view.js +2 -1
  51. package/dist/tui-ink/approval/select.js +2 -1
  52. package/dist/tui-ink/code-highlight.d.ts +2 -0
  53. package/dist/tui-ink/code-highlight.js +30 -2
  54. package/dist/tui-ink/detect-theme.d.ts +19 -0
  55. package/dist/tui-ink/detect-theme.js +123 -0
  56. package/dist/tui-ink/footer.js +4 -3
  57. package/dist/tui-ink/input-box.js +83 -26
  58. package/dist/tui-ink/input-history.d.ts +16 -0
  59. package/dist/tui-ink/input-history.js +81 -0
  60. package/dist/tui-ink/markdown.js +30 -20
  61. package/dist/tui-ink/message-list.js +112 -16
  62. package/dist/tui-ink/model-picker.js +6 -1
  63. package/dist/tui-ink/plan-confirm.js +2 -1
  64. package/dist/tui-ink/question-dialog.js +2 -1
  65. package/dist/tui-ink/run.d.ts +5 -1
  66. package/dist/tui-ink/run.js +30 -2
  67. package/dist/tui-ink/theme.d.ts +64 -35
  68. package/dist/tui-ink/theme.js +81 -8
  69. package/dist/tui-ink/todos.js +5 -3
  70. package/dist/tui-ink/trace-groups.d.ts +3 -1
  71. package/dist/tui-ink/trace-groups.js +93 -14
  72. package/dist/tui-ink/welcome.js +23 -4
  73. package/dist/types.d.ts +6 -0
  74. package/package.json +2 -1
@@ -76,6 +76,135 @@ export function compactMessages(messages, options = {}) {
76
76
  droppedEntries: oldMessages.length,
77
77
  };
78
78
  }
79
/**
 * Compacts the *current* turn (everything after the last user message) by
 * replacing its older tool-calling groups with a single summary message.
 *
 * A "group" is one assistant message plus the tool results that follow it.
 * The newest `keepRecentGroups` groups are always kept; among the older ones
 * only groups whose assistant message carries tool calls are evicted.
 *
 * @param {Array<object>} messages Full message list.
 * @param {{ keepRecentGroups?: number, maxSummaryItems?: number }} [options]
 * @returns {{ compacted: false } | { compacted: true, summary: string, messages: Array<object>, droppedEntries: number }}
 *   `droppedEntries` counts evicted groups, not individual messages.
 */
export function compactCurrentTurnToolGroups(messages, options = {}) {
  const keepRecentGroups = options.keepRecentGroups ?? 2;
  const maxSummaryItems = options.maxSummaryItems ?? 8;

  const isPinned = (m) => m.role === "system" || m.role === "meta";
  const pinned = messages.filter(isPinned);
  const conversation = messages.filter((m) => !isPinned(m));

  const lastUserAt = conversation.map((m) => m.role).lastIndexOf("user");
  if (lastUserAt === -1) {
    return { compacted: false };
  }
  const throughLastUser = conversation.slice(0, lastUserAt + 1);

  // Each assistant message opens a group; tool results attach to the most
  // recent group. Tool results seen before any assistant message are ignored.
  const groups = [];
  for (const msg of conversation.slice(lastUserAt + 1)) {
    if (msg.role === "assistant") {
      groups.push({ assistant: msg, toolResults: [] });
    } else if (msg.role === "tool" && groups.length > 0) {
      groups[groups.length - 1].toolResults.push(msg);
    }
  }
  if (groups.length <= keepRecentGroups) {
    return { compacted: false };
  }

  // Only drop groups that have tool_calls — text-only assistant messages don't
  // free much context, and dropping them confuses the conversation flow.
  const evictable = groups
    .slice(0, groups.length - keepRecentGroups)
    .filter((g) => g.assistant.role === "assistant" && (g.assistant.toolCalls?.length ?? 0) > 0);
  if (evictable.length === 0) {
    return { compacted: false };
  }

  const summary = buildToolGroupsSummary(evictable, maxSummaryItems);
  if (!summary) {
    return { compacted: false };
  }

  const survivors = groups
    .filter((g) => !evictable.includes(g))
    .flatMap((g) => [cloneMessage(g.assistant), ...g.toolResults.map((t) => cloneMessage(t))]);

  const summaryMessage = {
    role: "system",
    content: `Earlier in this turn (compacted to free context):\n${summary}`,
  };
  return {
    compacted: true,
    summary,
    messages: [
      ...pinned.map(cloneMessage),
      ...throughLastUser.map(cloneMessage),
      summaryMessage,
      ...survivors,
    ],
    droppedEntries: evictable.length,
  };
}
143
/**
 * Builds the plain-text summary that stands in for evicted tool groups.
 *
 * The summary lists tool usage counts, file-like arguments that were passed
 * (keys `file_path`, `path`, `paths`, `file`), the total size of discarded
 * tool output, and up to `maxItems` one-line findings.
 *
 * @param {Array<{ assistant: object, toolResults: Array<object> }>} groups
 * @param {number} maxItems Maximum number of "Earlier findings" bullet lines.
 * @returns {string} Multi-line summary text.
 */
function buildToolGroupsSummary(groups, maxItems) {
  const usageByTool = new Map();
  const touchedFiles = new Set();
  const findings = [];
  let discardedChars = 0;
  const pathKeys = ["file_path", "path", "paths", "file"];

  for (const group of groups) {
    const { assistant } = group;
    if (assistant.role !== "assistant" || !assistant.toolCalls) {
      continue;
    }
    const nameByCallId = new Map();
    for (const call of assistant.toolCalls) {
      usageByTool.set(call.name, (usageByTool.get(call.name) ?? 0) + 1);
      nameByCallId.set(call.id, call.name);
      try {
        const args = JSON.parse(call.arguments || "{}");
        for (const key of pathKeys) {
          const value = args[key];
          const candidates = Array.isArray(value) ? value : [value];
          for (const candidate of candidates) {
            if (typeof candidate === "string" && candidate) {
              touchedFiles.add(candidate);
            }
          }
        }
      } catch {
        // ignore unparseable args
      }
    }
    for (const result of group.toolResults) {
      if (result.role !== "tool") {
        continue;
      }
      const text = typeof result.content === "string" ? result.content : "";
      discardedChars += text.length;
      if (findings.length < maxItems) {
        const label = nameByCallId.get(result.toolCallId) ?? "tool";
        findings.push(`${label}: ${summarizeText(text)}`);
      }
    }
  }

  const toolList = Array.from(usageByTool, ([name, n]) => (n > 1 ? `${name}×${n}` : name)).join(", ");
  const out = [`Tools used: ${toolList || "none"}`];
  if (touchedFiles.size > 0) {
    const shown = [...touchedFiles].slice(0, 12).join(", ");
    const overflow = touchedFiles.size > 12 ? ` (+${touchedFiles.size - 12} more)` : "";
    out.push(`Files touched: ${shown}${overflow}`);
  }
  out.push(`Discarded ~${formatChars(discardedChars)} of earlier tool output. Re-run the relevant tool if you need specifics.`);
  if (findings.length > 0) {
    out.push("", "Earlier findings:", ...findings.map((f) => `- ${f}`));
  }
  return out.join("\n");
}
201
/**
 * Renders a character count for humans: plain below 1,000, one-decimal "K"
 * below 1,000,000, two-decimal "M" otherwise.
 *
 * @param {number} count Number of characters.
 * @returns {string} e.g. `"999 chars"`, `"1.5K chars"`, `"2.50M chars"`.
 */
function formatChars(count) {
  return count < 1000
    ? `${count} chars`
    : count < 1_000_000
      ? `${(count / 1000).toFixed(1)}K chars`
      : `${(count / 1_000_000).toFixed(2)}M chars`;
}
79
208
  function buildCompactionSummary(entries, maxSummaryItems) {
80
209
  const messages = entriesToMessages(entries);
81
210
  return buildMessageSummary(messages, maxSummaryItems);
@@ -0,0 +1,19 @@
1
+ import type { Message, Provider } from "../types.js";
2
+ export declare const LLM_COMPACTION_PROMPT = "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\n\nInclude:\n- Current progress and key decisions made\n- Important context, constraints, or user preferences\n- What remains to be done (clear next steps)\n- Any critical data, examples, or references needed to continue\n\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.";
3
+ export declare const LLM_SUMMARY_PREFIX = "Another language model previously worked on this task and produced this handoff summary. Build on what's already done; avoid re-running the same investigation. Summary:";
4
+ export interface LLMCompactOptions {
5
+ provider: Provider;
6
+ modelId: string;
7
+ /** Upper bound, in estimated tokens, on the history handed to the compactor model; the oldest messages are dropped first until it fits. Defaults to 100,000. */
8
+ maxInputTokens?: number;
9
+ /** Number of trailing (assistant + tool-results) groups in the current turn to keep verbatim. */
10
+ keepRecentGroups?: number;
11
+ abortSignal?: AbortSignal;
12
+ }
13
+ export interface LLMCompactResult {
14
+ compacted: boolean;
15
+ summary?: string;
16
+ messages?: Message[];
17
+ reason?: string;
18
+ }
19
+ export declare function compactWithLLM(messages: Message[], options: LLMCompactOptions): Promise<LLMCompactResult>;
@@ -0,0 +1,200 @@
1
+ // LLM-driven context compaction.
2
+ //
3
+ // When the budget says we're approaching the context window, ask the model to
4
+ // produce a handoff summary of the conversation so far. Replace the bulky middle
5
+ // of history with that summary while keeping the initial system context and the
6
+ // user's latest ask intact. Architecturally this mirrors Codex CLI's approach
7
+ // (codex-rs/core/src/compact.rs + templates/compact/prompt.md): trust the model
8
+ // to pick what matters instead of writing a template.
9
+ //
10
+ // Failure modes are explicit: returns { compacted: false, reason } so the
11
+ // caller can fall back to algorithmic compaction without an exception.
12
+ import { estimateContextTokens } from "./budget.js";
13
+ export const LLM_COMPACTION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
14
+
15
+ Include:
16
+ - Current progress and key decisions made
17
+ - Important context, constraints, or user preferences
18
+ - What remains to be done (clear next steps)
19
+ - Any critical data, examples, or references needed to continue
20
+
21
+ Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`;
22
+ export const LLM_SUMMARY_PREFIX = `Another language model previously worked on this task and produced this handoff summary. Build on what's already done; avoid re-running the same investigation. Summary:`;
23
/**
 * Compacts history by asking the model for a handoff summary of everything
 * that is about to be evicted (earlier turns plus the older assistant/tool
 * groups of the current turn).
 *
 * Never throws for expected failures: every failure path resolves to
 * `{ compacted: false, reason }` so the caller can fall back to algorithmic
 * compaction.
 *
 * @param {Array<object>} messages Full message list.
 * @param {object} options
 * @param {object} options.provider Object exposing `complete(messages, opts)`.
 * @param {string} options.modelId Model used for the summary call.
 * @param {number} [options.maxInputTokens=100000] Cap on the history sent to
 *   the compactor; oldest messages are trimmed first.
 * @param {number} [options.keepRecentGroups=2] Trailing current-turn groups
 *   kept verbatim.
 * @param {AbortSignal} [options.abortSignal] Forwarded to the provider call.
 * @returns {Promise<{ compacted: boolean, summary?: string, messages?: Array<object>, reason?: string }>}
 */
export async function compactWithLLM(messages, options) {
  const { provider, modelId, abortSignal } = options;
  const maxInputTokens = options.maxInputTokens ?? 100_000;
  const keepRecentGroups = options.keepRecentGroups ?? 2;
  const preserved = messages.filter((m) => m.role === "system" || m.role === "meta");
  const body = messages.filter((m) => m.role !== "system" && m.role !== "meta");

  let lastUserIndex = -1;
  for (let i = body.length - 1; i >= 0; i--) {
    if (body[i].role === "user") {
      lastUserIndex = i;
      break;
    }
  }
  if (lastUserIndex < 0) {
    return { compacted: false, reason: "no user message in history" };
  }

  // Pivot the body around the last user message:
  //   priorTurns:  everything from earlier user turns (multi-turn case)
  //   lastUser:    the user's current ask (always kept verbatim)
  //   currentTurn: the assistant + tool groups produced in response so far
  const priorTurns = body.slice(0, lastUserIndex);
  const lastUser = body[lastUserIndex];
  const currentTurn = body.slice(lastUserIndex + 1);

  const groups = [];
  let active = null;
  for (const msg of currentTurn) {
    if (msg.role === "assistant") {
      if (active) {
        groups.push(active);
      }
      active = { assistant: msg, toolResults: [] };
    } else if (msg.role === "tool" && active) {
      active.toolResults.push(msg);
    }
  }
  if (active) {
    groups.push(active);
  }

  const keptGroupCount = Math.min(keepRecentGroups, groups.length);
  const evictedGroups = groups.slice(0, groups.length - keptGroupCount);
  const keptGroups = groups.slice(groups.length - keptGroupCount);

  // What we'll send to the model to summarize: prior turns + the older groups
  // in the current turn (everything we're about to evict).
  const toSummarize = [
    ...priorTurns,
    ...evictedGroups.flatMap((g) => [g.assistant, ...g.toolResults]),
  ];
  if (toSummarize.length === 0) {
    return { compacted: false, reason: "nothing to evict" };
  }

  const trimmedHistory = trimToFitTokenBudget(toSummarize, maxInputTokens);
  if (trimmedHistory.length === 0) {
    // Trimming dropped everything (e.g. a single message larger than the
    // budget). Summarizing an empty transcript would replace real history
    // with an invented summary, so bail out instead.
    return { compacted: false, reason: "evicted history exceeds compactor input budget" };
  }
  const historyText = serializeHistoryAsText(trimmedHistory);
  if (!historyText.trim()) {
    return { compacted: false, reason: "nothing summarizable in evicted history" };
  }

  const summaryInput = [
    { role: "system", content: LLM_COMPACTION_PROMPT },
    { role: "user", content: historyText },
  ];
  let summaryText;
  try {
    summaryText = await provider.complete(summaryInput, {
      model: modelId,
      temperature: 0.2,
      abortSignal,
    });
  } catch (err) {
    // Rejections are not guaranteed to be Error instances.
    const detail = err instanceof Error ? err.message : String(err);
    return { compacted: false, reason: `compactor call failed: ${detail}` };
  }
  if (typeof summaryText !== "string" || summaryText.trim().length === 0) {
    return { compacted: false, reason: "compactor returned empty summary" };
  }

  // New history shape (prefix-cache-friendly: preserved system+meta stay at the
  // absolute prefix unchanged; summary is injected after as a user-role envelope
  // so it can't pollute the cacheable system-prompt prefix):
  //
  //   [...preserved system+meta]                        <- stable prefix
  //   user: "<SUMMARY_PREFIX>\n<summary>"               <- evicted history compressed
  //   user: <original last user message>                <- the current ask
  //   [...kept current-turn (assistant + tool) groups]  <- recent tool work
  const flatKept = [];
  for (const g of keptGroups) {
    flatKept.push(cloneMessage(g.assistant));
    for (const t of g.toolResults) {
      flatKept.push(cloneMessage(t));
    }
  }
  const compacted = [
    ...preserved.map(cloneMessage),
    {
      role: "user",
      content: `${LLM_SUMMARY_PREFIX}\n${summaryText.trim()}`,
    },
    cloneMessage(lastUser),
    ...flatKept,
  ];
  return {
    compacted: true,
    summary: summaryText,
    messages: compacted,
  };
}
121
/**
 * Returns the longest suffix of `messages` whose estimated token count fits
 * within `maxTokens`. The input array is not modified; a new array is always
 * returned (possibly empty).
 *
 * @param {Array<object>} messages Oldest-first message list.
 * @param {number} maxTokens Token ceiling for the returned suffix.
 * @returns {Array<object>}
 */
function trimToFitTokenBudget(messages, maxTokens) {
  // Drop from the front (oldest first) until the estimate fits, keeping the
  // most recent context.
  let firstKept = 0;
  while (
    firstKept < messages.length &&
    estimateContextTokens(messages.slice(firstKept)) > maxTokens
  ) {
    firstKept += 1;
  }
  return messages.slice(firstKept);
}
130
/**
 * Flattens messages into a plain-text transcript for the compactor model.
 *
 * Emits `USER:`, `ASSISTANT:`, `TOOL_CALL[name]:` and `TOOL_RESULT[name]:`
 * entries separated by blank lines. Other roles are skipped. Tool results are
 * labelled with the name of the call they answer when that call appeared
 * earlier in `messages`, otherwise with `tool`.
 *
 * Content that is not a string (or, for user messages, not an array of parts)
 * is treated as empty rather than throwing, matching how the algorithmic
 * compactor reads tool results.
 *
 * @param {Array<object>} messages Messages to serialize, oldest first.
 * @returns {string} Transcript text.
 */
function serializeHistoryAsText(messages) {
  const lines = [];
  const toolNameByCallId = new Map();
  const asText = (value) => (typeof value === "string" ? value : "");
  for (const msg of messages) {
    switch (msg.role) {
      case "user": {
        // Multi-part user content: keep only the text parts.
        const text = Array.isArray(msg.content)
          ? msg.content.filter((p) => p.type === "text").map((p) => p.text).join(" ")
          : asText(msg.content);
        lines.push(`USER: ${text}`);
        break;
      }
      case "assistant": {
        const text = asText(msg.content);
        if (text.trim()) {
          lines.push(`ASSISTANT: ${text}`);
        }
        for (const tc of msg.toolCalls ?? []) {
          toolNameByCallId.set(tc.id, tc.name);
          lines.push(`TOOL_CALL[${tc.name}]: ${summarizeToolCallArgs(tc)}`);
        }
        break;
      }
      case "tool": {
        const name = toolNameByCallId.get(msg.toolCallId) ?? "tool";
        lines.push(`TOOL_RESULT[${name}]: ${truncateInline(asText(msg.content), 1500)}`);
        break;
      }
      default:
        break;
    }
  }
  return lines.join("\n\n");
}
165
/**
 * Renders a tool call's JSON arguments as space-separated `key=value` pairs.
 *
 * Only string, number and boolean values are included; each value is
 * JSON-encoded and capped at 200 characters. If the arguments cannot be
 * parsed or enumerated, the raw argument text is returned, truncated to 200
 * characters.
 *
 * @param {{ arguments?: string }} tc Tool call.
 * @returns {string} Pair list, `"(no args)"`, or the truncated raw text.
 */
function summarizeToolCallArgs(tc) {
  const scalarTypes = ["string", "number", "boolean"];
  try {
    const rendered = [];
    for (const [key, value] of Object.entries(JSON.parse(tc.arguments || "{}"))) {
      if (scalarTypes.includes(typeof value)) {
        rendered.push(`${key}=${JSON.stringify(value).slice(0, 200)}`);
      }
    }
    return rendered.length > 0 ? rendered.join(" ") : "(no args)";
  } catch {
    return truncateInline(tc.arguments || "", 200);
  }
}
177
/**
 * Collapses every whitespace run to a single space, trims the ends, and cuts
 * the result to at most `max` characters, ending with `...` when cut.
 *
 * @param {string} text Text to flatten.
 * @param {number} max Maximum length of the returned string.
 * @returns {string}
 */
function truncateInline(text, max) {
  const flattened = text.replace(/\s+/g, " ").trim();
  const fits = flattened.length <= max;
  return fits ? flattened : `${flattened.slice(0, max - 3)}...`;
}
183
/**
 * Copies a message so that compaction output does not share mutable
 * containers with its input.
 *
 * - assistant: the `toolCalls` array and each tool call object are copied
 *   (the key is always present on the copy, `undefined` when absent).
 * - user with array content: each part is copied, and `image_url` parts get
 *   their `image_url` object copied as well.
 * - anything else: shallow copy.
 *
 * @param {object} message
 * @returns {object} The copy.
 */
function cloneMessage(message) {
  if (message.role === "assistant") {
    const toolCalls = message.toolCalls?.map((toolCall) => ({ ...toolCall }));
    return { ...message, toolCalls };
  }
  if (message.role === "user" && Array.isArray(message.content)) {
    const content = message.content.map((part) => {
      const copy = { ...part };
      if (part.type === "image_url") {
        copy.image_url = { ...part.image_url };
      }
      return copy;
    });
    return { ...message, content };
  }
  return { ...message };
}
@@ -1,6 +1,14 @@
1
1
  import { getContextBudget } from "./budget.js";
2
- import { compactMessages } from "./compact.js";
2
+ import { compactCurrentTurnToolGroups, compactMessages } from "./compact.js";
3
3
  import { pruneMessages } from "./prune.js";
4
+ // Prefix-cache invariant: every projected output starts with the concatenation
5
+ // of (in order) system + meta messages from the input, followed by the
6
+ // conversational body. Compactors (compactMessages, compactCurrentTurnToolGroups,
7
+ // compactMessagesWithLLM, compactWithLLM) MUST preserve every existing
8
+ // system/meta message in its original position so the cacheable prefix
9
+ // stays byte-identical across turns where compaction didn't fire. Inserting
10
+ // new dynamic content (summaries, etc.) AFTER system+meta is safe; inserting
11
+ // it within or before them is not.
4
12
  export function projectMessages(messages, options = {}) {
5
13
  const mode = options.mode ?? "full";
6
14
  const projectedBody = [];
@@ -48,18 +56,26 @@ export function projectMessages(messages, options = {}) {
48
56
  if (!budget.shouldCompact) {
49
57
  return pruned;
50
58
  }
51
- const compacted = compactMessages(pruned, { keepRecentTurns: 2 });
52
- if (!compacted.compacted || !compacted.messages) {
53
- return pruned;
54
- }
55
- const compactedMessages = compacted.messages;
56
- const afterFirstPass = getContextBudget(options.providerId, options.modelId, compactedMessages);
57
- if (!afterFirstPass.shouldCompact) {
58
- return repairToolCallChains(compactedMessages);
59
+ // Escalating compaction: turn-level passes first, then sub-turn (single-turn
60
+ // bloat from many tool calls) as a finer-grained fallback. Each step only
61
+ // advances `working` if compaction actually fired, and re-checks the budget
62
+ // before deciding to escalate further.
63
+ let working = pruned;
64
+ const passes = [
65
+ () => compactMessages(working, { keepRecentTurns: 2 }).messages,
66
+ () => compactMessages(working, { keepRecentTurns: 1 }).messages,
67
+ () => compactCurrentTurnToolGroups(working, { keepRecentGroups: 2 }).messages,
68
+ () => compactCurrentTurnToolGroups(working, { keepRecentGroups: 1 }).messages,
69
+ ];
70
+ for (const pass of passes) {
71
+ const next = pass();
72
+ if (next)
73
+ working = next;
74
+ const after = getContextBudget(options.providerId, options.modelId, working);
75
+ if (!after.shouldCompact)
76
+ break;
59
77
  }
60
- const tighter = compactMessages(pruned, { keepRecentTurns: 1 });
61
- const finalMessages = (tighter.compacted && tighter.messages ? tighter.messages : compactedMessages);
62
- return repairToolCallChains(finalMessages);
78
+ return repairToolCallChains(working);
63
79
  }
64
80
  return repaired;
65
81
  }
@@ -0,0 +1,14 @@
1
+ export interface TokenEstimator {
2
+ estimate(text: string): number;
3
+ }
4
+ export declare class HeuristicEstimator implements TokenEstimator {
5
+ estimate(text: string): number;
6
+ }
7
+ export declare class TiktokenEstimator implements TokenEstimator {
8
+ private encoder;
9
+ private initFailed;
10
+ private readonly fallback;
11
+ estimate(text: string): number;
12
+ private getEncoder;
13
+ }
14
+ export declare function getTokenEstimator(providerId?: string): TokenEstimator;
@@ -0,0 +1,106 @@
1
+ // Token estimation strategy layer.
2
+ //
3
+ // Different providers use different tokenizers; a single "chars/4" rule under-counts
4
+ // dense content (HTML, CJK) and lets auto-compact fire too late. This module routes
5
+ // per-provider: OpenAI / OpenAI-Codex uses js-tiktoken with the o200k_base BPE; every
6
+ // other provider uses a CJK-aware heuristic. Drop in a new strategy per provider as
7
+ // their tokenizers become important without touching call sites.
8
+ // Tiktoken's pre-tokenization regex is catastrophic on inputs with long single-char
9
+ // runs ("x".repeat(4000) → 1.4s; bigger → minutes/hang). Two guards: a hard length
10
+ // cap, and a cheap scan for any run ≥ MAX_RUN_LEN of the same code unit. Both catch
11
+ // production hazards (binary blobs, base64 dumps, leaked buffers) and synthetic test
12
+ // fixtures alike. Normal prose / code / markdown stays well under both.
13
+ const TIKTOKEN_MAX_CHARS = 80_000;
14
+ const MAX_RUN_LEN = 64;
15
/**
 * Reports whether `text` contains a run of at least `MAX_RUN_LEN` identical
 * consecutive UTF-16 code units.
 *
 * @param {string} text
 * @returns {boolean}
 */
function hasPathologicalRun(text) {
  if (text.length < MAX_RUN_LEN) {
    return false;
  }
  // Index where the run currently being measured began.
  let runStart = 0;
  for (let i = 1; i < text.length; i++) {
    if (text.charCodeAt(i) !== text.charCodeAt(runStart)) {
      runStart = i;
      continue;
    }
    if (i - runStart + 1 >= MAX_RUN_LEN) {
      return true;
    }
  }
  return false;
}
34
+ // Cheap codepoint check: CJK ideographs + Hiragana/Katakana + Hangul.
35
+ // Each such char is roughly 1 token (vs ~0.25 token for ASCII), so weighting them
36
+ // 1.0 cuts the heuristic's CJK undercount by ~4x without needing a real tokenizer.
37
/**
 * Reports whether a code point falls in one of the ranges this module weights
 * as a full token.
 *
 * @param {number} code Unicode code point.
 * @returns {boolean}
 */
function isCjkCodePoint(code) {
  const ranges = [
    [0x3000, 0x9fff], // CJK symbols + unified ideographs (incl. Hiragana/Katakana)
    [0xac00, 0xd7af], // Hangul syllables
    [0xf900, 0xfaff], // CJK compatibility ideographs
    [0x20000, 0x2ffff], // CJK extensions B–F (surrogate pairs)
  ];
  return ranges.some(([low, high]) => code >= low && code <= high);
}
44
/**
 * Tokenizer-free estimator: each code point accepted by `isCjkCodePoint`
 * counts as one token, every other code point as a quarter token, and the
 * total is rounded up.
 */
export class HeuristicEstimator {
  /**
   * @param {string} text
   * @returns {number} Estimated token count; 0 for empty or missing text.
   */
  estimate(text) {
    if (!text) {
      return 0;
    }
    let fullWeight = 0;
    let quarterWeight = 0;
    let i = 0;
    while (i < text.length) {
      const code = text.codePointAt(i);
      if (isCjkCodePoint(code)) {
        fullWeight += 1;
      } else {
        quarterWeight += 1;
      }
      // Astral code points occupy two UTF-16 units; step over both.
      i += code > 0xffff ? 2 : 1;
    }
    return Math.ceil(fullWeight + quarterWeight / 4);
  }
}
62
import { createRequire } from "node:module";

/**
 * Estimator backed by js-tiktoken's `o200k_base` encoding, with a heuristic
 * fallback.
 *
 * The fallback is used when the text is longer than `TIKTOKEN_MAX_CHARS`,
 * when `hasPathologicalRun` flags it, when the encoder cannot be loaded, or
 * when encoding throws.
 */
export class TiktokenEstimator {
  // Loaded encoder, or null until the first successful load.
  encoder = null;
  // Set after a failed load so the load is not retried on every call.
  initFailed = false;
  fallback = new HeuristicEstimator();

  /**
   * @param {string} text
   * @returns {number} Token count from the encoder, or the heuristic estimate.
   */
  estimate(text) {
    if (!text) {
      return 0;
    }
    if (text.length > TIKTOKEN_MAX_CHARS) {
      return this.fallback.estimate(text);
    }
    if (hasPathologicalRun(text)) {
      return this.fallback.estimate(text);
    }
    const enc = this.getEncoder();
    if (!enc) {
      return this.fallback.estimate(text);
    }
    try {
      return enc.encode(text).length;
    } catch {
      return this.fallback.estimate(text);
    }
  }

  /**
   * Lazily loads and caches the encoder.
   *
   * @returns {object | null} The encoder, or null if it could not be loaded.
   */
  getEncoder() {
    if (this.encoder) {
      return this.encoder;
    }
    if (this.initFailed) {
      return null;
    }
    try {
      // Lazy require: defers ~1MB of BPE table load until OpenAI is actually used.
      // This file is an ES module, where a bare `require` is not defined; calling
      // it would throw a ReferenceError that the catch below swallows, leaving the
      // estimator permanently on the heuristic. createRequire gives a working
      // synchronous loader.
      const requireFromHere = createRequire(import.meta.url);
      const tiktoken = requireFromHere("js-tiktoken");
      this.encoder = tiktoken.getEncoding("o200k_base");
      return this.encoder;
    } catch {
      this.initFailed = true;
      return null;
    }
  }
}
100
+ const HEURISTIC = new HeuristicEstimator();
101
+ const TIKTOKEN = new TiktokenEstimator();
102
/**
 * Picks the shared estimator instance for a provider: the tiktoken-backed one
 * for `"openai"` and `"openai-codex"`, the heuristic one for anything else
 * (including no provider).
 *
 * @param {string} [providerId]
 * @returns {{ estimate(text: string): number }}
 */
export function getTokenEstimator(providerId) {
  const usesTiktoken = ["openai", "openai-codex"].includes(providerId);
  return usesTiktoken ? TIKTOKEN : HEURISTIC;
}
@@ -0,0 +1,8 @@
1
+ export interface ToolTruncationResult {
2
+ content: string;
3
+ truncated: boolean;
4
+ originalTokens: number;
5
+ finalTokens: number;
6
+ limit: number | undefined;
7
+ }
8
+ export declare function truncateToolOutputForModel(content: string, providerId: string, modelId: string): ToolTruncationResult;
@@ -0,0 +1,59 @@
1
+ // Tool-output truncation honoring the model's server-declared limit.
2
+ //
3
+ // Codex backend's /models endpoint reports per-model `truncation_policy.limit`
4
+ // (e.g. 10000 tokens for gpt-5.5). The expectation is that the CLIENT truncates
5
+ // each tool result to that budget before adding it to history; sending raw
6
+ // 50-100k tool dumps will blow through the input window after a handful of
7
+ // calls. Codex CLI does this via TruncationPolicy::Tokens; mirror it here.
8
+ //
9
+ // Strategy: middle-truncate (preserve head + tail with an explicit marker in
10
+ // between). Heads usually carry structure/headers; tails often carry totals,
11
+ // errors, or conclusions — losing either is worse than losing the middle.
12
+ import { getToolOutputTokenLimit } from "../model-catalog.js";
13
+ import { estimateTextTokens } from "./budget.js";
14
/**
 * Applies the model's tool-output token limit to one tool result.
 *
 * When the model has no limit, or the content already fits, the content is
 * returned untouched. Otherwise it is middle-truncated and re-measured.
 *
 * @param {string} content Raw tool output.
 * @param {string} providerId Provider used for limit lookup and estimation.
 * @param {string} modelId Model used for limit lookup.
 * @returns {{ content: string, truncated: boolean, originalTokens: number, finalTokens: number, limit: number | undefined }}
 */
export function truncateToolOutputForModel(content, providerId, modelId) {
  const limit = getToolOutputTokenLimit(providerId, modelId);
  const originalTokens = estimateTextTokens(content, providerId);
  const needsTruncation = Boolean(limit) && originalTokens > limit;
  if (!needsTruncation) {
    return { content, truncated: false, originalTokens, finalTokens: originalTokens, limit };
  }
  const shortened = middleTruncateToTokenBudget(content, limit, providerId);
  return {
    content: shortened,
    truncated: true,
    originalTokens,
    finalTokens: estimateTextTokens(shortened, providerId),
    limit,
  };
}
24
/**
 * Shortens `content` to roughly `tokenBudget` tokens by keeping its head and
 * tail and replacing the middle with a marker that states how many characters
 * were dropped.
 *
 * The token budget is converted to a character budget using the content's
 * overall chars-per-token ratio. Because that ratio can be off for the kept
 * head and tail, up to three further rounds each shrink both ends by 20%.
 * The result may still exceed the budget if those rounds are not enough.
 *
 * @param {string} content Text to shorten.
 * @param {number} tokenBudget Target token ceiling.
 * @param {string} providerId Provider used for token estimation.
 * @returns {string} `content` itself when it already fits, else the shortened text.
 */
function middleTruncateToTokenBudget(content, tokenBudget, providerId) {
  // Convert token budget to a char budget via the estimator's effective ratio.
  const tokensAll = estimateTextTokens(content, providerId);
  if (tokensAll <= tokenBudget) {
    return content;
  }
  const charsPerToken = content.length / Math.max(1, tokensAll);
  // Reserve some room for the marker itself.
  const reserveForMarker = 200;
  // Never keep more characters than exist: otherwise head and tail overlap
  // and the marker would report a negative dropped count.
  const charBudget = Math.min(
    content.length,
    Math.max(200, Math.floor(tokenBudget * charsPerToken) - reserveForMarker),
  );
  const marker = (dropped) =>
    `\n\n[... middle ${formatChars(dropped)} truncated by model policy (${tokenBudget}-token cap) ...]\n\n`;
  const assemble = (headLen, tailLen) => {
    const head = content.slice(0, headLen);
    const tail = content.slice(content.length - tailLen);
    return `${head}${marker(content.length - headLen - tailLen)}${tail}`;
  };

  let headLen = Math.floor(charBudget / 2);
  let tailLen = charBudget - headLen;
  let truncated = assemble(headLen, tailLen);

  // Tighten if our estimate of charsPerToken undercounts and we're still over.
  // Each round shrinks from the previous round's lengths so the cut actually
  // gets tighter.
  let safety = 3;
  while (estimateTextTokens(truncated, providerId) > tokenBudget && safety-- > 0) {
    headLen = Math.floor(headLen * 0.8);
    tailLen = Math.floor(tailLen * 0.8);
    truncated = assemble(headLen, tailLen);
  }
  return truncated;
}
53
/**
 * Formats a character count for the truncation marker: raw below 1,000,
 * thousands with one decimal below 1,000,000, millions with two decimals
 * above that.
 *
 * @param {number} count Number of characters.
 * @returns {string} e.g. `"42 chars"`, `"12.3K chars"`, `"1.25M chars"`.
 */
function formatChars(count) {
  if (count < 1000) {
    return `${count} chars`;
  }
  const inThousands = count < 1_000_000;
  const scaled = inThousands
    ? `${(count / 1000).toFixed(1)}K`
    : `${(count / 1_000_000).toFixed(2)}M`;
  return `${scaled} chars`;
}
@@ -9,11 +9,11 @@ export function buildContextUsageSnapshot(input) {
9
9
  const systemContent = systemMessages.map((message) => message.content).join("\n\n");
10
10
  const skillsPrompt = formatSkillsPrompt(input.skills);
11
11
  const skillsInSystemPrompt = !!skillsPrompt && systemContent.includes(skillsPrompt);
12
- const skillsTokens = skillsInSystemPrompt ? estimateTextTokens(skillsPrompt) : 0;
13
- const systemPromptTokens = Math.max(0, estimateTextTokens(systemContent) - skillsTokens);
14
- const toolsTokens = estimateToolEntriesTokens(input.toolEntries);
15
- const deferredToolsTokens = estimateDeferredToolsReminderTokens(deferredToolEntries);
16
- const rawOtherTokens = otherMessages.reduce((sum, message) => sum + estimateMessageTokens(message), 0);
12
+ const skillsTokens = skillsInSystemPrompt ? estimateTextTokens(skillsPrompt, input.providerId) : 0;
13
+ const systemPromptTokens = Math.max(0, estimateTextTokens(systemContent, input.providerId) - skillsTokens);
14
+ const toolsTokens = estimateToolEntriesTokens(input.toolEntries, input.providerId);
15
+ const deferredToolsTokens = estimateDeferredToolsReminderTokens(deferredToolEntries, input.providerId);
16
+ const rawOtherTokens = otherMessages.reduce((sum, message) => sum + estimateMessageTokens(message, input.providerId), 0);
17
17
  const otherTokens = Math.max(0, rawOtherTokens - deferredToolsTokens);
18
18
  const usedTokens = systemPromptTokens + toolsTokens + skillsTokens + deferredToolsTokens + otherTokens;
19
19
  const contextWindow = getModelContextWindow(input.providerId, input.modelId);
@@ -97,20 +97,20 @@ export function formatContextUsage(snapshot) {
97
97
  ];
98
98
  return lines.join("\n");
99
99
  }
100
- function estimateToolEntriesTokens(entries) {
100
+ function estimateToolEntriesTokens(entries, providerId) {
101
101
  return entries.reduce((sum, entry) => {
102
102
  const payload = JSON.stringify({
103
103
  name: entry.name,
104
104
  description: entry.description,
105
105
  parameters: entry.parameters,
106
106
  });
107
- return sum + estimateTextTokens(payload) + 8;
107
+ return sum + estimateTextTokens(payload, providerId) + 8;
108
108
  }, 0);
109
109
  }
110
- function estimateDeferredToolsReminderTokens(entries) {
110
+ function estimateDeferredToolsReminderTokens(entries, providerId) {
111
111
  if (entries.length === 0)
112
112
  return 0;
113
- return estimateTextTokens(buildDeferredToolsReminder(entries.map((entry) => entry.name)));
113
+ return estimateTextTokens(buildDeferredToolsReminder(entries.map((entry) => entry.name)), providerId);
114
114
  }
115
115
  function buildSegmentedBar(rows, totalTokens) {
116
116
  const width = 54;