@bubblebrain-ai/bubble 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/categories.d.ts +34 -0
- package/dist/agent/categories.js +98 -0
- package/dist/agent/profiles.d.ts +4 -0
- package/dist/agent/profiles.js +2 -3
- package/dist/agent/subagent-control.d.ts +5 -0
- package/dist/agent/subagent-control.js +4 -0
- package/dist/agent/subagent-lifecycle-reminder.d.ts +3 -0
- package/dist/agent/subagent-lifecycle-reminder.js +102 -0
- package/dist/agent/subagent-route-format.d.ts +8 -0
- package/dist/agent/subagent-route-format.js +18 -0
- package/dist/agent/subtask-policy.d.ts +0 -1
- package/dist/agent/subtask-policy.js +0 -4
- package/dist/agent.d.ts +18 -0
- package/dist/agent.js +188 -16
- package/dist/config.d.ts +23 -3
- package/dist/config.js +59 -6
- package/dist/context/budget.d.ts +3 -2
- package/dist/context/budget.js +29 -15
- package/dist/context/compact.d.ts +23 -0
- package/dist/context/compact.js +129 -0
- package/dist/context/llm-compactor.d.ts +19 -0
- package/dist/context/llm-compactor.js +200 -0
- package/dist/context/projector.js +28 -12
- package/dist/context/token-estimator.d.ts +14 -0
- package/dist/context/token-estimator.js +106 -0
- package/dist/context/tool-output-truncate.d.ts +8 -0
- package/dist/context/tool-output-truncate.js +59 -0
- package/dist/context/usage.d.ts +34 -0
- package/dist/context/usage.js +213 -0
- package/dist/diff-stats.d.ts +5 -0
- package/dist/diff-stats.js +21 -0
- package/dist/main.js +68 -7
- package/dist/mcp/transports.d.ts +1 -0
- package/dist/mcp/transports.js +8 -0
- package/dist/model-catalog.d.ts +9 -0
- package/dist/model-catalog.js +17 -1
- package/dist/orchestrator/default-hooks.js +24 -18
- package/dist/prompt/compose.js +2 -1
- package/dist/prompt/provider-prompts/kimi.js +3 -1
- package/dist/provider-openai-codex.d.ts +13 -2
- package/dist/provider-openai-codex.js +81 -32
- package/dist/provider-registry.js +22 -6
- package/dist/provider-transform.d.ts +3 -1
- package/dist/provider-transform.js +15 -0
- package/dist/provider.d.ts +4 -1
- package/dist/provider.js +89 -4
- package/dist/reasoning-debug.d.ts +7 -0
- package/dist/reasoning-debug.js +30 -0
- package/dist/session-log.js +13 -2
- package/dist/session-types.d.ts +1 -1
- package/dist/slash-commands/commands.js +60 -2
- package/dist/slash-commands/types.d.ts +7 -0
- package/dist/tools/agent-lifecycle.js +22 -4
- package/dist/tools/edit.js +7 -2
- package/dist/tools/file-state.d.ts +19 -0
- package/dist/tools/file-state.js +15 -0
- package/dist/tools/glob.js +2 -1
- package/dist/tools/grep.js +2 -2
- package/dist/tools/lsp.js +2 -2
- package/dist/tools/path-utils.d.ts +2 -0
- package/dist/tools/path-utils.js +16 -0
- package/dist/tools/read.d.ts +1 -1
- package/dist/tools/read.js +207 -14
- package/dist/tools/write.js +3 -2
- package/dist/tui/escape-confirmation.d.ts +15 -0
- package/dist/tui/escape-confirmation.js +30 -0
- package/dist/tui/run.js +93 -23
- package/dist/tui-ink/app.d.ts +52 -0
- package/dist/tui-ink/app.js +1129 -0
- package/dist/tui-ink/approval/approval-dialog.d.ts +13 -0
- package/dist/tui-ink/approval/approval-dialog.js +132 -0
- package/dist/tui-ink/approval/diff-view.d.ts +7 -0
- package/dist/tui-ink/approval/diff-view.js +44 -0
- package/dist/tui-ink/approval/select.d.ts +35 -0
- package/dist/tui-ink/approval/select.js +88 -0
- package/dist/tui-ink/code-highlight.d.ts +8 -0
- package/dist/tui-ink/code-highlight.js +122 -0
- package/dist/tui-ink/detect-theme.d.ts +19 -0
- package/dist/tui-ink/detect-theme.js +123 -0
- package/dist/tui-ink/display-history.d.ts +38 -0
- package/dist/tui-ink/display-history.js +130 -0
- package/dist/tui-ink/edit-diff.d.ts +11 -0
- package/dist/tui-ink/edit-diff.js +52 -0
- package/dist/tui-ink/file-mentions.d.ts +29 -0
- package/dist/tui-ink/file-mentions.js +174 -0
- package/dist/tui-ink/footer.d.ts +19 -0
- package/dist/tui-ink/footer.js +45 -0
- package/dist/tui-ink/image-paste.d.ts +54 -0
- package/dist/tui-ink/image-paste.js +288 -0
- package/dist/tui-ink/input-box.d.ts +41 -0
- package/dist/tui-ink/input-box.js +694 -0
- package/dist/tui-ink/input-history.d.ts +16 -0
- package/dist/tui-ink/input-history.js +81 -0
- package/dist/tui-ink/markdown.d.ts +38 -0
- package/dist/tui-ink/markdown.js +394 -0
- package/dist/tui-ink/message-list.d.ts +33 -0
- package/dist/tui-ink/message-list.js +667 -0
- package/dist/tui-ink/model-picker.d.ts +43 -0
- package/dist/tui-ink/model-picker.js +331 -0
- package/dist/tui-ink/plan-confirm.d.ts +7 -0
- package/dist/tui-ink/plan-confirm.js +105 -0
- package/dist/tui-ink/question-dialog.d.ts +8 -0
- package/dist/tui-ink/question-dialog.js +99 -0
- package/dist/tui-ink/recent-activity.d.ts +8 -0
- package/dist/tui-ink/recent-activity.js +71 -0
- package/dist/tui-ink/run.d.ts +37 -0
- package/dist/tui-ink/run.js +53 -0
- package/dist/tui-ink/theme.d.ts +66 -0
- package/dist/tui-ink/theme.js +115 -0
- package/dist/tui-ink/todos.d.ts +7 -0
- package/dist/tui-ink/todos.js +46 -0
- package/dist/tui-ink/trace-groups.d.ts +27 -0
- package/dist/tui-ink/trace-groups.js +389 -0
- package/dist/tui-ink/use-terminal-size.d.ts +4 -0
- package/dist/tui-ink/use-terminal-size.js +21 -0
- package/dist/tui-ink/welcome.d.ts +18 -0
- package/dist/tui-ink/welcome.js +138 -0
- package/dist/types.d.ts +10 -0
- package/package.json +7 -1
package/dist/context/compact.js
CHANGED
|
@@ -76,6 +76,135 @@ export function compactMessages(messages, options = {}) {
|
|
|
76
76
|
droppedEntries: oldMessages.length,
|
|
77
77
|
};
|
|
78
78
|
}
|
|
79
|
+
/**
 * Sub-turn compaction: within the messages that follow the last user message,
 * replace older assistant/tool-result groups that made tool calls with a
 * single summary message.
 *
 * @param messages Full message list; it is not mutated (survivors are cloned).
 * @param options  `keepRecentGroups` (default 2) trailing groups are never
 *                 evicted; `maxSummaryItems` (default 8) caps the findings
 *                 listed in the summary.
 * @returns `{ compacted: false }` when there is no user message, too few
 *          groups, nothing evictable, or an empty summary; otherwise
 *          `{ compacted: true, summary, messages, droppedEntries }` where
 *          `droppedEntries` counts evicted groups.
 */
export function compactCurrentTurnToolGroups(messages, options = {}) {
    const recentGroupsToKeep = options.keepRecentGroups ?? 2;
    const summaryItemCap = options.maxSummaryItems ?? 8;

    // Split once into pinned (system/meta) messages and the conversational body.
    const pinned = [];
    const conversation = [];
    for (const entry of messages) {
        if (entry.role === "system" || entry.role === "meta")
            pinned.push(entry);
        else
            conversation.push(entry);
    }

    const userAt = conversation.map((entry) => entry.role).lastIndexOf("user");
    if (userAt < 0)
        return { compacted: false };

    // Everything up to and including the last user message is kept untouched.
    const beforeTurn = conversation.slice(0, userAt + 1);

    // Group the rest as (assistant, following tool results). Tool messages that
    // arrive before any assistant message have no group and are not carried over.
    const groups = [];
    for (const entry of conversation.slice(userAt + 1)) {
        if (entry.role === "assistant") {
            groups.push({ assistant: entry, toolResults: [] });
        }
        else if (entry.role === "tool" && groups.length > 0) {
            groups[groups.length - 1].toolResults.push(entry);
        }
    }
    if (groups.length <= recentGroupsToKeep)
        return { compacted: false };

    // Only drop groups that have tool_calls — text-only assistant messages don't
    // free much context, and dropping them confuses the conversation flow.
    const evicted = groups
        .slice(0, groups.length - recentGroupsToKeep)
        .filter((group) => group.assistant.role === "assistant" && (group.assistant.toolCalls?.length ?? 0) > 0);
    if (evicted.length === 0)
        return { compacted: false };

    const summary = buildToolGroupsSummary(evicted, summaryItemCap);
    if (!summary)
        return { compacted: false };

    const evictedSet = new Set(evicted);
    const survivors = [];
    for (const group of groups) {
        if (evictedSet.has(group))
            continue;
        survivors.push(cloneMessage(group.assistant));
        for (const result of group.toolResults)
            survivors.push(cloneMessage(result));
    }

    return {
        compacted: true,
        summary,
        messages: [
            ...pinned.map(cloneMessage),
            ...beforeTurn.map(cloneMessage),
            {
                role: "system",
                content: `Earlier in this turn (compacted to free context):\n${summary}`,
            },
            ...survivors,
        ],
        droppedEntries: evicted.length,
    };
}
|
|
143
|
+
/**
 * Builds the plain-text summary that stands in for evicted tool groups.
 *
 * The summary lists tool names with call counts, up to 12 file paths found in
 * the call arguments, the total size of the discarded tool output, and up to
 * `maxItems` one-line findings taken from the tool results.
 *
 * @param groups   Evicted groups, each `{ assistant, toolResults }`.
 * @param maxItems Maximum number of "Earlier findings" lines.
 * @returns Newline-joined summary text.
 */
function buildToolGroupsSummary(groups, maxItems) {
    const pathKeys = ["file_path", "path", "paths", "file"];
    const callsPerTool = new Map();
    const touchedFiles = new Set();
    const findings = [];
    let discardedChars = 0;

    const notePath = (candidate) => {
        if (typeof candidate === "string" && candidate)
            touchedFiles.add(candidate);
    };

    for (const { assistant, toolResults } of groups) {
        if (assistant.role !== "assistant" || !assistant.toolCalls)
            continue;

        // Tool names are resolved per group, from that group's own calls.
        const nameOfCall = new Map();
        for (const call of assistant.toolCalls) {
            callsPerTool.set(call.name, (callsPerTool.get(call.name) ?? 0) + 1);
            nameOfCall.set(call.id, call.name);
            try {
                const args = JSON.parse(call.arguments || "{}");
                for (const key of pathKeys) {
                    const value = args[key];
                    if (Array.isArray(value)) {
                        for (const item of value)
                            notePath(item);
                    }
                    else {
                        notePath(value);
                    }
                }
            }
            catch {
                // ignore unparseable args
            }
        }

        for (const result of toolResults) {
            if (result.role !== "tool")
                continue;
            const text = typeof result.content === "string" ? result.content : "";
            discardedChars += text.length;
            if (findings.length >= maxItems)
                continue;
            const toolName = nameOfCall.get(result.toolCallId) ?? "tool";
            findings.push(`${toolName}: ${summarizeText(text)}`);
        }
    }

    const toolList = Array.from(callsPerTool, ([name, count]) => (count > 1 ? `${name}×${count}` : name)).join(", ");
    const out = [`Tools used: ${toolList || "none"}`];
    if (touchedFiles.size > 0) {
        const shown = [...touchedFiles].slice(0, 12).join(", ");
        const overflow = touchedFiles.size > 12 ? ` (+${touchedFiles.size - 12} more)` : "";
        out.push(`Files touched: ${shown}${overflow}`);
    }
    out.push(`Discarded ~${formatChars(discardedChars)} of earlier tool output. Re-run the relevant tool if you need specifics.`);
    if (findings.length > 0) {
        out.push("", "Earlier findings:", ...findings.map((finding) => `- ${finding}`));
    }
    return out.join("\n");
}
|
|
201
|
+
/**
 * Formats a character count for display: "N chars", "N.NK chars" or
 * "N.NNM chars".
 *
 * @param count Number of characters.
 * @returns Human-readable size string.
 */
function formatChars(count) {
    if (count < 1000)
        return `${count} chars`;
    if (count < 1_000_000) {
        const thousands = (count / 1000).toFixed(1);
        // toFixed rounds, so 999,950–999,999 would print as "1000.0K";
        // let those fall through to the M form instead.
        if (thousands !== "1000.0")
            return `${thousands}K chars`;
    }
    return `${(count / 1_000_000).toFixed(2)}M chars`;
}
|
|
79
208
|
function buildCompactionSummary(entries, maxSummaryItems) {
|
|
80
209
|
const messages = entriesToMessages(entries);
|
|
81
210
|
return buildMessageSummary(messages, maxSummaryItems);
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Message, Provider } from "../types.js";
|
|
2
|
+
/** System prompt given to the compactor model when requesting a handoff summary. */
export declare const LLM_COMPACTION_PROMPT = "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\n\nInclude:\n- Current progress and key decisions made\n- Important context, constraints, or user preferences\n- What remains to be done (clear next steps)\n- Any critical data, examples, or references needed to continue\n\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.";
|
|
3
|
+
/** Text placed before the summary in the user-role message that replaces the evicted history. */
export declare const LLM_SUMMARY_PREFIX = "Another language model previously worked on this task and produced this handoff summary. Build on what's already done; avoid re-running the same investigation. Summary:";
|
|
4
|
+
/** Options accepted by `compactWithLLM`. */
export interface LLMCompactOptions {
|
|
5
|
+
/** Provider whose `complete()` is awaited to produce the summary. */
provider: Provider;
|
|
6
|
+
/** Model id passed to the provider as `model`. */
modelId: string;
|
|
7
|
+
/** Estimated-token ceiling for the history sent to the compactor; oldest messages are dropped until it fits. Defaults to 100,000. */
|
|
8
|
+
maxInputTokens?: number;
|
|
9
|
+
/** Number of trailing (assistant + tool-results) groups in the current turn to keep verbatim. Defaults to 2. */
|
|
10
|
+
keepRecentGroups?: number;
|
|
11
|
+
/** Forwarded to the provider call. */
abortSignal?: AbortSignal;
|
|
12
|
+
}
|
|
13
|
+
/** Outcome of `compactWithLLM`; failures are reported through `reason` instead of being thrown. */
export interface LLMCompactResult {
|
|
14
|
+
/** True when a summary was produced and `messages` holds the rebuilt history. */
compacted: boolean;
|
|
15
|
+
/** Summary text as returned by the provider (set only when `compacted` is true). */
summary?: string;
|
|
16
|
+
/** Rebuilt history: preserved system/meta messages, the summary as a user message, the last user message, then the kept groups. */
messages?: Message[];
|
|
17
|
+
/** Why compaction did not happen (set only when `compacted` is false). */
reason?: string;
|
|
18
|
+
}
|
|
19
|
+
/** Asks the provider for a handoff summary of older history and returns the rebuilt message list, or `{ compacted: false, reason }`. */
export declare function compactWithLLM(messages: Message[], options: LLMCompactOptions): Promise<LLMCompactResult>;
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
// LLM-driven context compaction.
|
|
2
|
+
//
|
|
3
|
+
// When the budget says we're approaching the context window, ask the model to
|
|
4
|
+
// produce a handoff summary of the conversation so far. Replace the bulky middle
|
|
5
|
+
// of history with that summary while keeping the initial system context and the
|
|
6
|
+
// user's latest ask intact. Architecturally this mirrors Codex CLI's approach
|
|
7
|
+
// (codex-rs/core/src/compact.rs + templates/compact/prompt.md): trust the model
|
|
8
|
+
// to pick what matters instead of writing a template.
|
|
9
|
+
//
|
|
10
|
+
// Failure modes are explicit: returns { compacted: false, reason } so the
|
|
11
|
+
// caller can fall back to algorithmic compaction without an exception.
|
|
12
|
+
import { estimateContextTokens } from "./budget.js";
|
|
13
|
+
/** System prompt sent to the compactor model by compactWithLLM. */
export const LLM_COMPACTION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
|
|
14
|
+
|
|
15
|
+
Include:
|
|
16
|
+
- Current progress and key decisions made
|
|
17
|
+
- Important context, constraints, or user preferences
|
|
18
|
+
- What remains to be done (clear next steps)
|
|
19
|
+
- Any critical data, examples, or references needed to continue
|
|
20
|
+
|
|
21
|
+
Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`;
|
|
22
|
+
/** Prepended (followed by a newline) to the trimmed summary in the user-role message that replaces evicted history. */
export const LLM_SUMMARY_PREFIX = `Another language model previously worked on this task and produced this handoff summary. Build on what's already done; avoid re-running the same investigation. Summary:`;
|
|
23
|
+
/**
 * Compacts history by asking the model for a handoff summary.
 *
 * Everything before the last user message, plus all but the last
 * `keepRecentGroups` assistant/tool groups after it, is serialized and sent to
 * `provider.complete`. The result replaces that material as a single
 * user-role message.
 *
 * @param messages Full message list; not mutated (kept messages are cloned).
 * @param options  See LLMCompactOptions. `maxInputTokens` defaults to 100,000
 *                 and `keepRecentGroups` to 2.
 * @returns `{ compacted: true, summary, messages }` on success, otherwise
 *          `{ compacted: false, reason }`. Provider failures are reported via
 *          `reason` rather than thrown.
 */
export async function compactWithLLM(messages, options) {
    const { provider, modelId, abortSignal } = options;
    const maxInputTokens = options.maxInputTokens ?? 100_000;
    const keepRecentGroups = options.keepRecentGroups ?? 2;
    const preserved = messages.filter((m) => m.role === "system" || m.role === "meta");
    const body = messages.filter((m) => m.role !== "system" && m.role !== "meta");
    let lastUserIndex = -1;
    for (let i = body.length - 1; i >= 0; i--) {
        if (body[i].role === "user") {
            lastUserIndex = i;
            break;
        }
    }
    if (lastUserIndex < 0) {
        return { compacted: false, reason: "no user message in history" };
    }
    // Pivot the body around the last user message:
    //   priorTurns:  everything from earlier user turns (multi-turn case)
    //   lastUser:    the user's current ask (always kept verbatim)
    //   currentTurn: the assistant + tool groups produced in response so far
    const priorTurns = body.slice(0, lastUserIndex);
    const lastUser = body[lastUserIndex];
    const currentTurn = body.slice(lastUserIndex + 1);
    const groups = [];
    let active = null;
    for (const msg of currentTurn) {
        if (msg.role === "assistant") {
            if (active)
                groups.push(active);
            active = { assistant: msg, toolResults: [] };
        }
        else if (msg.role === "tool" && active) {
            active.toolResults.push(msg);
        }
    }
    if (active)
        groups.push(active);
    const keptGroupCount = Math.min(keepRecentGroups, groups.length);
    const evictedGroups = groups.slice(0, groups.length - keptGroupCount);
    const keptGroups = groups.slice(groups.length - keptGroupCount);
    // What we'll send to the model to summarize: prior turns + the older groups
    // in the current turn (everything we're about to evict).
    const toSummarize = [
        ...priorTurns,
        ...evictedGroups.flatMap((g) => [g.assistant, ...g.toolResults]),
    ];
    if (toSummarize.length === 0) {
        return { compacted: false, reason: "nothing to evict" };
    }
    const trimmedHistory = trimToFitTokenBudget(toSummarize, maxInputTokens);
    const historyText = serializeHistoryAsText(trimmedHistory);
    // If trimming dropped everything (or nothing serializable remained), a
    // model call would summarize an empty transcript and that output would
    // replace real history. Bail so the caller can fall back instead.
    if (historyText.trim().length === 0) {
        return { compacted: false, reason: "no summarizable history fits the compactor input budget" };
    }
    const summaryInput = [
        { role: "system", content: LLM_COMPACTION_PROMPT },
        { role: "user", content: historyText },
    ];
    let summaryText;
    try {
        summaryText = await provider.complete(summaryInput, {
            model: modelId,
            temperature: 0.2,
            abortSignal,
        });
    }
    catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        const detail = err instanceof Error ? err.message : String(err);
        return { compacted: false, reason: `compactor call failed: ${detail}` };
    }
    // A non-string result is treated like an empty one so `.trim()` cannot throw.
    if (typeof summaryText !== "string" || summaryText.trim().length === 0) {
        return { compacted: false, reason: "compactor returned empty summary" };
    }
    // New history shape (prefix-cache-friendly: preserved system+meta stay at the
    // absolute prefix unchanged; summary is injected after as a user-role envelope
    // so it can't pollute the cacheable system-prompt prefix):
    //
    //   [...preserved system+meta]                          ← stable prefix
    //   user: "<SUMMARY_PREFIX>\n<summary>"                 ← evicted history compressed
    //   user: <original last user message>                  ← the current ask
    //   [...kept current-turn (assistant + tool) groups]    ← recent tool work
    const flatKept = [];
    for (const g of keptGroups) {
        flatKept.push(cloneMessage(g.assistant));
        for (const t of g.toolResults)
            flatKept.push(cloneMessage(t));
    }
    const compacted = [
        ...preserved.map(cloneMessage),
        {
            role: "user",
            content: `${LLM_SUMMARY_PREFIX}\n${summaryText.trim()}`,
        },
        cloneMessage(lastUser),
        ...flatKept,
    ];
    return {
        compacted: true,
        summary: summaryText,
        messages: compacted,
    };
}
|
|
121
|
+
/**
 * Returns the longest suffix of `messages` whose estimated token count is at
 * most `maxTokens`. The result may be empty.
 *
 * Oldest messages are dropped first; this matches Codex's pattern and keeps
 * the most recent context.
 *
 * @param messages  Messages in chronological order (not mutated).
 * @param maxTokens Estimated-token ceiling.
 * @returns A new array holding the surviving suffix.
 */
function trimToFitTokenBudget(messages, maxTokens) {
    let firstKept = 0;
    while (firstKept < messages.length && estimateContextTokens(messages.slice(firstKept)) > maxTokens) {
        firstKept += 1;
    }
    return messages.slice(firstKept);
}
|
|
130
|
+
/**
 * Renders messages as a plain-text transcript for the compactor model.
 *
 * Each message becomes one or more labelled lines (`USER:`, `ASSISTANT:`,
 * `TOOL_CALL[name]:`, `TOOL_RESULT[name]:`) joined by blank lines. Other
 * roles are skipped. Content that is not a string (or, for user messages, not
 * an array of parts) is treated as empty so serialization never throws.
 *
 * @param messages Messages in chronological order.
 * @returns The transcript text; empty when nothing was serializable.
 */
function serializeHistoryAsText(messages) {
    const lines = [];
    // Lets a tool result be labelled with the name of the call that produced it.
    const toolNameByCallId = new Map();
    for (const msg of messages) {
        switch (msg.role) {
            case "user": {
                let text = "";
                if (typeof msg.content === "string") {
                    text = msg.content;
                }
                else if (Array.isArray(msg.content)) {
                    // Only text parts are serialized; image parts are omitted.
                    text = msg.content.filter((p) => p.type === "text").map((p) => p.text).join(" ");
                }
                lines.push(`USER: ${text}`);
                break;
            }
            case "assistant": {
                const text = typeof msg.content === "string" ? msg.content : "";
                if (text.trim()) {
                    lines.push(`ASSISTANT: ${text}`);
                }
                for (const tc of msg.toolCalls ?? []) {
                    toolNameByCallId.set(tc.id, tc.name);
                    lines.push(`TOOL_CALL[${tc.name}]: ${summarizeToolCallArgs(tc)}`);
                }
                break;
            }
            case "tool": {
                const name = toolNameByCallId.get(msg.toolCallId) ?? "tool";
                const output = typeof msg.content === "string" ? msg.content : "";
                lines.push(`TOOL_RESULT[${name}]: ${truncateInline(output, 1500)}`);
                break;
            }
            default:
                break;
        }
    }
    return lines.join("\n\n");
}
|
|
165
|
+
/**
 * Produces a one-line summary of a tool call's arguments.
 *
 * When the arguments parse to a JSON object, its string/number/boolean
 * properties are rendered as `key=value` pairs (each value capped at 200
 * characters of its JSON form). Anything else — invalid JSON, or JSON that is
 * not a plain object — is returned as the raw text truncated to 200 characters.
 *
 * @param tc Tool call with an `arguments` JSON string.
 * @returns The summary, or "(no args)" when the object has no scalar properties.
 */
function summarizeToolCallArgs(tc) {
    const raw = tc.arguments || "";
    try {
        const parsed = JSON.parse(raw || "{}");
        // Object.entries on a string or array yields index pairs, which is noise.
        if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
            return truncateInline(raw, 200);
        }
        const pairs = Object.entries(parsed)
            .filter(([, v]) => typeof v === "string" || typeof v === "number" || typeof v === "boolean")
            .map(([k, v]) => `${k}=${JSON.stringify(v).slice(0, 200)}`);
        return pairs.join(" ") || "(no args)";
    }
    catch {
        return truncateInline(raw, 200);
    }
}
|
|
177
|
+
/**
 * Collapses all whitespace runs to single spaces, trims, and caps the result
 * at `max` characters, ending with "..." when text was cut.
 *
 * @param text Input text; null/undefined is treated as empty and other
 *             non-strings are converted with String().
 * @param max  Maximum length of the returned string.
 * @returns The normalized, possibly truncated text.
 */
function truncateInline(text, max) {
    const normalized = (text == null ? "" : String(text)).replace(/\s+/g, " ").trim();
    if (normalized.length <= max)
        return normalized;
    // With fewer than 3 characters there is no room for the ellipsis, and
    // `max - 3` would be a negative slice end (counted from the end of the string).
    if (max < 3)
        return normalized.slice(0, Math.max(0, max));
    return `${normalized.slice(0, max - 3)}...`;
}
|
|
183
|
+
/**
 * Copies a message one level deep, plus the nested parts that differ by role:
 * each tool call of an assistant message, and each content part (including
 * its `image_url` object) of a user message with array content.
 *
 * @param message Message to copy; not mutated.
 * @returns A new message object.
 */
function cloneMessage(message) {
    switch (message.role) {
        case "assistant": {
            // `toolCalls` is always written, as undefined when the source has none.
            const toolCalls = message.toolCalls?.map((call) => ({ ...call }));
            return { ...message, toolCalls };
        }
        case "user": {
            if (!Array.isArray(message.content))
                break;
            const content = message.content.map((part) => part.type === "image_url"
                ? { ...part, image_url: { ...part.image_url } }
                : { ...part });
            return { ...message, content };
        }
        default:
            break;
    }
    return { ...message };
}
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import { getContextBudget } from "./budget.js";
|
|
2
|
-
import { compactMessages } from "./compact.js";
|
|
2
|
+
import { compactCurrentTurnToolGroups, compactMessages } from "./compact.js";
|
|
3
3
|
import { pruneMessages } from "./prune.js";
|
|
4
|
+
// Prefix-cache invariant: every projected output starts with the concatenation
|
|
5
|
+
// of (in order) system + meta messages from the input, followed by the
|
|
6
|
+
// conversational body. Compactors (compactMessages, compactCurrentTurnToolGroups,
|
|
7
|
+
// compactMessagesWithLLM, compactWithLLM) MUST preserve every existing
|
|
8
|
+
// system/meta message in its original position so the cacheable prefix
|
|
9
|
+
// stays byte-identical across turns where compaction didn't fire. Inserting
|
|
10
|
+
// new dynamic content (summaries, etc.) AFTER system+meta is safe; inserting
|
|
11
|
+
// it within or before them is not.
|
|
4
12
|
export function projectMessages(messages, options = {}) {
|
|
5
13
|
const mode = options.mode ?? "full";
|
|
6
14
|
const projectedBody = [];
|
|
@@ -48,18 +56,26 @@ export function projectMessages(messages, options = {}) {
|
|
|
48
56
|
if (!budget.shouldCompact) {
|
|
49
57
|
return pruned;
|
|
50
58
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
// Escalating compaction: turn-level passes first, then sub-turn (single-turn
|
|
60
|
+
// bloat from many tool calls) as a finer-grained fallback. Each step only
|
|
61
|
+
// advances `working` if compaction actually fired, and re-checks the budget
|
|
62
|
+
// before deciding to escalate further.
|
|
63
|
+
let working = pruned;
|
|
64
|
+
const passes = [
|
|
65
|
+
() => compactMessages(working, { keepRecentTurns: 2 }).messages,
|
|
66
|
+
() => compactMessages(working, { keepRecentTurns: 1 }).messages,
|
|
67
|
+
() => compactCurrentTurnToolGroups(working, { keepRecentGroups: 2 }).messages,
|
|
68
|
+
() => compactCurrentTurnToolGroups(working, { keepRecentGroups: 1 }).messages,
|
|
69
|
+
];
|
|
70
|
+
for (const pass of passes) {
|
|
71
|
+
const next = pass();
|
|
72
|
+
if (next)
|
|
73
|
+
working = next;
|
|
74
|
+
const after = getContextBudget(options.providerId, options.modelId, working);
|
|
75
|
+
if (!after.shouldCompact)
|
|
76
|
+
break;
|
|
59
77
|
}
|
|
60
|
-
|
|
61
|
-
const finalMessages = (tighter.compacted && tighter.messages ? tighter.messages : compactedMessages);
|
|
62
|
-
return repairToolCallChains(finalMessages);
|
|
78
|
+
return repairToolCallChains(working);
|
|
63
79
|
}
|
|
64
80
|
return repaired;
|
|
65
81
|
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Strategy interface for estimating how many tokens a piece of text occupies. */
export interface TokenEstimator {
|
|
2
|
+
/** Returns the estimated token count for `text`. */
estimate(text: string): number;
|
|
3
|
+
}
|
|
4
|
+
/** Tokenizer-free estimator: CJK code points count as one token each, all others as a quarter token. */
export declare class HeuristicEstimator implements TokenEstimator {
|
|
5
|
+
/** Returns `ceil(cjkCount + otherCount / 4)`; 0 for empty text. */
estimate(text: string): number;
|
|
6
|
+
}
|
|
7
|
+
/** Estimator backed by js-tiktoken's `o200k_base` encoding, with a heuristic fallback. */
export declare class TiktokenEstimator implements TokenEstimator {
|
|
8
|
+
/** Lazily created encoder; unset until first successful load. */
private encoder;
|
|
9
|
+
/** Set after a failed load so the load is not retried. */
private initFailed;
|
|
10
|
+
/** Heuristic estimator used when the encoder is unavailable or the input is unsafe to encode. */
private readonly fallback;
|
|
11
|
+
/** Returns the encoded token count, or the fallback estimate for over-long or pathological input. */
estimate(text: string): number;
|
|
12
|
+
/** Loads the encoder on first use. */
private getEncoder;
|
|
13
|
+
}
|
|
14
|
+
/** Returns the tiktoken-backed estimator for "openai" and "openai-codex", and the heuristic estimator for any other provider id. */
export declare function getTokenEstimator(providerId?: string): TokenEstimator;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// Token estimation strategy layer.
|
|
2
|
+
//
|
|
3
|
+
// Different providers use different tokenizers; a single "chars/4" rule under-counts
|
|
4
|
+
// dense content (HTML, CJK) and lets auto-compact fire too late. This module routes
|
|
5
|
+
// per-provider: OpenAI / OpenAI-Codex uses js-tiktoken with the o200k_base BPE; every
|
|
6
|
+
// other provider uses a CJK-aware heuristic. Drop in a new strategy per provider as
|
|
7
|
+
// their tokenizers become important without touching call sites.
|
|
8
|
+
// Tiktoken's pre-tokenization regex is catastrophic on inputs with long single-char
|
|
9
|
+
// runs ("x".repeat(4000) → 1.4s; bigger → minutes/hang). Two guards: a hard length
|
|
10
|
+
// cap, and a cheap scan for any run ≥ MAX_RUN_LEN of the same code unit. Both catch
|
|
11
|
+
// production hazards (binary blobs, base64 dumps, leaked buffers) and synthetic test
|
|
12
|
+
// fixtures alike. Normal prose / code / markdown stays well under both.
|
|
13
|
+
/** Inputs longer than this many characters are not handed to tiktoken. */
const TIKTOKEN_MAX_CHARS = 80_000;
/** A run of this many identical UTF-16 code units marks the input as pathological. */
const MAX_RUN_LEN = 64;
/**
 * Reports whether `text` contains a run of at least MAX_RUN_LEN identical
 * UTF-16 code units.
 *
 * @param text Text to scan.
 * @returns True when such a run exists.
 */
function hasPathologicalRun(text) {
    const size = text.length;
    if (size < MAX_RUN_LEN)
        return false;
    // Index where the current run of identical code units began.
    let runStart = 0;
    for (let pos = 1; pos < size; pos += 1) {
        if (text.charCodeAt(pos) !== text.charCodeAt(runStart)) {
            runStart = pos;
            continue;
        }
        if (pos - runStart + 1 >= MAX_RUN_LEN)
            return true;
    }
    return false;
}
|
|
34
|
+
// Cheap codepoint check: CJK ideographs + Hiragana/Katakana + Hangul.
|
|
35
|
+
// Each such char is roughly 1 token (vs ~0.25 token for ASCII), so weighting them
|
|
36
|
+
// 1.0 cuts the heuristic's CJK undercount by ~4x without needing a real tokenizer.
|
|
37
|
+
/**
 * Reports whether a code point lies in one of the ranges this module treats
 * as CJK:
 *   U+3000–U+9FFF   CJK symbols through unified ideographs (incl. Hiragana/Katakana)
 *   U+AC00–U+D7AF   Hangul syllables
 *   U+F900–U+FAFF   CJK compatibility ideographs
 *   U+20000–U+2FFFF CJK extensions (surrogate pairs)
 *
 * @param code Unicode code point.
 * @returns True when `code` falls inside any of the ranges above.
 */
function isCjkCodePoint(code) {
    // Ranges are checked in ascending order; each pair of tests covers one gap
    // and one range.
    if (code < 0x3000)
        return false;
    if (code <= 0x9fff)
        return true;
    if (code < 0xac00)
        return false;
    if (code <= 0xd7af)
        return true;
    if (code < 0xf900)
        return false;
    if (code <= 0xfaff)
        return true;
    return code >= 0x20000 && code <= 0x2ffff;
}
|
|
44
|
+
/**
 * Tokenizer-free estimator. CJK code points are weighted as one token each
 * and every other code point as a quarter of a token.
 */
export class HeuristicEstimator {
    /**
     * @param text Text to estimate.
     * @returns `ceil(cjk + other / 4)`, or 0 for empty/missing text.
     */
    estimate(text) {
        if (!text)
            return 0;
        let cjkCount = 0;
        let otherCount = 0;
        // String iteration walks by code point, so a surrogate pair is counted once.
        for (const symbol of text) {
            if (isCjkCodePoint(symbol.codePointAt(0)))
                cjkCount += 1;
            else
                otherCount += 1;
        }
        return Math.ceil(cjkCount + otherCount / 4);
    }
}
|
|
62
|
+
/**
 * Estimator that counts tokens with js-tiktoken's `o200k_base` encoding.
 * It defers to the heuristic estimator when the input is too long, contains a
 * pathological run, the encoder cannot be loaded, or encoding throws.
 */
export class TiktokenEstimator {
    encoder = null;
    initFailed = false;
    fallback = new HeuristicEstimator();
    /**
     * @param text Text to estimate.
     * @returns Token count from the encoder, or the heuristic estimate.
     */
    estimate(text) {
        if (!text)
            return 0;
        // Both guards are checked before the encoder is touched, so unsafe input
        // never triggers the lazy load.
        const unsafe = text.length > TIKTOKEN_MAX_CHARS || hasPathologicalRun(text);
        const bpe = unsafe ? null : this.getEncoder();
        if (bpe) {
            try {
                return bpe.encode(text).length;
            }
            catch {
                // fall through to the heuristic
            }
        }
        return this.fallback.estimate(text);
    }
    /**
     * Loads the encoder on first use and caches it. A failed load is remembered
     * and not retried.
     *
     * @returns The encoder, or null when it could not be loaded.
     */
    getEncoder() {
        if (!this.encoder && !this.initFailed) {
            try {
                // Lazy require: defers ~1MB of BPE table load until OpenAI is actually used.
                // NOTE(review): this file uses ES module syntax, and `require` is not
                // defined in a native Node.js ES module. If no runtime or bundler injects
                // it, the ReferenceError is swallowed below and the heuristic is used
                // permanently — confirm, and consider createRequire(import.meta.url).
                const tiktoken = require("js-tiktoken");
                this.encoder = tiktoken.getEncoding("o200k_base");
            }
            catch {
                this.initFailed = true;
                return null;
            }
            return this.encoder;
        }
        return this.encoder ? this.encoder : null;
    }
}
|
|
100
|
+
// Shared instances, so the tiktoken encoder is loaded at most once.
const HEURISTIC = new HeuristicEstimator();
const TIKTOKEN = new TiktokenEstimator();
/**
 * Selects the estimator for a provider.
 *
 * @param providerId Provider identifier; may be undefined.
 * @returns The tiktoken estimator for "openai" and "openai-codex", otherwise
 *          the heuristic estimator.
 */
export function getTokenEstimator(providerId) {
    switch (providerId) {
        case "openai":
        case "openai-codex":
            return TIKTOKEN;
        default:
            return HEURISTIC;
    }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** Result of `truncateToolOutputForModel`. */
export interface ToolTruncationResult {
|
|
2
|
+
/** The tool output, middle-truncated when it exceeded the limit. */
content: string;
|
|
3
|
+
/** True when `content` was shortened. */
truncated: boolean;
|
|
4
|
+
/** Estimated tokens of the input. */
originalTokens: number;
|
|
5
|
+
/** Estimated tokens of `content`; equals `originalTokens` when not truncated. */
finalTokens: number;
|
|
6
|
+
/** The model's tool-output token limit as returned by the model catalog, if any. */
limit: number | undefined;
|
|
7
|
+
}
|
|
8
|
+
/** Middle-truncates `content` to the model's tool-output token limit; returns it unchanged when there is no limit or it already fits. */
export declare function truncateToolOutputForModel(content: string, providerId: string, modelId: string): ToolTruncationResult;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// Tool-output truncation honoring the model's server-declared limit.
|
|
2
|
+
//
|
|
3
|
+
// Codex backend's /models endpoint reports per-model `truncation_policy.limit`
|
|
4
|
+
// (e.g. 10000 tokens for gpt-5.5). The expectation is that the CLIENT truncates
|
|
5
|
+
// each tool result to that budget before adding it to history; sending raw
|
|
6
|
+
// 50-100k tool dumps will blow through the input window after a handful of
|
|
7
|
+
// calls. Codex CLI does this via TruncationPolicy::Tokens; mirror it here.
|
|
8
|
+
//
|
|
9
|
+
// Strategy: middle-truncate (preserve head + tail with an explicit marker in
|
|
10
|
+
// between). Heads usually carry structure/headers; tails often carry totals,
|
|
11
|
+
// errors, or conclusions — losing either is worse than losing the middle.
|
|
12
|
+
import { getToolOutputTokenLimit } from "../model-catalog.js";
|
|
13
|
+
import { estimateTextTokens } from "./budget.js";
|
|
14
|
+
/**
 * Applies the model's tool-output token limit to one tool result.
 *
 * @param content    Raw tool output.
 * @param providerId Provider id, used for the limit lookup and token estimation.
 * @param modelId    Model id, used for the limit lookup.
 * @returns The (possibly middle-truncated) content together with the token
 *          estimates before and after, and the limit that applied.
 */
export function truncateToolOutputForModel(content, providerId, modelId) {
    const limit = getToolOutputTokenLimit(providerId, modelId);
    const originalTokens = estimateTextTokens(content, providerId);
    // No limit for this model, or already within it: pass through untouched.
    const fits = !limit || originalTokens <= limit;
    if (fits) {
        return { content, truncated: false, originalTokens, finalTokens: originalTokens, limit };
    }
    const shortened = middleTruncateToTokenBudget(content, limit, providerId);
    return {
        content: shortened,
        truncated: true,
        originalTokens,
        finalTokens: estimateTextTokens(shortened, providerId),
        limit,
    };
}
|
|
24
|
+
/**
 * Shortens `content` to roughly `tokenBudget` estimated tokens by keeping its
 * head and tail and replacing the middle with a marker.
 *
 * The token budget is converted to a character budget using the content's own
 * chars-per-token ratio. If the first cut is still over budget, head and tail
 * are each shrunk by 20% per round, for at most three rounds. The result is
 * best-effort: with a very small budget it may still exceed `tokenBudget`.
 *
 * @param content     Text to shorten.
 * @param tokenBudget Target size in estimated tokens.
 * @param providerId  Provider id passed to the token estimator.
 * @returns `content` unchanged when it already fits, otherwise head + marker + tail.
 */
function middleTruncateToTokenBudget(content, tokenBudget, providerId) {
    // Convert token budget to a char budget via the estimator's effective ratio.
    const tokensAll = estimateTextTokens(content, providerId);
    if (tokensAll <= tokenBudget)
        return content;
    const charsPerToken = content.length / Math.max(1, tokensAll);
    // Reserve some room for the marker itself, but keep at least 200 characters.
    const reserveForMarker = 200;
    const minCharBudget = 200;
    // Clamp to the content length: a budget larger than the content would make
    // head and tail overlap and report a negative dropped count.
    const charBudget = Math.min(content.length, Math.max(minCharBudget, Math.floor(tokenBudget * charsPerToken) - reserveForMarker));
    let headLen = Math.floor(charBudget / 2);
    let tailLen = charBudget - headLen;
    const render = () => {
        const head = content.slice(0, headLen);
        const tail = content.slice(content.length - tailLen);
        const dropped = content.length - headLen - tailLen;
        return `${head}\n\n[... middle ${formatChars(dropped)} truncated by model policy (${tokenBudget}-token cap) ...]\n\n${tail}`;
    };
    let truncated = render();
    // Tighten if our estimate of charsPerToken undercounts and we're still over.
    // Lengths carry over between rounds so each round actually shrinks further.
    for (let round = 0; round < 3 && estimateTextTokens(truncated, providerId) > tokenBudget; round++) {
        headLen = Math.floor(headLen * 0.8);
        tailLen = Math.floor(tailLen * 0.8);
        truncated = render();
    }
    return truncated;
}
|
|
53
|
+
/**
 * Formats a character count for display: "N chars", "N.NK chars" or
 * "N.NNM chars".
 *
 * @param count Number of characters.
 * @returns Human-readable size string.
 */
function formatChars(count) {
    if (count < 1000)
        return `${count} chars`;
    if (count < 1_000_000) {
        const thousands = (count / 1000).toFixed(1);
        // toFixed rounds, so 999,950–999,999 would print as "1000.0K";
        // let those fall through to the M form instead.
        if (thousands !== "1000.0")
            return `${thousands}K chars`;
    }
    return `${(count / 1_000_000).toFixed(2)}M chars`;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { SkillSummary } from "../skills/types.js";
|
|
2
|
+
import type { Message, ToolRegistryEntry } from "../types.js";
|
|
3
|
+
/** One labelled entry of a context-usage breakdown. NOTE(review): the implementation is not visible here; field meanings are inferred from names — confirm. */
export interface ContextUsageBucket {
|
|
4
|
+
/** Display label for the bucket. */
label: string;
|
|
5
|
+
/** Token count for this bucket (presumably an estimate — confirm against usage.js). */
tokens: number;
|
|
6
|
+
/** Optional extra text for the bucket. */
detail?: string;
|
|
7
|
+
}
|
|
8
|
+
/** Return type of `buildContextUsageSnapshot` and input of `formatContextUsage`. NOTE(review): the implementation is not visible here; how each value is computed should be confirmed against usage.js. */
export interface ContextUsageSnapshot {
|
|
9
|
+
providerId: string;
|
|
10
|
+
modelId: string;
|
|
11
|
+
/** Optional — presumably absent when the model's context window is unknown; confirm. */
contextWindow?: number;
|
|
12
|
+
usedTokens: number;
|
|
13
|
+
/** Optional — presumably derived from `contextWindow` and `usedTokens`; confirm. */
freeTokens?: number;
|
|
14
|
+
/** Fixed set of named buckets, one ContextUsageBucket each. */
buckets: {
|
|
15
|
+
systemPrompt: ContextUsageBucket;
|
|
16
|
+
tools: ContextUsageBucket;
|
|
17
|
+
skills: ContextUsageBucket;
|
|
18
|
+
deferredTools: ContextUsageBucket;
|
|
19
|
+
other: ContextUsageBucket;
|
|
20
|
+
};
|
|
21
|
+
toolCount: number;
|
|
22
|
+
deferredToolCount: number;
|
|
23
|
+
skillCount: number;
|
|
24
|
+
messageCount: number;
|
|
25
|
+
}
|
|
26
|
+
/** Builds a ContextUsageSnapshot from the given provider/model, messages, tool entries and skills. `deferredToolEntries` is optional. */
export declare function buildContextUsageSnapshot(input: {
|
|
27
|
+
providerId: string;
|
|
28
|
+
modelId: string;
|
|
29
|
+
messages: Message[];
|
|
30
|
+
toolEntries: ToolRegistryEntry[];
|
|
31
|
+
deferredToolEntries?: ToolRegistryEntry[];
|
|
32
|
+
skills: SkillSummary[];
|
|
33
|
+
}): ContextUsageSnapshot;
|
|
34
|
+
/** Renders a ContextUsageSnapshot as a string (exact layout is defined in usage.js, not visible here). */
export declare function formatContextUsage(snapshot: ContextUsageSnapshot): string;
|