botholomew 0.18.6 → 0.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -2
- package/package.json +12 -9
- package/src/chat/agent.ts +175 -181
- package/src/chat/session.ts +30 -31
- package/src/chat/usage.ts +19 -20
- package/src/commands/init.ts +20 -0
- package/src/config/loader.ts +50 -10
- package/src/config/schemas.ts +48 -22
- package/src/init/index.ts +12 -5
- package/src/init/templates.ts +45 -4
- package/src/llm/abort.ts +9 -0
- package/src/llm/cache-control.ts +65 -0
- package/src/llm/capabilities.ts +155 -0
- package/src/llm/error-format.ts +95 -0
- package/src/llm/fake.ts +226 -0
- package/src/llm/index.ts +19 -0
- package/src/llm/provider-options.ts +29 -0
- package/src/llm/provider.ts +65 -0
- package/src/llm/tools.ts +24 -0
- package/src/llm/types.ts +20 -0
- package/src/llm/usage.ts +33 -0
- package/src/prompts/capabilities.ts +72 -108
- package/src/tools/membot/adapter.ts +8 -6
- package/src/tools/membot/edit.ts +1 -1
- package/src/tools/tool.ts +2 -22
- package/src/tui/components/ContextPanel.tsx +1 -1
- package/src/tui/hooks/useMessageQueue.ts +2 -1
- package/src/tui/markdown.ts +45 -2
- package/src/tui/markdownTables.ts +288 -0
- package/src/utils/title.ts +21 -22
- package/src/worker/context.ts +45 -77
- package/src/worker/llm.ts +147 -112
- package/src/worker/prompt.ts +1 -1
- package/src/worker/schedules.ts +43 -54
- package/src/worker/tick.ts +3 -3
- package/src/worker/fake-llm.ts +0 -277
- package/src/worker/llm-client.ts +0 -12
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GFM table extraction + width-aware ANSI rendering.
|
|
3
|
+
*
|
|
4
|
+
* `Bun.markdown.ansi` renders tables at their natural width and ignores the
|
|
5
|
+
* caller's column budget, so wide tables get hard-wrapped mid-cell by
|
|
6
|
+
* `wrap-ansi` in the detail pane. We pre-extract table blocks, render them
|
|
7
|
+
* ourselves at a width that fits, and let `Bun.markdown.ansi` handle the rest.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/** Horizontal alignment applied to a column's cell text when it is padded. */
export type Align = "left" | "center" | "right";
|
|
11
|
+
|
|
12
|
+
/** One GFM table found in a text: where it sits (line range) plus its parsed cells. */
export interface TableBlock {
|
|
13
|
+
/** First line index (inclusive) of the table in the original text. */
|
|
14
|
+
start: number;
|
|
15
|
+
/** Last line index (inclusive). */
|
|
16
|
+
end: number;
|
|
17
|
+
/** First row is the header. */
|
|
18
|
+
rows: string[][];
|
|
19
|
+
/** Per-column alignment parsed from the separator row (one entry per column). */
aligns: Align[];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// ANSI SGR escape sequences used when drawing the table: dim, bold, and reset.
const DIM_ON = "\x1b[2m";
|
|
23
|
+
const BOLD_ON = "\x1b[1m";
|
|
24
|
+
const RESET = "\x1b[0m";
|
|
25
|
+
|
|
26
|
+
// One cell of a delimiter row: one or more dashes, with an optional colon on
// either side and optional surrounding whitespace.
const SEPARATOR_CELL_RE = /^\s*:?-{1,}:?\s*$/;
|
|
27
|
+
// A code-fence line: up to three leading whitespace characters, then ``` or ~~~.
const FENCE_RE = /^\s{0,3}(```|~~~)/;
|
|
28
|
+
|
|
29
|
+
/**
 * Locate every GFM pipe table in `text`.
 *
 * A table is a pipe row (the header) immediately followed by a separator row
 * whose cells all match `SEPARATOR_CELL_RE`, then zero or more body rows.
 * Lines inside fenced code blocks are never treated as table rows.
 *
 * @param text Markdown source; it is split on "\n".
 * @returns Blocks in document order. `start`/`end` are 0-based, inclusive
 *   indices into `text.split("\n")`. Every row is padded/truncated to the
 *   table's column count and `aligns` has one entry per column.
 */
export function extractTableBlocks(text: string): TableBlock[] {
  const lines = text.split("\n");
  const blocks: TableBlock[] = [];
  // Marker character and run length of the fence we are inside, or null when
  // outside any fence. A fence only closes on the same character with a run at
  // least as long and no trailing info string, so "```js" inside "~~~" (or a
  // shorter run) does not flip the state by accident.
  let openFence: { ch: string; run: number } | null = null;
  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";
    if (FENCE_RE.test(line)) {
      const marker = line.trimStart();
      const ch = marker.charAt(0);
      let run = 0;
      while (marker.charAt(run) === ch) run++;
      if (openFence === null) {
        openFence = { ch, run };
      } else if (
        ch === openFence.ch &&
        run >= openFence.run &&
        marker.slice(run).trim() === ""
      ) {
        openFence = null;
      }
      i++;
      continue;
    }
    if (openFence !== null || !looksLikePipeRow(line)) {
      i++;
      continue;
    }
    const sep = lines[i + 1] ?? "";
    if (!looksLikePipeRow(sep)) {
      i++;
      continue;
    }
    const sepCells = splitRow(sep);
    if (!sepCells.every((c) => SEPARATOR_CELL_RE.test(c))) {
      i++;
      continue;
    }
    const header = splitRow(line);
    const colCount = Math.max(header.length, sepCells.length);
    const aligns: Align[] = sepCells.slice(0, colCount).map(parseAlignCell);
    while (aligns.length < colCount) aligns.push("left");

    const rows: string[][] = [normalizeRow(header, colCount)];
    let j = i + 2;
    while (j < lines.length) {
      const body = lines[j] ?? "";
      if (!looksLikePipeRow(body)) break;
      const bodyCells = splitRow(body);
      if (bodyCells.every((c) => SEPARATOR_CELL_RE.test(c))) {
        // A new separator means a second table starts here. Its header is the
        // line just above, which we already consumed as a body row: hand it
        // back so the next iteration can pick that table up intact.
        if (rows.length > 1) {
          rows.pop();
          j--;
        }
        break;
      }
      rows.push(normalizeRow(bodyCells, colCount));
      j++;
    }

    blocks.push({ start: i, end: j - 1, rows, aligns });
    i = j;
  }
  return blocks;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Draw a parsed table as a box-bordered ANSI string.
 *
 * Inline markdown markers are stripped from every cell first. Columns keep
 * their natural width when the whole table fits in `width` (or when `width`
 * is not positive); otherwise they are shrunk via `shrinkColumns`.
 *
 * @param rows Table rows; the first row is rendered as the bold header.
 * @param aligns Per-column alignment.
 * @param width Available terminal columns.
 * @returns The rendered lines joined with "\n", or "" for an empty table.
 */
export function renderTable(
  rows: string[][],
  aligns: Align[],
  width: number,
): string {
  const columnTotal = rows[0]?.length ?? 0;
  if (rows.length === 0 || columnTotal === 0) return "";

  const stripped = rows.map((row) => row.map(stripInlineMarkdown));

  // Widest visible cell per column, never narrower than one character.
  const natural = Array.from({ length: columnTotal }, (_, col) =>
    stripped.reduce(
      (widest, row) => Math.max(widest, visibleWidth(row[col] ?? "")),
      1,
    ),
  );

  // Each column costs " x " padding plus one bar; one extra bar closes the row.
  const chrome = columnTotal * 3 + 1;
  const needed = natural.reduce((sum, w) => sum + w, 0) + chrome;
  const fits = needed <= width || width <= 0;
  const widths = fits ? natural : shrinkColumns(natural, width - chrome);

  const out: string[] = [borderLine("┌", "┬", "┐", widths)];
  stripped.forEach((cells, rowIndex) => {
    const header = rowIndex === 0;
    out.push(dataLine(cells, aligns, widths, header));
    if (header) out.push(borderLine("├", "┼", "┤", widths));
  });
  out.push(borderLine("└", "┴", "┘", widths));
  return out.join("\n");
}
|
|
124
|
+
|
|
125
|
+
/**
 * Cheap pre-check for "could this line be a GFM table row?".
 *
 * @param line A single line of markdown.
 * @returns `true` when the line is non-blank and contains at least one pipe
 *   that is not escaped with a backslash. A line whose only pipes are `\|`
 *   is literal text, not a row.
 */
function looksLikePipeRow(line: string): boolean {
  if (line.trim() === "") return false;
  // A pipe counts when it is at the start of the line or preceded by anything
  // other than a backslash (the same escape rule splitRow applies).
  return /(?:^|[^\\])\|/.test(line);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Split one table row into trimmed cell strings.
 *
 * One leading pipe and one trailing unescaped pipe are dropped, then the rest
 * is cut on every unescaped `|`. An escaped pipe (`\|`) becomes a literal `|`
 * inside its cell.
 *
 * @param line The raw row text.
 * @returns The cells in order; always at least one element.
 */
function splitRow(line: string): string[] {
  let inner = line.trim();
  if (inner.startsWith("|")) inner = inner.slice(1);
  const closesWithPipe = inner.endsWith("|") && !inner.endsWith("\\|");
  if (closesWithPipe) inner = inner.slice(0, -1);

  const cells: string[] = [];
  let current = "";
  let pos = 0;
  while (pos < inner.length) {
    const ch = inner.charAt(pos);
    if (ch === "\\" && inner.charAt(pos + 1) === "|") {
      // Escaped pipe: keep the pipe, swallow the backslash.
      current += "|";
      pos += 2;
    } else if (ch === "|") {
      cells.push(current.trim());
      current = "";
      pos += 1;
    } else {
      current += ch;
      pos += 1;
    }
  }
  cells.push(current.trim());
  return cells;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Read the alignment encoded by a separator-row cell.
 *
 * @param cell A separator cell such as `---`, `:--`, `--:` or `:-:`.
 * @returns "center" for colons on both ends, "right" for a trailing colon
 *   only, otherwise "left".
 */
function parseAlignCell(cell: string): Align {
  const marker = cell.trim();
  if (!marker.endsWith(":")) return "left";
  return marker.startsWith(":") ? "center" : "right";
}
|
|
166
|
+
|
|
167
|
+
/**
 * Force a row to exactly `colCount` cells.
 *
 * @param cells The parsed cells of one row (not modified).
 * @param colCount Target number of columns.
 * @returns A new array: extra cells are dropped, missing ones become "".
 */
function normalizeRow(cells: string[], colCount: number): string[] {
  const kept = cells.slice(0, colCount);
  const missing = colCount - kept.length;
  return missing > 0 ? kept.concat(new Array<string>(missing).fill("")) : kept;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Scale column widths down so their sum fits `budget`.
 *
 * Widths are reduced proportionally to each column's natural width. A column
 * is never squeezed below `min(3, natural width)` and never grown beyond its
 * natural width, so short columns (e.g. a one-character flag column) keep
 * their size instead of being inflated at the expense of the columns that are
 * actually being truncated.
 *
 * @param natural Natural (content) width of each column.
 * @param budget Total width available for cell content, borders excluded.
 * @returns One width per column. The sum is at most `budget` unless even the
 *   per-column minimums do not fit, in which case those minimums are returned
 *   and the caller has to live with the overflow.
 */
function shrinkColumns(natural: number[], budget: number): number[] {
  const MIN = 3;
  const n = natural.length;

  // Smallest width each column may be squeezed to: room for a character or two
  // plus an ellipsis, but never wider than the content itself.
  const floors = natural.map((w) => Math.min(MIN, w));
  const floorTotal = floors.reduce((a, b) => a + b, 0);
  if (budget < floorTotal) return floors;

  const total = natural.reduce((a, b) => a + b, 0);
  // Nothing to shrink (also keeps the division below away from zero).
  if (total <= budget) return [...natural];

  const raw = natural.map((w) => (w * budget) / total);
  const widths = raw.map((v, i) => Math.max(floors[i] ?? 0, Math.floor(v)));
  let used = widths.reduce((a, b) => a + b, 0);

  // Hand leftover width to the columns with the largest fractional share,
  // without letting any column exceed its natural width.
  const byFraction = raw
    .map((v, i) => ({ i, frac: v - Math.floor(v) }))
    .sort((a, b) => b.frac - a.frac);
  let grew = true;
  while (used < budget && grew) {
    grew = false;
    for (const { i } of byFraction) {
      if (used >= budget) break;
      const current = widths[i] ?? 0;
      if (current >= (natural[i] ?? 0)) continue;
      widths[i] = current + 1;
      used++;
      grew = true;
    }
  }

  // Clamping to the per-column minimum can overshoot; take it back from the
  // widest column that still has room above its minimum.
  while (used > budget) {
    let widest = -1;
    for (let i = 0; i < n; i++) {
      const w = widths[i] ?? 0;
      if (w <= (floors[i] ?? 0)) continue;
      if (widest === -1 || w > (widths[widest] ?? 0)) widest = i;
    }
    if (widest === -1) break;
    widths[widest] = (widths[widest] ?? 0) - 1;
    used--;
  }
  return widths;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Build one horizontal rule of the table box, rendered dim.
 *
 * @param left Corner/junction glyph at the left edge.
 * @param mid Junction glyph placed between columns.
 * @param right Corner/junction glyph at the right edge.
 * @param widths Content width of each column; every segment is two wider to
 *   cover the one-space padding on each side of a cell.
 * @returns The rule wrapped in dim-on / reset escapes.
 */
function borderLine(
  left: string,
  mid: string,
  right: string,
  widths: number[],
): string {
  const inner = widths.map((w) => "─".repeat(w + 2)).join(mid);
  return `${DIM_ON}${left}${inner}${right}${RESET}`;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Render one row of cells between dim vertical bars.
 *
 * @param cells Cell texts for the row; missing cells render as empty.
 * @param aligns Per-column alignment; missing entries default to "left".
 * @param widths Content width of each column (drives how many cells are drawn).
 * @param bold Whether to wrap each cell in bold escapes (used for the header).
 * @returns The assembled row string.
 */
function dataLine(
  cells: string[],
  aligns: Align[],
  widths: number[],
  bold: boolean,
): string {
  const bar = `${DIM_ON}│${RESET}`;
  const rendered = widths.map((w, col) => {
    const fitted = padCell(cells[col] ?? "", w, aligns[col] ?? "left");
    return bold ? ` ${BOLD_ON}${fitted}${RESET} ` : ` ${fitted} `;
  });
  return bar + rendered.map((cell) => cell + bar).join("");
}
|
|
239
|
+
|
|
240
|
+
/**
 * Fit `text` into exactly `width` visible characters.
 *
 * The text is truncated first if needed, then padded with spaces according to
 * `align`. For "center", any odd leftover space goes on the right.
 *
 * @param text Cell text.
 * @param width Target visible width.
 * @param align Where the text sits inside the padded cell.
 * @returns The truncated and padded cell text.
 */
function padCell(text: string, width: number, align: Align): string {
  const shown = truncateToWidth(text, width);
  const slack = width - visibleWidth(shown);
  if (slack <= 0) return shown;

  switch (align) {
    case "right":
      return " ".repeat(slack) + shown;
    case "center": {
      const before = Math.floor(slack / 2);
      return " ".repeat(before) + shown + " ".repeat(slack - before);
    }
    default:
      return shown + " ".repeat(slack);
  }
}
|
|
252
|
+
|
|
253
|
+
/**
 * Shorten `text` to at most `width` visible characters.
 *
 * @param text Text to shorten.
 * @param width Maximum visible width.
 * @returns "" when `width` is not positive; `text` unchanged when it already
 *   fits; otherwise the first `width - 1` code points followed by "…".
 */
function truncateToWidth(text: string, width: number): string {
  if (width <= 0) return "";
  if (visibleWidth(text) <= width) return text;
  // Reserve one column for the ellipsis; with width 1 nothing else is kept.
  const kept = Array.from(text)
    .slice(0, width - 1)
    .join("");
  return `${kept}…`;
}
|
|
267
|
+
|
|
268
|
+
/**
 * Measure how many terminal columns `text` is assumed to occupy.
 *
 * The measure is the number of Unicode code points. Callers pass cell text
 * that has already had markdown markers stripped and carries no ANSI escapes.
 * Double-width (East-Asian) characters count as one and are therefore
 * undercounted; that is a known, accepted limitation.
 *
 * @param text Plain cell text.
 * @returns The code point count.
 */
function visibleWidth(text: string): number {
  return [...text].length;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Remove a small set of inline markdown markers so the rendered cell text and
 * its measured width match what the user sees.
 *
 * Handled markers: code spans, `**strong**`, `__strong__`, `~~strike~~`,
 * `*em*` and `_em_`. Code span contents are emitted verbatim, so identifiers
 * such as `foo_bar_baz` or `a*b*c` inside backticks keep every character.
 * Underscore markers only count at word boundaries, so bare snake_case words
 * are left alone. Single `*` / `_` markers must hug their content, so spaced
 * operators like `2 * 3 * 4` survive.
 *
 * @param text Raw cell text.
 * @returns The text with the markers removed and `\|` collapsed to `|`.
 */
function stripInlineMarkdown(text: string): string {
  // Order matters: longer markers first.
  const stripEmphasis = (segment: string): string =>
    segment
      .replace(/\*\*([^*]+)\*\*/g, "$1")
      .replace(/(^|[^\p{L}\p{N}_])__([^_]+)__(?![\p{L}\p{N}_])/gu, "$1$2")
      .replace(/~~([^~]+)~~/g, "$1")
      .replace(/(^|[^*])\*(?!\s)([^*\n]+)(?<!\s)\*/g, "$1$2")
      .replace(
        /(^|[^\p{L}\p{N}_])_(?!\s)([^_\n]+)(?<!\s)_(?![\p{L}\p{N}_])/gu,
        "$1$2",
      );

  // Walk the text code span by code span: prose between spans gets its
  // emphasis markers stripped, span contents are copied through untouched.
  let out = "";
  let last = 0;
  for (const match of text.matchAll(/`([^`]+)`/g)) {
    const at = match.index ?? 0;
    out += stripEmphasis(text.slice(last, at)) + (match[1] ?? "");
    last = at + match[0].length;
  }
  out += stripEmphasis(text.slice(last));

  // Collapse any \| escapes that are still present.
  return out.replace(/\\\|/g, "|");
}
|
package/src/utils/title.ts
CHANGED
|
@@ -1,45 +1,44 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
1
2
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
3
|
+
import {
|
|
4
|
+
buildProviderOptions,
|
|
5
|
+
formatLlmError,
|
|
6
|
+
getLanguageModel,
|
|
7
|
+
getMaxInputTokens,
|
|
8
|
+
} from "../llm/index.ts";
|
|
2
9
|
import { updateThreadTitle } from "../threads/store.ts";
|
|
3
|
-
import { createLlmClient } from "../worker/llm-client.ts";
|
|
4
10
|
import { logger } from "./logger.ts";
|
|
5
11
|
|
|
6
12
|
/**
|
|
7
|
-
* Generate a short title for a thread using the chunker model
|
|
8
|
-
* Fire-and-forget — errors are logged and never propagated.
|
|
9
|
-
* title back to the thread's CSV file by rewriting the thread_meta row.
|
|
13
|
+
* Generate a short title for a thread using the chunker model.
|
|
14
|
+
* Fire-and-forget — errors are logged and never propagated.
|
|
10
15
|
*/
|
|
11
16
|
export async function generateThreadTitle(
|
|
12
|
-
config:
|
|
17
|
+
config: BotholomewConfig,
|
|
13
18
|
projectDir: string,
|
|
14
19
|
threadId: string,
|
|
15
20
|
context: string,
|
|
16
21
|
): Promise<void> {
|
|
17
22
|
try {
|
|
18
|
-
const
|
|
23
|
+
const model = getLanguageModel(config.chunker_llm);
|
|
24
|
+
const numCtx = await getMaxInputTokens(config.chunker_llm);
|
|
19
25
|
|
|
20
|
-
const
|
|
21
|
-
model
|
|
22
|
-
|
|
26
|
+
const { text } = await generateText({
|
|
27
|
+
model,
|
|
28
|
+
maxOutputTokens: 50,
|
|
23
29
|
system:
|
|
24
30
|
"You are a title generator. The user will provide the first message from a conversation. Output a short descriptive title (5-8 words). Output ONLY the title, nothing else.",
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
role: "user",
|
|
28
|
-
content: `Generate a title for this message:\n\n"${context}"`,
|
|
29
|
-
},
|
|
30
|
-
],
|
|
31
|
+
prompt: `Generate a title for this message:\n\n"${context}"`,
|
|
32
|
+
providerOptions: buildProviderOptions(config.chunker_llm, numCtx),
|
|
31
33
|
});
|
|
32
34
|
|
|
33
|
-
const title =
|
|
34
|
-
.filter((b) => b.type === "text")
|
|
35
|
-
.map((b) => b.text)
|
|
36
|
-
.join("")
|
|
37
|
-
.trim();
|
|
38
|
-
|
|
35
|
+
const title = text.trim();
|
|
39
36
|
if (title) {
|
|
40
37
|
await updateThreadTitle(projectDir, threadId, title);
|
|
41
38
|
}
|
|
42
39
|
} catch (err) {
|
|
43
|
-
logger.warn(
|
|
40
|
+
logger.warn(
|
|
41
|
+
`Failed to generate thread title: ${formatLlmError(err, config.chunker_llm)}`,
|
|
42
|
+
);
|
|
44
43
|
}
|
|
45
44
|
}
|
package/src/worker/context.ts
CHANGED
|
@@ -1,113 +1,86 @@
|
|
|
1
|
-
import
|
|
2
|
-
import type {
|
|
1
|
+
import type { ModelMessage } from "ai";
|
|
2
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
3
|
+
import { getMaxInputTokens as llmGetMaxInputTokens } from "../llm/index.ts";
|
|
3
4
|
import { logger } from "../utils/logger.ts";
|
|
4
5
|
|
|
5
6
|
/** Rough estimate: ~4 characters per token for English text */
|
|
6
7
|
const CHARS_PER_TOKEN = 4;
|
|
7
8
|
|
|
8
|
-
/** Fallback if the models API call fails */
|
|
9
|
-
const DEFAULT_MAX_INPUT_TOKENS = 200_000;
|
|
10
|
-
|
|
11
9
|
/** Reserve this fraction of the context window for safety margin */
|
|
12
10
|
const HEADROOM_FRACTION = 0.1;
|
|
13
11
|
|
|
14
12
|
/** Maximum characters for a single tool result before truncation */
|
|
15
13
|
const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
16
14
|
|
|
17
|
-
/**
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
/**
|
|
21
|
-
* Look up the model's max input tokens via the Anthropic Models API.
|
|
22
|
-
* Results are cached per model ID for the lifetime of the process.
|
|
23
|
-
*/
|
|
24
|
-
export async function getMaxInputTokens(
|
|
25
|
-
apiKey: string | undefined,
|
|
26
|
-
model: string,
|
|
27
|
-
): Promise<number> {
|
|
28
|
-
const cached = modelTokenCache.get(model);
|
|
29
|
-
if (cached !== undefined) return cached;
|
|
30
|
-
|
|
31
|
-
try {
|
|
32
|
-
const client = new Anthropic({ apiKey: apiKey || undefined });
|
|
33
|
-
const info = await client.beta.models.retrieve(model);
|
|
34
|
-
const limit = info.max_input_tokens ?? DEFAULT_MAX_INPUT_TOKENS;
|
|
35
|
-
modelTokenCache.set(model, limit);
|
|
36
|
-
return limit;
|
|
37
|
-
} catch (err) {
|
|
38
|
-
logger.debug(`Failed to retrieve model info for ${model}: ${err}`);
|
|
39
|
-
modelTokenCache.set(model, DEFAULT_MAX_INPUT_TOKENS);
|
|
40
|
-
return DEFAULT_MAX_INPUT_TOKENS;
|
|
41
|
-
}
|
|
15
|
+
/** Re-export so call sites have a single entry point. */
|
|
16
|
+
export function getMaxInputTokens(cfg: LlmBlock): Promise<number> {
|
|
17
|
+
return llmGetMaxInputTokens(cfg);
|
|
42
18
|
}
|
|
43
19
|
|
|
44
20
|
function estimateTokens(text: string): number {
|
|
45
21
|
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
46
22
|
}
|
|
47
23
|
|
|
48
|
-
function messageChars(msg:
|
|
24
|
+
function messageChars(msg: ModelMessage): number {
|
|
49
25
|
if (typeof msg.content === "string") return msg.content.length;
|
|
50
|
-
if (Array.isArray(msg.content))
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
26
|
+
if (!Array.isArray(msg.content)) return 0;
|
|
27
|
+
let total = 0;
|
|
28
|
+
for (const block of msg.content) {
|
|
29
|
+
const b = block as Record<string, unknown>;
|
|
30
|
+
if (typeof b.text === "string") {
|
|
31
|
+
total += b.text.length;
|
|
32
|
+
} else if (b.type === "tool-result" && typeof b.output === "object") {
|
|
33
|
+
const out = b.output as { value?: unknown };
|
|
34
|
+
total +=
|
|
35
|
+
typeof out.value === "string"
|
|
36
|
+
? out.value.length
|
|
37
|
+
: JSON.stringify(out.value ?? "").length;
|
|
38
|
+
} else {
|
|
39
|
+
total += JSON.stringify(b).length;
|
|
61
40
|
}
|
|
62
|
-
return total;
|
|
63
41
|
}
|
|
64
|
-
return
|
|
42
|
+
return total;
|
|
65
43
|
}
|
|
66
44
|
|
|
67
45
|
/**
|
|
68
|
-
* Truncate individual tool results that are excessively large.
|
|
69
|
-
* Mutates messages in-place.
|
|
46
|
+
* Truncate individual tool results that are excessively large. Mutates in-place.
|
|
70
47
|
*/
|
|
71
|
-
function truncateToolResults(messages:
|
|
48
|
+
function truncateToolResults(messages: ModelMessage[]): void {
|
|
72
49
|
for (const msg of messages) {
|
|
50
|
+
if (msg.role !== "tool") continue;
|
|
73
51
|
if (!Array.isArray(msg.content)) continue;
|
|
74
52
|
for (const block of msg.content) {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
53
|
+
const b = block as {
|
|
54
|
+
type?: string;
|
|
55
|
+
output?: { type?: string; value?: unknown };
|
|
56
|
+
};
|
|
57
|
+
if (b.type !== "tool-result" || !b.output) continue;
|
|
58
|
+
const out = b.output;
|
|
59
|
+
if (typeof out.value !== "string") continue;
|
|
60
|
+
if (out.value.length <= MAX_TOOL_RESULT_CHARS) continue;
|
|
61
|
+
const original = out.value.length;
|
|
62
|
+
out.value =
|
|
63
|
+
out.value.slice(0, MAX_TOOL_RESULT_CHARS) +
|
|
64
|
+
`\n\n[truncated: ${original} chars → ${MAX_TOOL_RESULT_CHARS} chars]`;
|
|
87
65
|
}
|
|
88
66
|
}
|
|
89
67
|
}
|
|
90
68
|
|
|
91
69
|
/**
|
|
92
70
|
* Ensure the conversation fits within the context window.
|
|
93
|
-
*
|
|
94
|
-
*
|
|
95
|
-
*
|
|
96
|
-
* (keeping the first user message and recent messages)
|
|
97
|
-
*
|
|
98
|
-
* Mutates messages in-place and returns the array.
|
|
71
|
+
* 1) Truncate oversized tool results in place.
|
|
72
|
+
* 2) If still too large, drop oldest messages from the middle (keeping the
|
|
73
|
+
* first user message and recent messages).
|
|
99
74
|
*/
|
|
100
75
|
export function fitToContextWindow(
|
|
101
|
-
messages:
|
|
76
|
+
messages: ModelMessage[],
|
|
102
77
|
systemPrompt: string,
|
|
103
78
|
maxInputTokens: number,
|
|
104
|
-
):
|
|
105
|
-
// Step 1: truncate oversized tool results
|
|
79
|
+
): ModelMessage[] {
|
|
106
80
|
truncateToolResults(messages);
|
|
107
81
|
|
|
108
|
-
// Step 2: estimate total tokens
|
|
109
82
|
const systemTokens = estimateTokens(systemPrompt);
|
|
110
|
-
const responseBuffer = 4096;
|
|
83
|
+
const responseBuffer = 4096;
|
|
111
84
|
const headroom = Math.ceil(maxInputTokens * HEADROOM_FRACTION);
|
|
112
85
|
|
|
113
86
|
const budget = maxInputTokens - systemTokens - responseBuffer - headroom;
|
|
@@ -121,16 +94,11 @@ export function fitToContextWindow(
|
|
|
121
94
|
let totalChars = messages.reduce((sum, m) => sum + messageChars(m), 0);
|
|
122
95
|
let totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
123
96
|
|
|
124
|
-
if (totalTokens <= budget)
|
|
125
|
-
return messages;
|
|
126
|
-
}
|
|
97
|
+
if (totalTokens <= budget) return messages;
|
|
127
98
|
|
|
128
|
-
// Step 3: drop oldest message pairs from the middle until we fit.
|
|
129
|
-
// Keep messages[0] (initial user message) and remove from index 1 onward.
|
|
130
99
|
let dropped = 0;
|
|
131
100
|
while (totalTokens > budget && messages.length > 2) {
|
|
132
|
-
|
|
133
|
-
const removed = messages.splice(1, 1)[0] as MessageParam;
|
|
101
|
+
const removed = messages.splice(1, 1)[0] as ModelMessage;
|
|
134
102
|
totalChars -= messageChars(removed);
|
|
135
103
|
totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
136
104
|
dropped++;
|