botholomew 0.18.7 → 0.19.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -2
- package/package.json +12 -9
- package/src/chat/agent.ts +175 -181
- package/src/chat/session.ts +30 -31
- package/src/chat/usage.ts +19 -20
- package/src/commands/init.ts +20 -0
- package/src/config/loader.ts +79 -10
- package/src/config/schemas.ts +48 -22
- package/src/init/index.ts +12 -5
- package/src/init/templates.ts +45 -4
- package/src/llm/abort.ts +9 -0
- package/src/llm/cache-control.ts +65 -0
- package/src/llm/capabilities.ts +155 -0
- package/src/llm/error-format.ts +95 -0
- package/src/llm/fake.ts +226 -0
- package/src/llm/index.ts +19 -0
- package/src/llm/provider-options.ts +29 -0
- package/src/llm/provider.ts +65 -0
- package/src/llm/tools.ts +24 -0
- package/src/llm/types.ts +20 -0
- package/src/llm/usage.ts +33 -0
- package/src/prompts/capabilities.ts +72 -108
- package/src/tools/tool.ts +2 -22
- package/src/tui/hooks/useMessageQueue.ts +2 -1
- package/src/utils/title.ts +21 -22
- package/src/worker/context.ts +45 -77
- package/src/worker/llm.ts +147 -112
- package/src/worker/prompt.ts +1 -1
- package/src/worker/schedules.ts +43 -54
- package/src/worker/tick.ts +3 -3
- package/src/worker/fake-llm.ts +0 -277
- package/src/worker/llm-client.ts +0 -12
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
import { join } from "node:path";
|
|
2
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
3
2
|
import type { McpxClient } from "@evantahler/mcpx";
|
|
3
|
+
import { generateObject } from "ai";
|
|
4
|
+
import { z } from "zod";
|
|
4
5
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
5
6
|
import { getPromptsDir } from "../constants.ts";
|
|
7
|
+
import {
|
|
8
|
+
buildProviderOptions,
|
|
9
|
+
formatLlmError,
|
|
10
|
+
getLanguageModel,
|
|
11
|
+
getMaxInputTokens,
|
|
12
|
+
} from "../llm/index.ts";
|
|
6
13
|
import { getAllTools, type ToolDefinition } from "../tools/tool.ts";
|
|
7
14
|
import {
|
|
8
15
|
type ContextFileMeta,
|
|
@@ -14,7 +21,6 @@ import { logger } from "../utils/logger.ts";
|
|
|
14
21
|
export const CAPABILITIES_FILENAME = "capabilities.md";
|
|
15
22
|
|
|
16
23
|
// LLM config — summarization is one call per refresh, no streaming needed.
|
|
17
|
-
const SUMMARIZE_TIMEOUT_MS = 30_000;
|
|
18
24
|
const SUMMARIZE_MAX_TOKENS = 4096;
|
|
19
25
|
|
|
20
26
|
// biome-ignore lint/suspicious/noExplicitAny: Zod-free tool schema for Anthropic SDK
|
|
@@ -142,71 +148,34 @@ interface SummarizedCapabilities {
|
|
|
142
148
|
mcpx_servers: ServerThemes[];
|
|
143
149
|
}
|
|
144
150
|
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
mcpx_servers: {
|
|
174
|
-
type: "array",
|
|
175
|
-
description:
|
|
176
|
-
"MCPX tools grouped by their source server. Within each server, split into themes only when the server exposes distinct services (e.g. Gmail + Google Calendar on one server).",
|
|
177
|
-
items: {
|
|
178
|
-
type: "object",
|
|
179
|
-
properties: {
|
|
180
|
-
server: {
|
|
181
|
-
type: "string",
|
|
182
|
-
description: "Server name exactly as given in the inventory.",
|
|
183
|
-
},
|
|
184
|
-
themes: {
|
|
185
|
-
type: "array",
|
|
186
|
-
items: {
|
|
187
|
-
type: "object",
|
|
188
|
-
properties: {
|
|
189
|
-
name: {
|
|
190
|
-
type: "string",
|
|
191
|
-
description: "Theme name (usually the service, e.g. Gmail)",
|
|
192
|
-
},
|
|
193
|
-
summary: {
|
|
194
|
-
type: "string",
|
|
195
|
-
description:
|
|
196
|
-
"One sentence with concrete action verbs. No tool names.",
|
|
197
|
-
},
|
|
198
|
-
},
|
|
199
|
-
required: ["name", "summary"],
|
|
200
|
-
},
|
|
201
|
-
},
|
|
202
|
-
},
|
|
203
|
-
required: ["server", "themes"],
|
|
204
|
-
},
|
|
205
|
-
},
|
|
206
|
-
},
|
|
207
|
-
required: ["internal_themes", "mcpx_servers"],
|
|
208
|
-
},
|
|
209
|
-
};
|
|
151
|
+
const ThemeSchema = z.object({
|
|
152
|
+
name: z.string().describe("Short theme name (2-4 words)."),
|
|
153
|
+
summary: z
|
|
154
|
+
.string()
|
|
155
|
+
.describe(
|
|
156
|
+
"One sentence with concrete action verbs. No tool names. No preamble.",
|
|
157
|
+
),
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
const SummarySchema = z.object({
|
|
161
|
+
internal_themes: z
|
|
162
|
+
.array(ThemeSchema)
|
|
163
|
+
.describe(
|
|
164
|
+
"Themes covering the agent's built-in tools (task queue, files & sandbox, search, threads, MCPX meta-tools, workers, self-reflection, etc.).",
|
|
165
|
+
),
|
|
166
|
+
mcpx_servers: z
|
|
167
|
+
.array(
|
|
168
|
+
z.object({
|
|
169
|
+
server: z
|
|
170
|
+
.string()
|
|
171
|
+
.describe("Server name exactly as given in the inventory."),
|
|
172
|
+
themes: z.array(ThemeSchema),
|
|
173
|
+
}),
|
|
174
|
+
)
|
|
175
|
+
.describe(
|
|
176
|
+
"MCPX tools grouped by their source server. Within each server, split into themes only when the server exposes distinct services.",
|
|
177
|
+
),
|
|
178
|
+
});
|
|
210
179
|
|
|
211
180
|
function renderInventoryForPrompt(inv: RawInventory): string {
|
|
212
181
|
const sections: string[] = [];
|
|
@@ -255,42 +224,42 @@ BAD examples (do not produce):
|
|
|
255
224
|
"Provides access to Gmail operations via tools like Gmail_SendEmail..."
|
|
256
225
|
"Tools for working with email"`;
|
|
257
226
|
|
|
227
|
+
function hasUsableCreds(config: BotholomewConfig): boolean {
|
|
228
|
+
const cfg = config.chunker_llm;
|
|
229
|
+
if (cfg.provider === "anthropic") {
|
|
230
|
+
return !!cfg.api_key && cfg.api_key !== "your-api-key-here";
|
|
231
|
+
}
|
|
232
|
+
if (cfg.provider === "openai-compatible") {
|
|
233
|
+
return !!cfg.base_url;
|
|
234
|
+
}
|
|
235
|
+
// ollama: no credentials required, assume reachable.
|
|
236
|
+
return true;
|
|
237
|
+
}
|
|
238
|
+
|
|
258
239
|
async function summarizeViaLLM(
|
|
259
240
|
inv: RawInventory,
|
|
260
|
-
config:
|
|
241
|
+
config: BotholomewConfig,
|
|
261
242
|
): Promise<SummarizedCapabilities | null> {
|
|
262
|
-
if (
|
|
263
|
-
!config.anthropic_api_key ||
|
|
264
|
-
config.anthropic_api_key === "your-api-key-here"
|
|
265
|
-
) {
|
|
266
|
-
return null;
|
|
267
|
-
}
|
|
243
|
+
if (!hasUsableCreds(config)) return null;
|
|
268
244
|
|
|
269
|
-
const
|
|
270
|
-
const userPrompt = `Summarize this tool inventory. Return via the \`${SUMMARIZE_TOOL_NAME}\` tool.\n\n${renderInventoryForPrompt(inv)}`;
|
|
245
|
+
const userPrompt = `Summarize this tool inventory.\n\n${renderInventoryForPrompt(inv)}`;
|
|
271
246
|
|
|
272
247
|
try {
|
|
273
|
-
const
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
const toolBlock = response.content.find((b) => b.type === "tool_use");
|
|
286
|
-
if (!toolBlock || toolBlock.type !== "tool_use") return null;
|
|
287
|
-
|
|
288
|
-
const input = toolBlock.input as SummarizedCapabilities;
|
|
289
|
-
if (!Array.isArray(input.internal_themes)) return null;
|
|
290
|
-
if (!Array.isArray(input.mcpx_servers)) return null;
|
|
291
|
-
return input;
|
|
248
|
+
const model = getLanguageModel(config.chunker_llm);
|
|
249
|
+
const numCtx = await getMaxInputTokens(config.chunker_llm);
|
|
250
|
+
const { object } = await generateObject({
|
|
251
|
+
model,
|
|
252
|
+
schema: SummarySchema,
|
|
253
|
+
system: SUMMARIZE_SYSTEM,
|
|
254
|
+
prompt: userPrompt,
|
|
255
|
+
maxOutputTokens: SUMMARIZE_MAX_TOKENS,
|
|
256
|
+
providerOptions: buildProviderOptions(config.chunker_llm, numCtx),
|
|
257
|
+
});
|
|
258
|
+
return object;
|
|
292
259
|
} catch (err) {
|
|
293
|
-
logger.debug(
|
|
260
|
+
logger.debug(
|
|
261
|
+
`Capability summarization failed: ${formatLlmError(err, config.chunker_llm)}`,
|
|
262
|
+
);
|
|
294
263
|
return null;
|
|
295
264
|
}
|
|
296
265
|
}
|
|
@@ -404,7 +373,7 @@ function renderFallback(inv: RawInventory, now: Date): string {
|
|
|
404
373
|
);
|
|
405
374
|
} else {
|
|
406
375
|
parts.push(
|
|
407
|
-
"_(LLM summarization unavailable — set `
|
|
376
|
+
"_(LLM summarization unavailable — set `llm.api_key` (or `llm.base_url` for local providers) and rerun to generate themed summaries. Until then, use `mcp_list_tools` with each server to see what's exposed.)_",
|
|
408
377
|
);
|
|
409
378
|
parts.push("");
|
|
410
379
|
const servers = [...inv.mcpByServer.keys()].sort();
|
|
@@ -418,29 +387,24 @@ function renderFallback(inv: RawInventory, now: Date): string {
|
|
|
418
387
|
}
|
|
419
388
|
|
|
420
389
|
/**
|
|
421
|
-
* Build the body of capabilities.md. When
|
|
422
|
-
*
|
|
423
|
-
* static fallback listing is rendered.
|
|
390
|
+
* Build the body of capabilities.md. When the configured chunker LLM has
|
|
391
|
+
* usable credentials, the model is asked to produce thematic summaries.
|
|
392
|
+
* Otherwise (or on failure) a static fallback listing is rendered.
|
|
424
393
|
*/
|
|
425
394
|
export async function generateCapabilitiesMarkdown(
|
|
426
395
|
mcpxClient: McpxClient | null,
|
|
427
|
-
config:
|
|
396
|
+
config: BotholomewConfig,
|
|
428
397
|
now: Date = new Date(),
|
|
429
398
|
onPhase?: ProgressCallback,
|
|
430
399
|
): Promise<GenerateResult> {
|
|
431
400
|
const inv = await collectInventory(mcpxClient, onPhase);
|
|
432
401
|
|
|
433
|
-
// Don't call the LLM when the inventory is empty / broken — the fallback
|
|
434
|
-
// conveys the same information and avoids an unnecessary API round trip.
|
|
435
402
|
const hasAnythingToSummarize =
|
|
436
403
|
inv.mcpByServer.size > 0 || inv.internalTotal > 0;
|
|
437
404
|
|
|
438
405
|
let summary: SummarizedCapabilities | null = null;
|
|
439
406
|
if (hasAnythingToSummarize) {
|
|
440
|
-
|
|
441
|
-
config.anthropic_api_key &&
|
|
442
|
-
config.anthropic_api_key !== "your-api-key-here";
|
|
443
|
-
if (canSummarize) {
|
|
407
|
+
if (hasUsableCreds(config)) {
|
|
444
408
|
onPhase?.(
|
|
445
409
|
`Summarizing ${inv.internalTotal} internal + ${inv.mcpTotal} MCPX tools`,
|
|
446
410
|
);
|
|
@@ -472,7 +436,7 @@ export interface WriteResult {
|
|
|
472
436
|
export async function writeCapabilitiesFile(
|
|
473
437
|
projectDir: string,
|
|
474
438
|
mcpxClient: McpxClient | null,
|
|
475
|
-
config:
|
|
439
|
+
config: BotholomewConfig,
|
|
476
440
|
onPhase?: ProgressCallback,
|
|
477
441
|
): Promise<WriteResult> {
|
|
478
442
|
const filePath = join(getPromptsDir(projectDir), CAPABILITIES_FILENAME);
|
package/src/tools/tool.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import type { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/messages";
|
|
2
1
|
import type { McpxClient } from "@evantahler/mcpx";
|
|
3
|
-
import { z } from "zod";
|
|
2
|
+
import type { z } from "zod";
|
|
4
3
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
5
4
|
import type { WithMem } from "../mem/client.ts";
|
|
6
5
|
|
|
@@ -14,7 +13,7 @@ export interface ToolContext {
|
|
|
14
13
|
*/
|
|
15
14
|
withMem: WithMem;
|
|
16
15
|
projectDir: string;
|
|
17
|
-
config:
|
|
16
|
+
config: BotholomewConfig;
|
|
18
17
|
mcpxClient: McpxClient | null;
|
|
19
18
|
/**
|
|
20
19
|
* Identifier of the agent process running this tool, used as the holder
|
|
@@ -84,22 +83,3 @@ export function getAllTools(): AnyToolDefinition[] {
|
|
|
84
83
|
export function getToolsByGroup(group: string): AnyToolDefinition[] {
|
|
85
84
|
return getAllTools().filter((t) => t.group === group);
|
|
86
85
|
}
|
|
87
|
-
|
|
88
|
-
// --- Anthropic adapter ---
|
|
89
|
-
|
|
90
|
-
export function toAnthropicTool(tool: AnyToolDefinition): AnthropicTool {
|
|
91
|
-
const jsonSchema = z.toJSONSchema(tool.inputSchema);
|
|
92
|
-
return {
|
|
93
|
-
name: tool.name,
|
|
94
|
-
description: tool.description,
|
|
95
|
-
input_schema: {
|
|
96
|
-
type: "object" as const,
|
|
97
|
-
properties: jsonSchema.properties ?? {},
|
|
98
|
-
required: jsonSchema.required as string[] | undefined,
|
|
99
|
-
},
|
|
100
|
-
};
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
export function toAnthropicTools(): AnthropicTool[] {
|
|
104
|
-
return getAllTools().map(toAnthropicTool);
|
|
105
|
-
}
|
|
@@ -222,10 +222,11 @@ export function useMessageQueue({
|
|
|
222
222
|
}
|
|
223
223
|
finalizeSegment();
|
|
224
224
|
} catch (err) {
|
|
225
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
225
226
|
const errorMsg: ChatMessage = {
|
|
226
227
|
id: msgId(),
|
|
227
228
|
role: "system",
|
|
228
|
-
content: `Error: ${
|
|
229
|
+
content: `Error: ${message}`,
|
|
229
230
|
timestamp: new Date(),
|
|
230
231
|
};
|
|
231
232
|
setMessages((prev) => [...prev, errorMsg]);
|
package/src/utils/title.ts
CHANGED
|
@@ -1,45 +1,44 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
1
2
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
3
|
+
import {
|
|
4
|
+
buildProviderOptions,
|
|
5
|
+
formatLlmError,
|
|
6
|
+
getLanguageModel,
|
|
7
|
+
getMaxInputTokens,
|
|
8
|
+
} from "../llm/index.ts";
|
|
2
9
|
import { updateThreadTitle } from "../threads/store.ts";
|
|
3
|
-
import { createLlmClient } from "../worker/llm-client.ts";
|
|
4
10
|
import { logger } from "./logger.ts";
|
|
5
11
|
|
|
6
12
|
/**
|
|
7
|
-
* Generate a short title for a thread using the chunker model
|
|
8
|
-
* Fire-and-forget — errors are logged and never propagated.
|
|
9
|
-
* title back to the thread's CSV file by rewriting the thread_meta row.
|
|
13
|
+
* Generate a short title for a thread using the chunker model.
|
|
14
|
+
* Fire-and-forget — errors are logged and never propagated.
|
|
10
15
|
*/
|
|
11
16
|
export async function generateThreadTitle(
|
|
12
|
-
config:
|
|
17
|
+
config: BotholomewConfig,
|
|
13
18
|
projectDir: string,
|
|
14
19
|
threadId: string,
|
|
15
20
|
context: string,
|
|
16
21
|
): Promise<void> {
|
|
17
22
|
try {
|
|
18
|
-
const
|
|
23
|
+
const model = getLanguageModel(config.chunker_llm);
|
|
24
|
+
const numCtx = await getMaxInputTokens(config.chunker_llm);
|
|
19
25
|
|
|
20
|
-
const
|
|
21
|
-
model
|
|
22
|
-
|
|
26
|
+
const { text } = await generateText({
|
|
27
|
+
model,
|
|
28
|
+
maxOutputTokens: 50,
|
|
23
29
|
system:
|
|
24
30
|
"You are a title generator. The user will provide the first message from a conversation. Output a short descriptive title (5-8 words). Output ONLY the title, nothing else.",
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
role: "user",
|
|
28
|
-
content: `Generate a title for this message:\n\n"${context}"`,
|
|
29
|
-
},
|
|
30
|
-
],
|
|
31
|
+
prompt: `Generate a title for this message:\n\n"${context}"`,
|
|
32
|
+
providerOptions: buildProviderOptions(config.chunker_llm, numCtx),
|
|
31
33
|
});
|
|
32
34
|
|
|
33
|
-
const title =
|
|
34
|
-
.filter((b) => b.type === "text")
|
|
35
|
-
.map((b) => b.text)
|
|
36
|
-
.join("")
|
|
37
|
-
.trim();
|
|
38
|
-
|
|
35
|
+
const title = text.trim();
|
|
39
36
|
if (title) {
|
|
40
37
|
await updateThreadTitle(projectDir, threadId, title);
|
|
41
38
|
}
|
|
42
39
|
} catch (err) {
|
|
43
|
-
logger.warn(
|
|
40
|
+
logger.warn(
|
|
41
|
+
`Failed to generate thread title: ${formatLlmError(err, config.chunker_llm)}`,
|
|
42
|
+
);
|
|
44
43
|
}
|
|
45
44
|
}
|
package/src/worker/context.ts
CHANGED
|
@@ -1,113 +1,86 @@
|
|
|
1
|
-
import
|
|
2
|
-
import type {
|
|
1
|
+
import type { ModelMessage } from "ai";
|
|
2
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
3
|
+
import { getMaxInputTokens as llmGetMaxInputTokens } from "../llm/index.ts";
|
|
3
4
|
import { logger } from "../utils/logger.ts";
|
|
4
5
|
|
|
5
6
|
/** Rough estimate: ~4 characters per token for English text */
|
|
6
7
|
const CHARS_PER_TOKEN = 4;
|
|
7
8
|
|
|
8
|
-
/** Fallback if the models API call fails */
|
|
9
|
-
const DEFAULT_MAX_INPUT_TOKENS = 200_000;
|
|
10
|
-
|
|
11
9
|
/** Reserve this fraction of the context window for safety margin */
|
|
12
10
|
const HEADROOM_FRACTION = 0.1;
|
|
13
11
|
|
|
14
12
|
/** Maximum characters for a single tool result before truncation */
|
|
15
13
|
const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
16
14
|
|
|
17
|
-
/**
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
/**
|
|
21
|
-
* Look up the model's max input tokens via the Anthropic Models API.
|
|
22
|
-
* Results are cached per model ID for the lifetime of the process.
|
|
23
|
-
*/
|
|
24
|
-
export async function getMaxInputTokens(
|
|
25
|
-
apiKey: string | undefined,
|
|
26
|
-
model: string,
|
|
27
|
-
): Promise<number> {
|
|
28
|
-
const cached = modelTokenCache.get(model);
|
|
29
|
-
if (cached !== undefined) return cached;
|
|
30
|
-
|
|
31
|
-
try {
|
|
32
|
-
const client = new Anthropic({ apiKey: apiKey || undefined });
|
|
33
|
-
const info = await client.beta.models.retrieve(model);
|
|
34
|
-
const limit = info.max_input_tokens ?? DEFAULT_MAX_INPUT_TOKENS;
|
|
35
|
-
modelTokenCache.set(model, limit);
|
|
36
|
-
return limit;
|
|
37
|
-
} catch (err) {
|
|
38
|
-
logger.debug(`Failed to retrieve model info for ${model}: ${err}`);
|
|
39
|
-
modelTokenCache.set(model, DEFAULT_MAX_INPUT_TOKENS);
|
|
40
|
-
return DEFAULT_MAX_INPUT_TOKENS;
|
|
41
|
-
}
|
|
15
|
+
/** Re-export so call sites have a single entry point. */
|
|
16
|
+
export function getMaxInputTokens(cfg: LlmBlock): Promise<number> {
|
|
17
|
+
return llmGetMaxInputTokens(cfg);
|
|
42
18
|
}
|
|
43
19
|
|
|
44
20
|
function estimateTokens(text: string): number {
|
|
45
21
|
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
46
22
|
}
|
|
47
23
|
|
|
48
|
-
function messageChars(msg:
|
|
24
|
+
function messageChars(msg: ModelMessage): number {
|
|
49
25
|
if (typeof msg.content === "string") return msg.content.length;
|
|
50
|
-
if (Array.isArray(msg.content))
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
26
|
+
if (!Array.isArray(msg.content)) return 0;
|
|
27
|
+
let total = 0;
|
|
28
|
+
for (const block of msg.content) {
|
|
29
|
+
const b = block as Record<string, unknown>;
|
|
30
|
+
if (typeof b.text === "string") {
|
|
31
|
+
total += b.text.length;
|
|
32
|
+
} else if (b.type === "tool-result" && typeof b.output === "object") {
|
|
33
|
+
const out = b.output as { value?: unknown };
|
|
34
|
+
total +=
|
|
35
|
+
typeof out.value === "string"
|
|
36
|
+
? out.value.length
|
|
37
|
+
: JSON.stringify(out.value ?? "").length;
|
|
38
|
+
} else {
|
|
39
|
+
total += JSON.stringify(b).length;
|
|
61
40
|
}
|
|
62
|
-
return total;
|
|
63
41
|
}
|
|
64
|
-
return
|
|
42
|
+
return total;
|
|
65
43
|
}
|
|
66
44
|
|
|
67
45
|
/**
|
|
68
|
-
* Truncate individual tool results that are excessively large.
|
|
69
|
-
* Mutates messages in-place.
|
|
46
|
+
* Truncate individual tool results that are excessively large. Mutates in-place.
|
|
70
47
|
*/
|
|
71
|
-
function truncateToolResults(messages:
|
|
48
|
+
function truncateToolResults(messages: ModelMessage[]): void {
|
|
72
49
|
for (const msg of messages) {
|
|
50
|
+
if (msg.role !== "tool") continue;
|
|
73
51
|
if (!Array.isArray(msg.content)) continue;
|
|
74
52
|
for (const block of msg.content) {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
53
|
+
const b = block as {
|
|
54
|
+
type?: string;
|
|
55
|
+
output?: { type?: string; value?: unknown };
|
|
56
|
+
};
|
|
57
|
+
if (b.type !== "tool-result" || !b.output) continue;
|
|
58
|
+
const out = b.output;
|
|
59
|
+
if (typeof out.value !== "string") continue;
|
|
60
|
+
if (out.value.length <= MAX_TOOL_RESULT_CHARS) continue;
|
|
61
|
+
const original = out.value.length;
|
|
62
|
+
out.value =
|
|
63
|
+
out.value.slice(0, MAX_TOOL_RESULT_CHARS) +
|
|
64
|
+
`\n\n[truncated: ${original} chars → ${MAX_TOOL_RESULT_CHARS} chars]`;
|
|
87
65
|
}
|
|
88
66
|
}
|
|
89
67
|
}
|
|
90
68
|
|
|
91
69
|
/**
|
|
92
70
|
* Ensure the conversation fits within the context window.
|
|
93
|
-
*
|
|
94
|
-
*
|
|
95
|
-
*
|
|
96
|
-
* (keeping the first user message and recent messages)
|
|
97
|
-
*
|
|
98
|
-
* Mutates messages in-place and returns the array.
|
|
71
|
+
* 1) Truncate oversized tool results in place.
|
|
72
|
+
* 2) If still too large, drop oldest messages from the middle (keeping the
|
|
73
|
+
* first user message and recent messages).
|
|
99
74
|
*/
|
|
100
75
|
export function fitToContextWindow(
|
|
101
|
-
messages:
|
|
76
|
+
messages: ModelMessage[],
|
|
102
77
|
systemPrompt: string,
|
|
103
78
|
maxInputTokens: number,
|
|
104
|
-
):
|
|
105
|
-
// Step 1: truncate oversized tool results
|
|
79
|
+
): ModelMessage[] {
|
|
106
80
|
truncateToolResults(messages);
|
|
107
81
|
|
|
108
|
-
// Step 2: estimate total tokens
|
|
109
82
|
const systemTokens = estimateTokens(systemPrompt);
|
|
110
|
-
const responseBuffer = 4096;
|
|
83
|
+
const responseBuffer = 4096;
|
|
111
84
|
const headroom = Math.ceil(maxInputTokens * HEADROOM_FRACTION);
|
|
112
85
|
|
|
113
86
|
const budget = maxInputTokens - systemTokens - responseBuffer - headroom;
|
|
@@ -121,16 +94,11 @@ export function fitToContextWindow(
|
|
|
121
94
|
let totalChars = messages.reduce((sum, m) => sum + messageChars(m), 0);
|
|
122
95
|
let totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
123
96
|
|
|
124
|
-
if (totalTokens <= budget)
|
|
125
|
-
return messages;
|
|
126
|
-
}
|
|
97
|
+
if (totalTokens <= budget) return messages;
|
|
127
98
|
|
|
128
|
-
// Step 3: drop oldest message pairs from the middle until we fit.
|
|
129
|
-
// Keep messages[0] (initial user message) and remove from index 1 onward.
|
|
130
99
|
let dropped = 0;
|
|
131
100
|
while (totalTokens > budget && messages.length > 2) {
|
|
132
|
-
|
|
133
|
-
const removed = messages.splice(1, 1)[0] as MessageParam;
|
|
101
|
+
const removed = messages.splice(1, 1)[0] as ModelMessage;
|
|
134
102
|
totalChars -= messageChars(removed);
|
|
135
103
|
totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
136
104
|
dropped++;
|