botholomew 0.18.7 → 0.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -2
- package/package.json +12 -9
- package/src/chat/agent.ts +175 -181
- package/src/chat/session.ts +30 -31
- package/src/chat/usage.ts +19 -20
- package/src/commands/init.ts +20 -0
- package/src/config/loader.ts +50 -10
- package/src/config/schemas.ts +48 -22
- package/src/init/index.ts +12 -5
- package/src/init/templates.ts +45 -4
- package/src/llm/abort.ts +9 -0
- package/src/llm/cache-control.ts +65 -0
- package/src/llm/capabilities.ts +155 -0
- package/src/llm/error-format.ts +95 -0
- package/src/llm/fake.ts +226 -0
- package/src/llm/index.ts +19 -0
- package/src/llm/provider-options.ts +29 -0
- package/src/llm/provider.ts +65 -0
- package/src/llm/tools.ts +24 -0
- package/src/llm/types.ts +20 -0
- package/src/llm/usage.ts +33 -0
- package/src/prompts/capabilities.ts +72 -108
- package/src/tools/tool.ts +2 -22
- package/src/tui/hooks/useMessageQueue.ts +2 -1
- package/src/utils/title.ts +21 -22
- package/src/worker/context.ts +45 -77
- package/src/worker/llm.ts +147 -112
- package/src/worker/prompt.ts +1 -1
- package/src/worker/schedules.ts +43 -54
- package/src/worker/tick.ts +3 -3
- package/src/worker/fake-llm.ts +0 -277
- package/src/worker/llm-client.ts +0 -12
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
2
|
+
import { logger } from "../utils/logger.ts";
|
|
3
|
+
import { BotholomewLlmError } from "./types.ts";
|
|
4
|
+
|
|
5
|
+
// Ollama endpoint on localhost; `ollamaBaseUrl` falls back to it when `cfg.base_url` is empty.
const DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434";
|
|
6
|
+
|
|
7
|
+
/**
 * Hand-maintained table of maximum input tokens, keyed by exact model id.
 * `getMaxInputTokens` looks a model up here before resorting to the
 * per-provider fallback.
 */
const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
  // --- Anthropic ---
  "claude-opus-4-6": 200_000,
  "claude-opus-4-5": 200_000,
  "claude-sonnet-4-5": 200_000,
  "claude-haiku-4-5-20251001": 200_000,

  // --- OpenAI ---
  "gpt-4o": 128_000,
  "gpt-4o-mini": 128_000,
  "gpt-4-turbo": 128_000,

  // --- Ollama model tags (defaults; a Modelfile can extend them) ---
  "llama3.1:8b": 128_000,
  "llama3.1:70b": 128_000,
  "qwen2.5:7b": 32_000,
  "qwen2.5:3b": 32_000,
  "mistral-nemo": 128_000,
};
|
|
25
|
+
|
|
26
|
+
/**
 * Context-window size assumed for a provider when the model is not found in
 * `KNOWN_CONTEXT_WINDOWS` (and no `llm.max_input_tokens` override is set).
 */
const FALLBACK_BY_PROVIDER: Record<LlmBlock["provider"], number> = {
  anthropic: 200_000,
  // Kept deliberately small: Ollama reserves its KV cache up front, so a big
  // default costs RAM on local machines. 16K is enough for Botholomew's system
  // prompt, the tool schemas, and a reasonable conversation. Raise it with
  // `llm.max_input_tokens` when more is needed.
  ollama: 16_000,
  "openai-compatible": 32_000,
};
|
|
34
|
+
|
|
35
|
+
// Process-wide memo tables, both keyed by `cacheKey(cfg)`.
// Resolved max-input-token counts written by `getMaxInputTokens`.
const tokenCache = new Map<string, number>();
// Tool-calling verdicts written by `assertToolCapable`.
const toolSupportCache = new Map<string, boolean>();
|
|
37
|
+
|
|
38
|
+
// Remediation text appended to the "no tool support" error: lists models known
// to support tool calling and mentions the `llm.supports_tools` override.
const TOOL_CAPABLE_HINT = [
  "Try one of these tool-capable models:",
  "Anthropic: claude-opus-4-6, claude-sonnet-4-5, claude-haiku-4-5",
  "Ollama (local): llama3.1:8b, qwen2.5:7b, mistral-nemo, command-r",
  "OpenAI-compatible: gpt-4o, gpt-4o-mini, or any function-calling model",
  "",
  "Update `llm.model` in your botholomew config. If you believe this model",
  "*does* support tools but the probe is wrong, set `llm.supports_tools: true`",
  "to override.",
].join("\n");
|
|
46
|
+
|
|
47
|
+
/**
 * Build the memoization key for a config block: provider, model, and base URL
 * joined with colons (an absent base URL contributes an empty segment).
 */
function cacheKey(cfg: LlmBlock): string {
  const { provider, model, base_url: baseUrl } = cfg;
  return `${provider}:${model}:${baseUrl ?? ""}`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Resolve the Ollama server root: the configured `base_url` (or the default
 * endpoint when it is empty) with any trailing slashes removed.
 */
function ollamaBaseUrl(cfg: LlmBlock): string {
  const configured = cfg.base_url || DEFAULT_OLLAMA_BASE_URL;
  return configured.replace(/\/+$/, "");
}
|
|
54
|
+
|
|
55
|
+
/** The subset of an Ollama `/api/show` response body that this module declares. */
interface OllamaShowResponse {
  /** Capability tags; `assertToolCapable` looks for the `"tools"` entry. */
  capabilities?: string[];
  /** Free-form model metadata; not read by the code in this module. */
  model_info?: Record<string, unknown>;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Query Ollama's `/api/show` endpoint for the configured model.
 *
 * When `cfg.api_key` is set it is sent as a bearer token, mirroring what
 * `getLanguageModel` does for the Ollama provider, so authenticated endpoints
 * can be probed with the same credentials as real requests.
 *
 * Best-effort by design: never throws.
 *
 * @returns The parsed response body, or `null` when the request fails or the
 *   server answers with a non-2xx status (both cases are logged at debug level).
 */
async function ollamaShow(cfg: LlmBlock): Promise<OllamaShowResponse | null> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };
  if (cfg.api_key) {
    headers.Authorization = `Bearer ${cfg.api_key}`;
  }

  try {
    const response = await fetch(`${ollamaBaseUrl(cfg)}/api/show`, {
      method: "POST",
      headers,
      body: JSON.stringify({ model: cfg.model }),
    });
    if (!response.ok) {
      // Surface the status so a 401/404 is distinguishable from a network failure.
      logger.debug(`Ollama /api/show returned HTTP ${response.status}`);
      return null;
    }
    return (await response.json()) as OllamaShowResponse;
  } catch (err) {
    logger.debug(`Ollama /api/show failed: ${err}`);
    return null;
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Throws `BotholomewLlmError("no_tool_support", ...)` if the configured model
 * cannot call tools. Verdicts are memoized per `provider:model:base_url`.
 *
 * Per provider:
 * - `anthropic`: always considered tool-capable.
 * - `openai-compatible`: capable unless `supports_tools` is explicitly `false`.
 * - `ollama`: an explicit `supports_tools: true` wins outright, which is the
 *   override the error hint tells users to set when the probe is wrong.
 *   Otherwise `/api/show` is probed and must list the `"tools"` capability.
 *
 * A probe that could not reach the server is treated as "not capable" for this
 * call but is NOT memoized, so a server that comes up later is probed again
 * instead of being rejected for the rest of the process.
 *
 * @param cfg LLM config block to validate.
 * @throws BotholomewLlmError with code `no_tool_support`.
 */
export async function assertToolCapable(cfg: LlmBlock): Promise<void> {
  // Checked before the cache so the documented override always takes effect,
  // even after an earlier negative verdict was memoized for the same key.
  if (cfg.provider === "ollama" && cfg.supports_tools === true) return;

  const key = cacheKey(cfg);
  const cached = toolSupportCache.get(key);
  if (cached === true) return;
  if (cached === false) {
    throw makeNoToolError(cfg);
  }

  let supported = false;
  let conclusive = true;
  switch (cfg.provider) {
    case "anthropic":
      supported = true;
      break;
    case "openai-compatible":
      supported = cfg.supports_tools !== false;
      break;
    case "ollama": {
      const show = await ollamaShow(cfg);
      if (show == null) {
        // Probe failed (network error or non-2xx): unknown, not a verdict.
        conclusive = false;
      } else {
        supported = show.capabilities?.includes("tools") === true;
      }
      break;
    }
  }

  if (conclusive) toolSupportCache.set(key, supported);
  if (!supported) throw makeNoToolError(cfg);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Create (but do not throw) the `no_tool_support` error for the given config,
 * naming the model and provider and appending the tool-capable model hint.
 */
function makeNoToolError(cfg: LlmBlock): BotholomewLlmError {
  const { model, provider } = cfg;
  const message = `Model "${model}" (${provider}) does not support tool/function calling, which Botholomew requires.\n\n${TOOL_CAPABLE_HINT}`;
  return new BotholomewLlmError("no_tool_support", message);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Resolve the max input tokens for the configured model. Lookup order:
 *   1. A positive `cfg.max_input_tokens` override (returned directly, not cached)
 *   2. The memoized result for this `provider:model:base_url`
 *   3. The hardcoded `KNOWN_CONTEXT_WINDOWS` table (skipped for Ollama)
 *   4. The provider-level fallback
 *
 * Ollama never consults the table or `/api/show`: those report the model's
 * *maximum* window (often 128K+), and Ollama allocates KV cache for the full
 * `num_ctx` up front. Using the small provider fallback keeps local Macs from
 * running out of memory; users who want more set `llm.max_input_tokens`.
 */
export async function getMaxInputTokens(cfg: LlmBlock): Promise<number> {
  const override = cfg.max_input_tokens;
  if (override && override > 0) {
    return override;
  }

  const key = cacheKey(cfg);
  const memoized = tokenCache.get(key);
  if (memoized !== undefined) return memoized;

  const tabulated =
    cfg.provider !== "ollama" ? KNOWN_CONTEXT_WINDOWS[cfg.model] : undefined;

  let resolved: number;
  if (tabulated && tabulated > 0) {
    resolved = tabulated;
  } else {
    resolved = FALLBACK_BY_PROVIDER[cfg.provider];
    logger.debug(
      `Falling back to default context window (${resolved}) for ${cfg.provider}:${cfg.model}. Set \`llm.max_input_tokens\` to override.`,
    );
  }

  tokenCache.set(key, resolved);
  return resolved;
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { APICallError } from "@ai-sdk/provider";
|
|
2
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
3
|
+
|
|
4
|
+
/**
 * Convert any thrown value from the AI SDK, fetch, or a provider into a brief
 * message suitable for users.
 *
 * Every LLM call site (chat, worker, title generator, schedule evaluator,
 * capability summarizer) routes errors through here so the TUI and logs never
 * render a raw `APICallError`, whose string form dumps the full request body,
 * headers, and tool schemas.
 *
 * @param err The caught value; may be anything.
 * @param cfg Optional config used to tailor the message to the provider.
 */
export function formatLlmError(err: unknown, cfg?: LlmBlock): string {
  if (APICallError.isInstance(err)) {
    return formatApiCallError(err, cfg);
  }
  if (!(err instanceof Error)) {
    return String(err);
  }

  const msg = err.message ?? String(err);
  const looksLikeNetworkFailure =
    /ENOTFOUND|ECONNREFUSED|EHOSTUNREACH|fetch failed/i.test(msg);
  if (!looksLikeNetworkFailure) {
    return msg;
  }

  if (cfg?.provider === "ollama") {
    const url = cfg.base_url || "http://localhost:11434";
    return `Can't reach Ollama at ${url}. Is the server running?`;
  }
  return `Network error: ${msg}`;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Map an AI SDK `APICallError` to a short message based on its HTTP status:
 * 401/403 (credentials, worded per provider), 404 (model not found), 429
 * (rate limit), 5xx (provider failure). Anything else falls back to the
 * error's own message.
 */
function formatApiCallError(err: APICallError, cfg?: LlmBlock): string {
  const status = err.statusCode;

  if (status === 401 || status === 403) {
    switch (cfg?.provider) {
      case "anthropic":
        return "Unauthorized — check `llm.api_key` (or `ANTHROPIC_API_KEY` env var).";
      case "ollama": {
        const where = cfg?.base_url ?? "";
        return where.includes("ollama.com")
          ? "Unauthorized — Ollama Cloud requires a bearer token. Get one from https://ollama.com (account → API keys) and put it in `llm.api_key`."
          : "Unauthorized — your Ollama endpoint rejected the request.";
      }
      case "openai-compatible":
        return "Unauthorized — check `llm.api_key` for your OpenAI-compatible endpoint.";
      default:
        return "Unauthorized — check your API credentials.";
    }
  }

  if (status === 404) {
    return cfg
      ? `Model not found: \`${cfg.model}\` on ${cfg.provider}. Check the model id (and \`base_url\` if remote).`
      : "Model not found. Check the model id and base_url.";
  }

  if (status === 429) {
    return "Rate limited by the provider. Wait and retry.";
  }

  if (status && status >= 500) {
    return `Provider error (${status}). Try again in a moment.`;
  }

  // Last resort. Stay terse: never surface `err.requestBodyValues` (full
  // prompt + tool schemas) or `err.responseHeaders`.
  const statusSuffix = status ? ` (${status})` : "";
  return err.message || `Provider call failed${statusSuffix}.`;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Attach no-op rejection handlers to the `usage` and `providerMetadata`
 * promises of a `streamText` result that the error path never awaits.
 *
 * The AI SDK creates these promises eagerly and rejects them when the stream
 * errors. If we throw out of the for-await loop without handling them, Node
 * reports unhandled rejections, which is what produced the giant
 * request-body dump in the TUI.
 *
 * Handlers are attached through two-argument `then`, the only rejection hook
 * the declared `PromiseLike` type guarantees, so thenables without a `.catch`
 * method are drained too. Missing or non-thenable fields are ignored.
 *
 * @param result Object exposing the (optional) promises to drain.
 */
export function drainStreamPromises(result: {
  usage?: PromiseLike<unknown>;
  providerMetadata?: PromiseLike<unknown>;
}): void {
  const swallow = (): void => {};
  for (const pending of [result.usage, result.providerMetadata]) {
    if (pending && typeof pending.then === "function") {
      void pending.then(undefined, swallow);
    }
  }
}
|
package/src/llm/fake.ts
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { MockLanguageModelV3, simulateReadableStream } from "ai/test";
|
|
3
|
+
|
|
4
|
+
/** One scripted reply that the fake language model can serve. */
export interface FakeTurn {
  /** Regex source, tested case-insensitively against the latest user text. */
  match?: string;
  /** Complete reply text; split automatically unless `chunks` is given. */
  text?: string;
  /** Pre-split text pieces; when present, auto-chunking is skipped. */
  chunks?: string[];
  /** Auto-chunk length in characters (only used when `chunks` is absent). */
  chunkSize?: number;
  /** Milliseconds to wait between streamed chunks. */
  delayMs?: number;
  /** Milliseconds to wait before the first chunk is emitted. */
  preDelayMs?: number;
  /** Tool calls emitted after the text; a missing `id` is generated. */
  toolCalls?: {
    id?: string;
    name: string;
    input: Record<string, unknown>;
  }[];
  /** Token counts to report instead of the built-in defaults. */
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
  };
  /** Provider metadata attached to the streamed `finish` part. */
  providerMetadata?: Record<string, Record<string, unknown>>;
}
|
|
30
|
+
|
|
31
|
+
/** Shape of the JSON fixture file: the list of scripted turns. */
export interface FakeFixture {
  turns: FakeTurn[];
}
|
|
34
|
+
|
|
35
|
+
// Module-level fixture state shared by `loadFixture` and `selectTurn`.
// The fixture currently held in memory (null until the first load).
let loadedFixture: FakeFixture | null = null;
// Value of BOTHOLOMEW_FAKE_LLM_FIXTURE that `loadedFixture` was loaded for.
let loadedFixturePath: string | undefined;
// Cursor into `turns`: advanced by `selectTurn`, reset by `loadFixture`.
let sequentialIndex = 0;
|
|
38
|
+
|
|
39
|
+
/**
 * Return the fixture named by `BOTHOLOMEW_FAKE_LLM_FIXTURE`, loading it on
 * first use and again whenever the env var changes. Every (re)load resets the
 * sequential turn cursor.
 *
 * The cached fixture is cleared before a reload is attempted, so a failed
 * load can never leave the previous fixture paired with the new path and be
 * served as if it belonged to it.
 *
 * @returns The loaded fixture, or an empty one when the env var is unset.
 * @throws Error when the file is missing or does not contain an object with a
 *   `turns` array; `JSON.parse` errors propagate unchanged.
 */
function loadFixture(): FakeFixture {
  const fixturePath = process.env.BOTHOLOMEW_FAKE_LLM_FIXTURE;
  if (loadedFixture && loadedFixturePath === fixturePath) {
    return loadedFixture;
  }

  // Invalidate first: if anything below throws, the next call reloads.
  loadedFixture = null;
  loadedFixturePath = fixturePath;
  sequentialIndex = 0;

  if (!fixturePath) {
    loadedFixture = { turns: [] };
    return loadedFixture;
  }
  if (!existsSync(fixturePath)) {
    throw new Error(
      `BOTHOLOMEW_FAKE_LLM_FIXTURE points to missing file: ${fixturePath}`,
    );
  }

  const parsed: unknown = JSON.parse(readFileSync(fixturePath, "utf8"));
  const hasTurns =
    typeof parsed === "object" &&
    parsed !== null &&
    Array.isArray((parsed as { turns?: unknown }).turns);
  if (!hasTurns) {
    // Fail here with a clear message instead of crashing later in selectTurn.
    throw new Error(
      `BOTHOLOMEW_FAKE_LLM_FIXTURE must contain a JSON object with a "turns" array: ${fixturePath}`,
    );
  }

  loadedFixture = parsed as FakeFixture;
  return loadedFixture;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Choose the scripted turn that answers `lastUserText`.
 *
 * 1. With no turns configured, return a placeholder reply.
 * 2. Starting at the cursor, return the first turn whose `match` regex
 *    (case-insensitive) hits the text, and move the cursor just past it.
 * 3. Otherwise return the turn at the cursor and advance by one.
 * 4. Once the cursor has run off the end, keep returning the final turn.
 */
function selectTurn(lastUserText: string): FakeTurn {
  const { turns } = loadFixture();
  if (turns.length === 0) {
    return { text: "(fake LLM: no fixture turns configured)" };
  }

  let cursor = sequentialIndex;
  while (cursor < turns.length) {
    const candidate = turns[cursor];
    if (
      candidate?.match &&
      new RegExp(candidate.match, "i").test(lastUserText)
    ) {
      sequentialIndex = cursor + 1;
      return candidate;
    }
    cursor += 1;
  }

  if (sequentialIndex < turns.length) {
    const next = turns[sequentialIndex];
    sequentialIndex += 1;
    if (next) return next;
  }

  return turns[turns.length - 1] ?? { text: "" };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Split `text` into consecutive pieces of `size` UTF-16 code units (the last
 * piece may be shorter). A non-positive `size` yields the whole text as one
 * piece; empty text always yields an empty array.
 */
function chunkText(text: string, size: number): string[] {
  if (size <= 0 || text.length === 0) {
    return text ? [text] : [];
  }
  const pieces: string[] = [];
  let offset = 0;
  while (offset < text.length) {
    pieces.push(text.slice(offset, offset + size));
    offset += size;
  }
  return pieces;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Find the text of the most recent user message in a prompt.
 *
 * Messages are scanned from last to first. For a user message, string content
 * is returned as-is; array content yields its first `text` part. A user
 * message with neither is skipped in favor of an earlier one. Returns `""`
 * when the prompt is not an array or holds no usable user text.
 */
function extractLastUserText(prompt: unknown): string {
  if (!Array.isArray(prompt)) return "";

  let idx = prompt.length;
  while (idx-- > 0) {
    const message = prompt[idx] as { role?: string; content?: unknown };
    if (message.role !== "user") continue;

    const { content } = message;
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) continue;

    const firstText = (
      content as Array<{ type?: string; text?: unknown }>
    ).find((part) => part.type === "text" && typeof part.text === "string");
    if (firstText !== undefined) return firstText.text as string;
  }

  return "";
}
|
|
101
|
+
|
|
102
|
+
/**
 * Report whether a prompt comes from the title generator: the first system
 * message with string content must contain "title generator" (any case).
 * Later system messages are not consulted.
 */
function isTitleGeneratorCall(prompt: unknown): boolean {
  if (!Array.isArray(prompt)) return false;

  const firstSystem = (
    prompt as Array<{ role?: string; content?: unknown }>
  ).find((entry) => entry.role === "system" && typeof entry.content === "string");

  return (
    firstSystem !== undefined &&
    /title generator/i.test(firstSystem.content as string)
  );
}
|
|
112
|
+
|
|
113
|
+
// biome-ignore lint/suspicious/noExplicitAny: V3StreamPart union too wide to enumerate
type StreamPart = any;

/**
 * Turn a scripted turn into the ordered stream parts the mock model emits:
 * `stream-start`, an optional text block (`text-start`, one `text-delta` per
 * chunk, `text-end`), a `tool-input-start` + `tool-call` pair per tool call,
 * and a closing `finish` part with usage and provider metadata.
 *
 * Defaults: 6-character auto-chunks, 100 input tokens, and output tokens
 * estimated as a quarter of the text length (at least 1).
 */
function buildStreamParts(turn: FakeTurn): StreamPart[] {
  const text = turn.text ?? turn.chunks?.join("") ?? "";
  const chunks = turn.chunks ?? chunkText(text, turn.chunkSize ?? 6);
  const textId = "txt_0";

  // The text block is omitted entirely when there is no text.
  const textParts: StreamPart[] = text
    ? [
        { type: "text-start", id: textId },
        ...chunks.map((delta) => ({ type: "text-delta", id: textId, delta })),
        { type: "text-end", id: textId },
      ]
    : [];

  const toolParts: StreamPart[] = turn.toolCalls
    ? turn.toolCalls.flatMap((call) => {
        const id =
          call.id ?? `toolu_${Math.random().toString(36).slice(2, 14)}`;
        return [
          { type: "tool-input-start", id, toolName: call.name },
          {
            type: "tool-call",
            toolCallId: id,
            toolName: call.name,
            input: JSON.stringify(call.input),
          },
        ];
      })
    : [];

  const unified = turn.toolCalls?.length ? "tool-calls" : "stop";
  const inTok = turn.usage?.inputTokens ?? 100;
  const outTok =
    turn.usage?.outputTokens ?? Math.max(1, Math.floor(text.length / 4));

  const finishPart: StreamPart = {
    type: "finish",
    finishReason: { unified, raw: unified },
    usage: {
      inputTokens: {
        total: inTok,
        noCache: inTok,
        cacheRead: 0,
        cacheWrite: 0,
      },
      outputTokens: { total: outTok, text: outTok, reasoning: 0 },
      totalTokens: inTok + outTok,
    },
    providerMetadata: turn.providerMetadata,
  };

  return [
    { type: "stream-start", warnings: [] },
    ...textParts,
    ...toolParts,
    finishPart,
  ];
}
|
|
165
|
+
|
|
166
|
+
/**
 * Build a `MockLanguageModelV3` that replays scripted fixture turns.
 *
 * Title-generator prompts always get the fixed reply "Chat session" and do
 * not consume a fixture turn. Every other prompt is answered by `selectTurn`,
 * keyed on the latest user text. `doStream` replays the turn as timed stream
 * parts; `doGenerate` returns the same text and tool calls in one response.
 */
export function createFakeLanguageModel(): MockLanguageModelV3 {
  return new MockLanguageModelV3({
    provider: "fake",
    modelId: "botholomew-fake-llm",

    doStream: async ({ prompt }) => {
      const turn: FakeTurn = isTitleGeneratorCall(prompt)
        ? { text: "Chat session", delayMs: 0 }
        : selectTurn(extractLastUserText(prompt));

      const stream = simulateReadableStream({
        chunks: buildStreamParts(turn),
        initialDelayInMs: turn.preDelayMs ?? null,
        chunkDelayInMs: turn.delayMs ?? null,
      });
      return { stream };
    },

    doGenerate: async ({ prompt }) => {
      const turn: FakeTurn = isTitleGeneratorCall(prompt)
        ? { text: "Chat session" }
        : selectTurn(extractLastUserText(prompt));

      const text = turn.text ?? turn.chunks?.join("") ?? "";

      // Text first (when non-empty), then one entry per scripted tool call.
      const content: Array<Record<string, unknown>> = text
        ? [{ type: "text", text }]
        : [];
      if (turn.toolCalls) {
        for (const call of turn.toolCalls) {
          const toolCallId =
            call.id ?? `toolu_${Math.random().toString(36).slice(2, 14)}`;
          content.push({
            type: "tool-call",
            toolCallId,
            toolName: call.name,
            input: JSON.stringify(call.input),
          });
        }
      }

      const unified = turn.toolCalls?.length ? "tool-calls" : "stop";
      const inTok = turn.usage?.inputTokens ?? 100;
      const outTok =
        turn.usage?.outputTokens ?? Math.max(1, Math.floor(text.length / 4));

      return {
        content: content as never,
        finishReason: { unified, raw: unified },
        usage: {
          inputTokens: {
            total: inTok,
            noCache: inTok,
            cacheRead: 0,
            cacheWrite: 0,
          },
          outputTokens: { total: outTok, text: outTok, reasoning: 0 },
          totalTokens: inTok + outTok,
        },
        warnings: [],
      };
    },
  });
}
|
package/src/llm/index.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export { type AbortHandle, createAbortHandle } from "./abort.ts";
|
|
2
|
+
export { withAnthropicCacheBreakpoints } from "./cache-control.ts";
|
|
3
|
+
export { assertToolCapable, getMaxInputTokens } from "./capabilities.ts";
|
|
4
|
+
export { drainStreamPromises, formatLlmError } from "./error-format.ts";
|
|
5
|
+
export {
|
|
6
|
+
createFakeLanguageModel,
|
|
7
|
+
type FakeFixture,
|
|
8
|
+
type FakeTurn,
|
|
9
|
+
} from "./fake.ts";
|
|
10
|
+
export { describeModel, getLanguageModel } from "./provider.ts";
|
|
11
|
+
export { buildProviderOptions } from "./provider-options.ts";
|
|
12
|
+
export { toAiSdkTool, toAiSdkTools } from "./tools.ts";
|
|
13
|
+
export {
|
|
14
|
+
BotholomewLlmError,
|
|
15
|
+
type CacheTokens,
|
|
16
|
+
type LlmBlock,
|
|
17
|
+
type LlmProvider,
|
|
18
|
+
} from "./types.ts";
|
|
19
|
+
export { extractCacheTokens } from "./usage.ts";
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { SharedV3ProviderOptions } from "@ai-sdk/provider";
|
|
2
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
3
|
+
|
|
4
|
+
type ProviderOptions = SharedV3ProviderOptions;

/**
 * Produce the `providerOptions` value handed to `streamText`, `generateText`,
 * and `generateObject` for the configured provider.
 *
 * Ollama is the one provider that needs this. Unless `num_ctx` is sent with
 * each request, the server uses the context size from the model's Modelfile
 * (usually 4096) and silently truncates longer prompts, which mangles the
 * system prompt and tool schemas.
 *
 * @param cfg LLM config block; only `provider` is inspected.
 * @param numCtx Context size to request from Ollama.
 * @returns Ollama options carrying `num_ctx`, or `undefined` for every other
 *   provider.
 */
export function buildProviderOptions(
  cfg: LlmBlock,
  numCtx: number,
): ProviderOptions | undefined {
  if (cfg.provider !== "ollama") {
    return undefined;
  }
  const ollama = { options: { num_ctx: numCtx } };
  return { ollama };
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
2
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
3
|
+
import type { LanguageModel } from "ai";
|
|
4
|
+
import { createOllama } from "ollama-ai-provider-v2";
|
|
5
|
+
import type { LlmBlock } from "../config/schemas.ts";
|
|
6
|
+
import { createFakeLanguageModel } from "./fake.ts";
|
|
7
|
+
import { BotholomewLlmError } from "./types.ts";
|
|
8
|
+
|
|
9
|
+
// Ollama endpoint on localhost; `getLanguageModel` uses it when `cfg.base_url` is empty.
const DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434";
|
|
10
|
+
|
|
11
|
+
/**
 * Create the AI SDK `LanguageModel` described by a config block.
 *
 * With `BOTHOLOMEW_FAKE_LLM=1` in the environment the provider is ignored and
 * a `MockLanguageModelV3` is returned; the fake reads its fixture path itself.
 *
 * @throws BotholomewLlmError (`no_credentials`) when Anthropic has no
 *   `api_key` or the OpenAI-compatible provider has no `base_url`.
 * @throws Error for a provider value outside the known set.
 */
export function getLanguageModel(cfg: LlmBlock): LanguageModel {
  const useFake = process.env.BOTHOLOMEW_FAKE_LLM === "1";
  if (useFake) {
    return createFakeLanguageModel();
  }

  switch (cfg.provider) {
    case "anthropic": {
      const apiKey = cfg.api_key;
      if (!apiKey) {
        throw new BotholomewLlmError(
          "no_credentials",
          "Anthropic provider requires `llm.api_key` (or ANTHROPIC_API_KEY env var).",
        );
      }
      return createAnthropic({ apiKey })(cfg.model);
    }

    case "ollama": {
      const root = (cfg.base_url || DEFAULT_OLLAMA_BASE_URL).replace(/\/+$/, "");
      // A configured `api_key` travels as a bearer token. Local Ollama ignores
      // auth headers while Ollama Cloud (https://ollama.com) requires them, so
      // one code path serves both.
      const headers = cfg.api_key
        ? { Authorization: `Bearer ${cfg.api_key}` }
        : undefined;
      return createOllama({ baseURL: `${root}/api`, headers })(cfg.model);
    }

    case "openai-compatible": {
      const endpoint = cfg.base_url;
      if (!endpoint) {
        throw new BotholomewLlmError(
          "no_credentials",
          "OpenAI-compatible provider requires `llm.base_url`.",
        );
      }
      const factory = createOpenAICompatible({
        name: "openai-compatible",
        baseURL: endpoint.replace(/\/+$/, ""),
        apiKey: cfg.api_key || undefined,
      });
      return factory(cfg.model);
    }

    default: {
      // Compile-time exhaustiveness check; reached only with invalid config.
      const exhaustive: never = cfg.provider;
      throw new Error(`Unsupported LLM provider: ${String(exhaustive)}`);
    }
  }
}
|
|
62
|
+
|
|
63
|
+
/** Render a config block as a compact `provider:model` label. */
export function describeModel(cfg: LlmBlock): string {
  const { provider, model } = cfg;
  return `${provider}:${model}`;
}
|
package/src/llm/tools.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { type Tool, type ToolSet, tool } from "ai";
|
|
2
|
+
import type { z } from "zod";
|
|
3
|
+
import type { AnyToolDefinition } from "../tools/tool.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Wrap a Botholomew `ToolDefinition` as an AI-SDK `Tool`, carrying over only
 * its description and input schema.
 *
 * `execute` is intentionally left unset. Botholomew's own turn loop runs the
 * tools, which keeps max_turns, parallel execution, queued user injections,
 * terminal worker tools, and the soft-error (`is_error`) convention behaving
 * consistently.
 */
export function toAiSdkTool(def: AnyToolDefinition): Tool {
  const { description, inputSchema } = def;
  return tool({
    description,
    inputSchema: inputSchema as z.ZodType,
  });
}
|
|
17
|
+
|
|
18
|
+
/**
 * Convert a list of tool definitions into an AI-SDK `ToolSet` keyed by tool
 * name. When two definitions share a name, the later one wins.
 */
export function toAiSdkTools(defs: AnyToolDefinition[]): ToolSet {
  const byName: Record<string, Tool> = {};
  for (const definition of defs) {
    const converted = toAiSdkTool(definition);
    byName[definition.name] = converted;
  }
  return byName as ToolSet;
}
|
package/src/llm/types.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
export type { LlmBlock, LlmProvider } from "../config/schemas.ts";
|
|
2
|
+
|
|
3
|
+
/** Provider-independent token accounting for one LLM call. */
export interface CacheTokens {
  /** Input (prompt) tokens. */
  input: number;
  /** Output (completion) tokens. */
  output: number;
  /** Input tokens read from the provider's prompt cache. */
  cacheRead: number;
  /** Input tokens written to the provider's prompt cache. */
  cacheCreation: number;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Error raised by Botholomew's LLM layer for conditions it detects itself.
 * `code` is a machine-readable discriminator; `name` is always
 * `"BotholomewLlmError"`.
 */
export class BotholomewLlmError extends Error {
  /** Category of the failure. */
  code: "no_tool_support" | "no_credentials" | "model_unreachable";

  /**
   * @param code Failure category stored on the instance.
   * @param message Human-readable description passed to `Error`.
   */
  constructor(code: BotholomewLlmError["code"], message: string) {
    super(message);
    this.name = "BotholomewLlmError";
    this.code = code;
  }
}
|
package/src/llm/usage.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { LanguageModelUsage, ProviderMetadata } from "ai";
|
|
2
|
+
import type { CacheTokens } from "./types.ts";
|
|
3
|
+
|
|
4
|
+
/** Cache counters read from `providerMetadata.anthropic` by `extractCacheTokens`. */
interface AnthropicCacheMeta {
  /** Tokens read from the cache. */
  cacheReadInputTokens?: number;
  /** Tokens written to the cache. */
  cacheCreationInputTokens?: number;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Collapse provider-specific usage reporting into one `CacheTokens` record.
 *
 * Cache reads come from the first defined source among
 * `providerMetadata.anthropic.cacheReadInputTokens`,
 * `usage.inputTokenDetails.cacheReadTokens`, and `usage.cachedInputTokens`.
 * Cache writes come from `providerMetadata.anthropic.cacheCreationInputTokens`,
 * then `usage.inputTokenDetails.cacheWriteTokens`. Anything a provider does
 * not report (e.g. Ollama, OpenAI-compatible) is counted as zero.
 *
 * @param usage Usage reported by the AI SDK, if any.
 * @param meta Provider metadata reported by the AI SDK, if any.
 */
export function extractCacheTokens(
  usage: LanguageModelUsage | undefined,
  meta?: ProviderMetadata,
): CacheTokens {
  const { cacheReadInputTokens, cacheCreationInputTokens } = (meta?.anthropic ??
    {}) as AnthropicCacheMeta;
  const details = usage?.inputTokenDetails;

  const cacheRead =
    cacheReadInputTokens ??
    details?.cacheReadTokens ??
    usage?.cachedInputTokens ??
    0;
  const cacheCreation =
    cacheCreationInputTokens ?? details?.cacheWriteTokens ?? 0;

  return {
    input: usage?.inputTokens ?? 0,
    output: usage?.outputTokens ?? 0,
    cacheRead,
    cacheCreation,
  };
}
|