botholomew 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/package.json +3 -1
- package/src/chat/agent.ts +87 -23
- package/src/chat/session.ts +19 -6
- package/src/cli.ts +2 -0
- package/src/commands/chat.ts +5 -2
- package/src/commands/context.ts +91 -35
- package/src/commands/thread.ts +180 -0
- package/src/config/schemas.ts +3 -1
- package/src/context/embedder.ts +0 -3
- package/src/daemon/context.ts +146 -0
- package/src/daemon/large-results.ts +100 -0
- package/src/daemon/llm.ts +45 -19
- package/src/daemon/prompt.ts +1 -6
- package/src/daemon/tick.ts +9 -0
- package/src/db/sql/4-unique_context_path.sql +1 -0
- package/src/db/threads.ts +17 -0
- package/src/init/templates.ts +2 -1
- package/src/tools/context/read-large-result.ts +33 -0
- package/src/tools/context/search.ts +2 -0
- package/src/tools/context/update-beliefs.ts +2 -0
- package/src/tools/context/update-goals.ts +2 -0
- package/src/tools/dir/create.ts +3 -2
- package/src/tools/dir/list.ts +2 -1
- package/src/tools/dir/size.ts +2 -1
- package/src/tools/dir/tree.ts +3 -2
- package/src/tools/file/copy.ts +12 -3
- package/src/tools/file/count-lines.ts +2 -1
- package/src/tools/file/delete.ts +3 -2
- package/src/tools/file/edit.ts +3 -2
- package/src/tools/file/exists.ts +2 -1
- package/src/tools/file/info.ts +2 -0
- package/src/tools/file/move.ts +12 -3
- package/src/tools/file/read.ts +2 -1
- package/src/tools/file/write.ts +5 -4
- package/src/tools/mcp/exec.ts +70 -3
- package/src/tools/mcp/info.ts +8 -0
- package/src/tools/mcp/list-tools.ts +18 -6
- package/src/tools/mcp/search.ts +38 -10
- package/src/tools/registry.ts +4 -0
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/schedule/list.ts +2 -0
- package/src/tools/search/grep.ts +3 -2
- package/src/tools/search/semantic.ts +2 -0
- package/src/tools/task/complete.ts +2 -0
- package/src/tools/task/create.ts +17 -4
- package/src/tools/task/fail.ts +2 -0
- package/src/tools/task/list.ts +2 -0
- package/src/tools/task/update.ts +87 -0
- package/src/tools/task/view.ts +3 -1
- package/src/tools/task/wait.ts +2 -0
- package/src/tools/thread/list.ts +2 -0
- package/src/tools/thread/view.ts +3 -1
- package/src/tools/tool.ts +7 -3
- package/src/tui/App.tsx +323 -78
- package/src/tui/components/ContextPanel.tsx +415 -0
- package/src/tui/components/Divider.tsx +14 -0
- package/src/tui/components/HelpPanel.tsx +166 -0
- package/src/tui/components/InputBar.tsx +157 -47
- package/src/tui/components/Logo.tsx +79 -0
- package/src/tui/components/MessageList.tsx +50 -23
- package/src/tui/components/QueuePanel.tsx +57 -0
- package/src/tui/components/StatusBar.tsx +21 -9
- package/src/tui/components/TabBar.tsx +40 -0
- package/src/tui/components/TaskPanel.tsx +409 -0
- package/src/tui/components/ThreadPanel.tsx +541 -0
- package/src/tui/components/ToolCall.tsx +68 -5
- package/src/tui/components/ToolPanel.tsx +295 -281
- package/src/tui/theme.ts +75 -0
- package/src/utils/title.ts +47 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import ansis from "ansis";
|
|
2
|
+
import type { Command } from "commander";
|
|
3
|
+
import type { DbConnection } from "../db/connection.ts";
|
|
4
|
+
import type { Interaction, Thread } from "../db/threads.ts";
|
|
5
|
+
import { deleteThread, getThread, listThreads } from "../db/threads.ts";
|
|
6
|
+
import { logger } from "../utils/logger.ts";
|
|
7
|
+
import { withDb } from "./with-db.ts";
|
|
8
|
+
|
|
9
|
+
/**
 * Register the `thread` command group on the CLI program.
 *
 * Subcommands:
 * - `thread list`        — print one summary line per thread, optionally
 *                          filtered by type and capped by `--limit`.
 * - `thread view <id>`   — print a thread's metadata and its interactions,
 *                          optionally restricted to a set of roles.
 * - `thread delete <id>` — delete a thread and its interactions.
 *
 * `<id>` may be a full thread ID or a unique prefix (see `resolveThreadId`).
 * Failures are reported through `logger.error` and end the process with
 * exit code 1.
 */
export function registerThreadCommand(program: Command) {
  const thread = program.command("thread").description("Manage chat threads");

  thread
    .command("list")
    .description("List threads")
    .option("-t, --type <type>", "filter by type (daemon_tick, chat_session)")
    // Do not pass `parseInt` directly: commander invokes the parser as
    // (value, previousValue), and parseInt would read the previous value as
    // a radix (e.g. `-l 5 -l 10` would parse "10" in base 5).
    .option("-l, --limit <n>", "max number of threads", (value: string) =>
      Number.parseInt(value, 10),
    )
    .action((opts) => {
      // Reject a non-numeric limit up front instead of forwarding NaN.
      if (opts.limit !== undefined && Number.isNaN(opts.limit)) {
        logger.error("Invalid --limit value: expected an integer");
        process.exit(1);
      }
      return withDb(program, async (conn) => {
        const threads = await listThreads(conn, {
          type: opts.type,
          limit: opts.limit,
        });

        if (threads.length === 0) {
          logger.dim("No threads found.");
          return;
        }

        for (const t of threads) {
          printThread(t);
        }
      });
    });

  thread
    .command("view <id>")
    .description("View thread details and interactions")
    .option(
      "--only <roles>",
      "show only these roles (comma-separated: user,assistant,tool,system)",
    )
    .action((id, opts) =>
      withDb(program, async (conn) => {
        const resolvedId = await resolveThreadId(conn, id);
        if (!resolvedId) {
          logger.error(`Thread not found: ${id}`);
          process.exit(1);
        }
        const result = await getThread(conn, resolvedId);
        if (!result) {
          logger.error(`Thread not found: ${id}`);
          process.exit(1);
        }
        // Parse the role filter once (not once per interaction) and tolerate
        // whitespace around the commas, e.g. "user, assistant".
        const wantedRoles = opts.only
          ? new Set(
              (opts.only as string).split(",").map((role) => role.trim()),
            )
          : null;
        const interactions = wantedRoles
          ? result.interactions.filter((i) => wantedRoles.has(i.role))
          : result.interactions;
        printThreadDetail(result.thread, interactions);
      }),
    );

  thread
    .command("delete <id>")
    .description("Delete a thread and its interactions")
    .action((id) =>
      withDb(program, async (conn) => {
        const resolvedId = await resolveThreadId(conn, id);
        if (!resolvedId) {
          logger.error(`Thread not found: ${id}`);
          process.exit(1);
        }
        const deleted = await deleteThread(conn, resolvedId);
        if (!deleted) {
          logger.error(`Thread not found: ${id}`);
          process.exit(1);
        }
        logger.success(`Deleted thread: ${resolvedId}`);
      }),
    );
}
|
|
82
|
+
|
|
83
|
+
/**
 * Turn a user-supplied thread ID or ID prefix into a full thread ID.
 *
 * Inputs of 36 or more characters are returned unchanged without a lookup.
 * Shorter inputs are matched as a prefix against every thread returned by
 * `listThreads`.
 *
 * @returns the matching thread ID, or `null` when no thread matches.
 *          When several threads match, an error is logged and the process
 *          exits with code 1.
 */
async function resolveThreadId(
  conn: DbConnection,
  idPrefix: string,
): Promise<string | null> {
  if (idPrefix.length >= 36) {
    return idPrefix;
  }

  const everyThread = await listThreads(conn);
  const candidates = everyThread.filter((t) => t.id.startsWith(idPrefix));

  if (candidates.length === 0) {
    return null;
  }
  if (candidates.length > 1) {
    logger.error(
      `Ambiguous thread prefix "${idPrefix}" matches ${candidates.length} threads`,
    );
    process.exit(1);
  }

  const only = candidates[0] as Thread;
  return only.id;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Colorize a thread type for terminal output:
 * `daemon_tick` in magenta, `chat_session` in cyan.
 */
function typeColor(type: Thread["type"]): string {
  switch (type) {
    case "chat_session": {
      const painted = ansis.cyan(type);
      return painted;
    }
    case "daemon_tick": {
      const painted = ansis.magenta(type);
      return painted;
    }
  }
}
|
|
109
|
+
|
|
110
|
+
/**
 * Status label for a thread: dimmed "ended" once `ended_at` is set,
 * green "active" otherwise.
 */
function statusLabel(thread: Thread): string {
  if (thread.ended_at) {
    return ansis.dim("ended");
  }
  return ansis.green("active");
}
|
|
113
|
+
|
|
114
|
+
/**
 * Colorize an interaction role for terminal output:
 * user → cyan, assistant → green, system → yellow, tool → magenta.
 */
function roleColor(role: Interaction["role"]): string {
  switch (role) {
    case "tool": {
      const painted = ansis.magenta(role);
      return painted;
    }
    case "system": {
      const painted = ansis.yellow(role);
      return painted;
    }
    case "assistant": {
      const painted = ansis.green(role);
      return painted;
    }
    case "user": {
      const painted = ansis.cyan(role);
      return painted;
    }
  }
}
|
|
126
|
+
|
|
127
|
+
/**
 * Print a one-line summary of a thread: the first 8 characters of its ID
 * (dimmed), its colored type, its status label, and its title (or a dimmed
 * "(untitled)" placeholder when the title is empty).
 */
function printThread(t: Thread) {
  const shortId = t.id.slice(0, 8);
  const label = t.title || ansis.dim("(untitled)");
  const row = ` ${ansis.dim(shortId)} ${typeColor(t.type)} ${statusLabel(t)} ${label}`;
  console.log(row);
}
|
|
132
|
+
|
|
133
|
+
/**
 * Print the detail view for a thread: a bold title, then ID, type, status,
 * the task ID (only when present), start and end timestamps, followed by
 * either the list of interactions or a dimmed "No interactions." note.
 *
 * @param t            the thread whose metadata is printed
 * @param interactions the interactions to list (already filtered by caller)
 */
function printThreadDetail(t: Thread, interactions: Interaction[]) {
  const heading = t.title || "(untitled)";
  console.log(ansis.bold(heading));
  console.log(` ID: ${t.id}`);
  console.log(` Type: ${typeColor(t.type)}`);
  console.log(` Status: ${statusLabel(t)}`);
  if (t.task_id) {
    console.log(` Task: ${t.task_id}`);
  }
  console.log(` Started: ${t.started_at.toISOString()}`);
  console.log(
    ` Ended: ${t.ended_at ? t.ended_at.toISOString() : ansis.dim("—")}`,
  );

  if (interactions.length > 0) {
    console.log(`\n Interactions (${interactions.length}):`);
    interactions.forEach((entry) => {
      printInteraction(entry);
    });
  } else {
    console.log(`\n ${ansis.dim("No interactions.")}`);
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Format a date as `YYYY-MM-DD HH:MM:SS` in UTC by taking its ISO-8601
 * string, swapping the `T` separator for a space and stripping the trailing
 * milliseconds and `Z` suffix.
 */
function formatTime(date: Date): string {
  const iso = date.toISOString();
  const spaced = iso.replace("T", " ");
  const withoutMillis = spaced.replace(/\.\d{3}Z$/, "");
  return withoutMillis;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Print a single interaction as one line: sequence number, timestamp, colored
 * role, kind, and a preview. For `tool_use` interactions with a tool name the
 * preview is the tool name; otherwise it is the content with newlines
 * replaced by spaces, cut to 120 characters plus "..." when longer.
 * Token count and duration are appended (dimmed) when they are truthy.
 */
function printInteraction(i: Interaction) {
  const seqLabel = ansis.dim(`#${i.sequence}`);
  const timeLabel = ansis.dim(formatTime(i.created_at));
  const kindLabel = ansis.dim(`[${i.kind}]`);

  const describe = (): string => {
    if (i.kind === "tool_use" && i.tool_name) {
      return ansis.yellow(i.tool_name);
    }
    const flattened = i.content.replace(/\n/g, " ");
    if (flattened.length > 120) {
      return `${flattened.slice(0, 120)}...`;
    }
    return flattened;
  };
  const preview = describe();

  // Zero / missing metrics are omitted rather than printed as "0 tok" / "0ms".
  const metrics = [
    i.token_count ? `${i.token_count} tok` : "",
    i.duration_ms ? `${i.duration_ms}ms` : "",
  ].filter((part) => part !== "");

  let suffix = "";
  if (metrics.length > 0) {
    suffix = ` ${ansis.dim(metrics.join(", "))}`;
  }

  console.log(
    ` ${seqLabel} ${timeLabel} ${roleColor(i.role)} ${kindLabel} ${preview}${suffix}`,
  );
}
|
package/src/config/schemas.ts
CHANGED
|
@@ -5,13 +5,15 @@ export interface BotholomewConfig {
|
|
|
5
5
|
tick_interval_seconds?: number;
|
|
6
6
|
max_tick_duration_seconds?: number;
|
|
7
7
|
system_prompt_override?: string;
|
|
8
|
+
max_turns?: number;
|
|
8
9
|
}
|
|
9
10
|
|
|
10
11
|
export const DEFAULT_CONFIG: Required<BotholomewConfig> = {
|
|
11
12
|
anthropic_api_key: "",
|
|
12
13
|
model: "claude-opus-4-20250514",
|
|
13
|
-
chunker_model: "claude-haiku-4-
|
|
14
|
+
chunker_model: "claude-haiku-4-5-20251001",
|
|
14
15
|
tick_interval_seconds: 300,
|
|
15
16
|
max_tick_duration_seconds: 120,
|
|
16
17
|
system_prompt_override: "",
|
|
18
|
+
max_turns: 0,
|
|
17
19
|
};
|
package/src/context/embedder.ts
CHANGED
|
@@ -3,7 +3,6 @@ import {
|
|
|
3
3
|
EMBEDDING_DTYPE,
|
|
4
4
|
EMBEDDING_MODEL_ID,
|
|
5
5
|
} from "../constants.ts";
|
|
6
|
-
import { logger } from "../utils/logger.ts";
|
|
7
6
|
|
|
8
7
|
type EmbedFn = (texts: string[]) => Promise<number[][]>;
|
|
9
8
|
|
|
@@ -11,12 +10,10 @@ let pipelineInstance: ReturnType<typeof createPipelinePromise> | null = null;
|
|
|
11
10
|
|
|
12
11
|
function createPipelinePromise() {
|
|
13
12
|
return (async () => {
|
|
14
|
-
logger.info(`Loading embedding model ${EMBEDDING_MODEL_ID}...`);
|
|
15
13
|
const { pipeline } = await import("@huggingface/transformers");
|
|
16
14
|
const pipe = await pipeline("feature-extraction", EMBEDDING_MODEL_ID, {
|
|
17
15
|
dtype: EMBEDDING_DTYPE,
|
|
18
16
|
});
|
|
19
|
-
logger.info("Embedding model loaded.");
|
|
20
17
|
return pipe;
|
|
21
18
|
})();
|
|
22
19
|
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
|
|
3
|
+
import { logger } from "../utils/logger.ts";
|
|
4
|
+
|
|
5
|
+
/** Rough estimate: ~4 characters per token for English text */
|
|
6
|
+
const CHARS_PER_TOKEN = 4;
|
|
7
|
+
|
|
8
|
+
/** Fallback if the models API call fails */
|
|
9
|
+
const DEFAULT_MAX_INPUT_TOKENS = 200_000;
|
|
10
|
+
|
|
11
|
+
/** Reserve this fraction of the context window for safety margin */
|
|
12
|
+
const HEADROOM_FRACTION = 0.1;
|
|
13
|
+
|
|
14
|
+
/** Maximum characters for a single tool result before truncation */
|
|
15
|
+
const MAX_TOOL_RESULT_CHARS = 50_000;
|
|
16
|
+
|
|
17
|
+
/** Cache model max_input_tokens to avoid repeated API calls */
|
|
18
|
+
const modelTokenCache = new Map<string, number>();
|
|
19
|
+
|
|
20
|
+
/**
 * Resolve the maximum number of input tokens for a model.
 *
 * The value is fetched with `client.beta.models.retrieve` and memoized in
 * `modelTokenCache` per model ID. If the lookup throws, or the response has
 * no `max_input_tokens`, `DEFAULT_MAX_INPUT_TOKENS` is used — and that
 * fallback is memoized too, so a model is looked up at most once per process.
 *
 * @param apiKey API key for the lookup; an empty string is passed on as
 *               `undefined`
 * @param model  model ID to look up
 * @returns the model's input token limit, or the default fallback
 */
export async function getMaxInputTokens(
  apiKey: string | undefined,
  model: string,
): Promise<number> {
  const known = modelTokenCache.get(model);
  if (known !== undefined) {
    return known;
  }

  let resolved: number;
  try {
    const anthropic = new Anthropic({ apiKey: apiKey || undefined });
    const details = await anthropic.beta.models.retrieve(model);
    resolved = details.max_input_tokens ?? DEFAULT_MAX_INPUT_TOKENS;
  } catch (err) {
    logger.debug(`Failed to retrieve model info for ${model}: ${err}`);
    resolved = DEFAULT_MAX_INPUT_TOKENS;
  }

  modelTokenCache.set(model, resolved);
  return resolved;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Estimate the token count of a string as its character length divided by
 * `CHARS_PER_TOKEN`, rounded up.
 */
function estimateTokens(text: string): number {
  const approximate = text.length / CHARS_PER_TOKEN;
  return Math.ceil(approximate);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Count the characters a message contributes to the conversation.
 *
 * - String content: its length.
 * - Array content: for each block, the length of its string `text`, else the
 *   length of its string `content`, else the length of the block serialized
 *   as JSON (e.g. tool_use blocks carrying structured input).
 * - Anything else: the length of the content serialized as JSON.
 */
function messageChars(msg: MessageParam): number {
  const { content } = msg;

  if (typeof content === "string") {
    return content.length;
  }
  if (!Array.isArray(content)) {
    return JSON.stringify(content).length;
  }

  return content.reduce((sum, block) => {
    if ("text" in block && typeof block.text === "string") {
      return sum + block.text.length;
    }
    if ("content" in block && typeof block.content === "string") {
      return sum + block.content.length;
    }
    // tool_use blocks with input, etc.
    return sum + JSON.stringify(block).length;
  }, 0);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Truncate individual tool results that are excessively large.
 * Mutates messages in-place.
 *
 * Only `tool_result` blocks whose content is a string longer than
 * `MAX_TOOL_RESULT_CHARS` are touched. The replacement — kept prefix plus the
 * truncation notice — is sized to fit within `MAX_TOOL_RESULT_CHARS`, so
 * running this function again on the same messages leaves them unchanged and
 * the notice keeps reporting the true original size.
 */
function truncateToolResults(messages: MessageParam[]): void {
  for (const msg of messages) {
    if (!Array.isArray(msg.content)) continue;
    for (const block of msg.content) {
      if (
        "type" in block &&
        block.type === "tool_result" &&
        "content" in block &&
        typeof block.content === "string" &&
        block.content.length > MAX_TOOL_RESULT_CHARS
      ) {
        const original = block.content.length;
        const notice = `\n\n[truncated: ${original} chars → ${MAX_TOOL_RESULT_CHARS} chars]`;
        // Reserve room for the notice. Otherwise the result would still
        // exceed the limit and be truncated again on the next pass, replacing
        // the real original size in the notice with a bogus one.
        const keep = Math.max(0, MAX_TOOL_RESULT_CHARS - notice.length);
        (block as { content: string }).content =
          block.content.slice(0, keep) + notice;
      }
    }
  }
}
|
|
90
|
+
|
|
91
|
+
/** True when the message carries at least one `tool_result` block. */
function hasToolResultBlock(msg: MessageParam): boolean {
  return (
    Array.isArray(msg.content) &&
    msg.content.some((block) => "type" in block && block.type === "tool_result")
  );
}

/**
 * Ensure the conversation fits within the context window.
 * Strategy:
 * 1. Truncate oversized tool results.
 * 2. If the estimate is still over budget, drop the oldest messages after the
 *    first one (index 1 onward). A dropped message is removed together with
 *    any `tool_result` messages that directly follow it, so no tool result is
 *    left behind without the `tool_use` it answers. The first message and the
 *    most recent message are always kept.
 *
 * The budget is `maxInputTokens` minus the estimated system prompt tokens,
 * a 4096-token response buffer, and a `HEADROOM_FRACTION` safety margin.
 * If that budget is not positive, a warning is logged and nothing is dropped.
 *
 * Mutates messages in-place and returns the array.
 *
 * @param messages       conversation so far (mutated)
 * @param systemPrompt   system prompt sent alongside the messages
 * @param maxInputTokens the model's input token limit
 * @returns the same `messages` array
 */
export function fitToContextWindow(
  messages: MessageParam[],
  systemPrompt: string,
  maxInputTokens: number,
): MessageParam[] {
  // Step 1: truncate oversized tool results
  truncateToolResults(messages);

  // Step 2: estimate total tokens
  const systemTokens = estimateTokens(systemPrompt);
  const responseBuffer = 4096; // max_tokens for the response
  const headroom = Math.ceil(maxInputTokens * HEADROOM_FRACTION);

  const budget = maxInputTokens - systemTokens - responseBuffer - headroom;
  if (budget <= 0) {
    logger.warn(
      `System prompt alone is ~${systemTokens} tokens, very close to the ${maxInputTokens} token limit`,
    );
    return messages;
  }

  let totalChars = messages.reduce((sum, m) => sum + messageChars(m), 0);
  let totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);

  if (totalTokens <= budget) {
    return messages;
  }

  // Step 3: drop the oldest groups after messages[0] until we fit.
  let dropped = 0;
  while (totalTokens > budget && messages.length > 2) {
    // A group is messages[1] plus every directly following tool_result
    // message: once the tool_use that produced them is gone, those results
    // would be orphaned and the request would be rejected.
    let groupSize = 1;
    while (1 + groupSize < messages.length) {
      const next = messages[1 + groupSize];
      if (!next || !hasToolResultBlock(next)) break;
      groupSize++;
    }

    // Never drop the most recent message; stop if the group reaches it.
    if (1 + groupSize >= messages.length) break;

    const removed = messages.splice(1, groupSize);
    for (const gone of removed) {
      totalChars -= messageChars(gone);
    }
    totalTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
    dropped += removed.length;
  }

  if (dropped > 0) {
    logger.info(
      `Context window management: dropped ${dropped} older messages to fit within ${maxInputTokens} token budget`,
    );
  }

  return messages;
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
 * Temporary in-memory store for large tool results.
 *
 * When a tool result exceeds MAX_INLINE_CHARS, it is stored here and replaced
 * with a summary stub. The LLM can then paginate through the full result
 * using the `read_large_result` tool.
 */

/** Maximum characters to inline directly in the conversation */
export const MAX_INLINE_CHARS = 10_000;

/** Characters per page when paginating */
export const PAGE_SIZE_CHARS = 8_000;

/** A stored tool result together with its precomputed size metadata. */
interface StoredResult {
  toolName: string;
  content: string;
  totalChars: number;
  totalPages: number;
  createdAt: number;
}

/** Stored results keyed by reference ID (`lr_<n>`). */
const store = new Map<string, StoredResult>();

/** Counter behind the reference IDs; it is not reset by clearLargeResults. */
let nextId = 1;

/** Number of PAGE_SIZE_CHARS pages needed to hold `chars` characters. */
function pageCount(chars: number): number {
  return Math.ceil(chars / PAGE_SIZE_CHARS);
}

/** Store a large result and return its reference ID */
export function storeLargeResult(toolName: string, content: string): string {
  const id = `lr_${nextId++}`;
  store.set(id, {
    toolName,
    content,
    totalChars: content.length,
    totalPages: pageCount(content.length),
    createdAt: Date.now(),
  });
  return id;
}

/**
 * Read a page from a stored result (1-based page number).
 *
 * @returns the page content with paging info, or `null` when the ID is
 *          unknown, the page is not a positive integer, or the page lies
 *          past the end of the stored content.
 */
export function readLargeResultPage(
  id: string,
  page: number,
): { content: string; page: number; totalPages: number } | null {
  const entry = store.get(id);
  if (!entry) return null;

  // Reject page 0, negative and fractional pages: a negative start offset
  // would make slice() count from the END of the content and return the
  // wrong text instead of signalling an invalid page.
  if (!Number.isInteger(page) || page < 1) return null;

  const start = (page - 1) * PAGE_SIZE_CHARS;
  if (start >= entry.content.length) return null;

  const content = entry.content.slice(start, start + PAGE_SIZE_CHARS);
  return { content, page, totalPages: entry.totalPages };
}

/**
 * Build the inline stub that replaces the full result in the conversation.
 * The stub names the tool and reference ID, reports the size and page count,
 * shows the first 500 characters as a preview, and tells the reader how to
 * fetch the rest with `read_large_result`.
 */
export function buildResultStub(
  id: string,
  toolName: string,
  content: string,
): string {
  const totalPages = pageCount(content.length);
  const preview = content.slice(0, 500);
  return [
    `[Large result from ${toolName} stored as ${id} — ${content.length} chars, ${totalPages} page(s)]`,
    "",
    "Preview:",
    preview,
    // Ellipsis only when the preview does not already show everything.
    preview.length < content.length ? "..." : "",
    "",
    `Use read_large_result with id="${id}" to read page-by-page (pages 1–${totalPages}).`,
  ].join("\n");
}

/** Result of maybeStoreResult: the text to inline, plus storage info if stored. */
export interface MaybeStoreResultOutput {
  text: string;
  stored?: { id: string; chars: number; pages: number };
}

/**
 * If the tool output exceeds MAX_INLINE_CHARS, store it and return a stub.
 * Otherwise return the original output unchanged.
 */
export function maybeStoreResult(
  toolName: string,
  output: string,
): MaybeStoreResultOutput {
  if (output.length <= MAX_INLINE_CHARS) return { text: output };

  const id = storeLargeResult(toolName, output);
  return {
    text: buildResultStub(id, toolName, output),
    stored: { id, chars: output.length, pages: pageCount(output.length) },
  };
}

/** Clear all stored results (useful between agent loop runs or for cleanup) */
export function clearLargeResults(): void {
  store.clear();
}
|
package/src/daemon/llm.ts
CHANGED
|
@@ -11,6 +11,8 @@ import type { Task } from "../db/tasks.ts";
|
|
|
11
11
|
import { logInteraction } from "../db/threads.ts";
|
|
12
12
|
import { registerAllTools } from "../tools/registry.ts";
|
|
13
13
|
import { getTool, type ToolContext, toAnthropicTools } from "../tools/tool.ts";
|
|
14
|
+
import { fitToContextWindow, getMaxInputTokens } from "./context.ts";
|
|
15
|
+
import { clearLargeResults, maybeStoreResult } from "./large-results.ts";
|
|
14
16
|
|
|
15
17
|
registerAllTools();
|
|
16
18
|
|
|
@@ -47,7 +49,7 @@ export async function runAgentLoop(input: {
|
|
|
47
49
|
mcpxClient: input.mcpxClient ?? null,
|
|
48
50
|
};
|
|
49
51
|
|
|
50
|
-
const userMessage = `
|
|
52
|
+
const userMessage = `Task:\nName: ${task.name}\nDescription: ${task.description}\nPriority: ${task.priority}`;
|
|
51
53
|
|
|
52
54
|
const messages: MessageParam[] = [{ role: "user", content: userMessage }];
|
|
53
55
|
|
|
@@ -58,11 +60,17 @@ export async function runAgentLoop(input: {
|
|
|
58
60
|
content: userMessage,
|
|
59
61
|
});
|
|
60
62
|
|
|
63
|
+
clearLargeResults();
|
|
61
64
|
const daemonTools = toAnthropicTools();
|
|
65
|
+
const maxInputTokens = await getMaxInputTokens(
|
|
66
|
+
config.anthropic_api_key,
|
|
67
|
+
config.model,
|
|
68
|
+
);
|
|
62
69
|
|
|
63
|
-
const maxTurns =
|
|
64
|
-
for (let turn = 0; turn < maxTurns; turn++) {
|
|
70
|
+
const maxTurns = config.max_turns;
|
|
71
|
+
for (let turn = 0; !maxTurns || turn < maxTurns; turn++) {
|
|
65
72
|
const startTime = Date.now();
|
|
73
|
+
fitToContextWindow(messages, systemPrompt, maxInputTokens);
|
|
66
74
|
const response = await client.messages.create({
|
|
67
75
|
model: config.model,
|
|
68
76
|
max_tokens: 4096,
|
|
@@ -102,32 +110,35 @@ export async function runAgentLoop(input: {
|
|
|
102
110
|
// Add assistant response to conversation
|
|
103
111
|
messages.push({ role: "assistant", content: response.content });
|
|
104
112
|
|
|
105
|
-
//
|
|
106
|
-
const toolResults: ToolResultBlockParam[] = [];
|
|
107
|
-
|
|
113
|
+
// Log all tool_use entries
|
|
108
114
|
for (const toolUse of toolUseBlocks) {
|
|
109
|
-
const toolInput = JSON.stringify(toolUse.input);
|
|
110
|
-
|
|
111
|
-
// Log tool use
|
|
112
115
|
await logInteraction(conn, threadId, {
|
|
113
116
|
role: "assistant",
|
|
114
117
|
kind: "tool_use",
|
|
115
118
|
content: `Calling ${toolUse.name}`,
|
|
116
119
|
toolName: toolUse.name,
|
|
117
|
-
toolInput,
|
|
120
|
+
toolInput: JSON.stringify(toolUse.input),
|
|
118
121
|
});
|
|
122
|
+
}
|
|
119
123
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
124
|
+
// Execute all tools in parallel
|
|
125
|
+
const execResults = await Promise.all(
|
|
126
|
+
toolUseBlocks.map(async (toolUse) => {
|
|
127
|
+
const start = Date.now();
|
|
128
|
+
const result = await executeToolCall(toolUse, toolCtx);
|
|
129
|
+
return { toolUse, result, durationMs: Date.now() - start };
|
|
130
|
+
}),
|
|
131
|
+
);
|
|
123
132
|
|
|
124
|
-
|
|
133
|
+
// Log results and collect tool_result messages
|
|
134
|
+
const toolResults: ToolResultBlockParam[] = [];
|
|
135
|
+
for (const { toolUse, result, durationMs } of execResults) {
|
|
125
136
|
await logInteraction(conn, threadId, {
|
|
126
137
|
role: "tool",
|
|
127
138
|
kind: "tool_result",
|
|
128
139
|
content: result.output,
|
|
129
140
|
toolName: toolUse.name,
|
|
130
|
-
durationMs
|
|
141
|
+
durationMs,
|
|
131
142
|
});
|
|
132
143
|
|
|
133
144
|
if (result.terminal && result.agentResult) {
|
|
@@ -137,7 +148,8 @@ export async function runAgentLoop(input: {
|
|
|
137
148
|
toolResults.push({
|
|
138
149
|
type: "tool_result",
|
|
139
150
|
tool_use_id: toolUse.id,
|
|
140
|
-
content: result.output,
|
|
151
|
+
content: maybeStoreResult(toolUse.name, result.output).text,
|
|
152
|
+
is_error: result.isError || undefined,
|
|
141
153
|
});
|
|
142
154
|
}
|
|
143
155
|
|
|
@@ -150,6 +162,7 @@ export async function runAgentLoop(input: {
|
|
|
150
162
|
interface ToolCallResult {
|
|
151
163
|
output: string;
|
|
152
164
|
terminal: boolean;
|
|
165
|
+
isError: boolean;
|
|
153
166
|
agentResult?: AgentLoopResult;
|
|
154
167
|
}
|
|
155
168
|
|
|
@@ -159,18 +172,30 @@ async function executeToolCall(
|
|
|
159
172
|
): Promise<ToolCallResult> {
|
|
160
173
|
const tool = getTool(toolUse.name);
|
|
161
174
|
if (!tool) {
|
|
162
|
-
return {
|
|
175
|
+
return {
|
|
176
|
+
output: `Unknown tool: ${toolUse.name}`,
|
|
177
|
+
terminal: false,
|
|
178
|
+
isError: true,
|
|
179
|
+
};
|
|
163
180
|
}
|
|
164
181
|
|
|
165
182
|
const parsed = tool.inputSchema.safeParse(toolUse.input);
|
|
166
183
|
if (!parsed.success) {
|
|
184
|
+
const issues = parsed.error.issues
|
|
185
|
+
.map((i) => `${i.path.join(".")}: ${i.message}`)
|
|
186
|
+
.join("; ");
|
|
167
187
|
return {
|
|
168
|
-
output: `Invalid input: ${
|
|
188
|
+
output: `Invalid input for ${toolUse.name}: ${issues}. Check the tool's expected parameters.`,
|
|
169
189
|
terminal: false,
|
|
190
|
+
isError: true,
|
|
170
191
|
};
|
|
171
192
|
}
|
|
172
193
|
|
|
173
194
|
const result = await tool.execute(parsed.data, ctx);
|
|
195
|
+
const isError =
|
|
196
|
+
typeof result === "object" && result !== null && "is_error" in result
|
|
197
|
+
? (result as { is_error: boolean }).is_error
|
|
198
|
+
: false;
|
|
174
199
|
const output = typeof result === "string" ? result : JSON.stringify(result);
|
|
175
200
|
|
|
176
201
|
// Check if this is a terminal tool (complete/fail/wait)
|
|
@@ -184,10 +209,11 @@ async function executeToolCall(
|
|
|
184
209
|
return {
|
|
185
210
|
output,
|
|
186
211
|
terminal: true,
|
|
212
|
+
isError,
|
|
187
213
|
agentResult: { status, reason: String(reason) },
|
|
188
214
|
};
|
|
189
215
|
}
|
|
190
216
|
}
|
|
191
217
|
|
|
192
|
-
return { output, terminal: false };
|
|
218
|
+
return { output, terminal: false, isError };
|
|
193
219
|
}
|
package/src/daemon/prompt.ts
CHANGED
|
@@ -123,13 +123,8 @@ export async function buildSystemPrompt(
|
|
|
123
123
|
// Instructions
|
|
124
124
|
parts.push("## Instructions");
|
|
125
125
|
parts.push(
|
|
126
|
-
"You are
|
|
126
|
+
"You are Botholomew, a wise-owl daemon that works through tasks. Use available tools to complete your assigned task, then call complete_task, fail_task, or wait_task. Use create_task for subtasks and update_task to refine pending tasks. Batch independent tool calls in a single response for parallel execution.",
|
|
127
127
|
);
|
|
128
|
-
parts.push("When given a task, use the available tools to complete it.");
|
|
129
|
-
parts.push(
|
|
130
|
-
"Always call complete_task, fail_task, or wait_task when you are done.",
|
|
131
|
-
);
|
|
132
|
-
parts.push("If you need to create subtasks, use create_task.");
|
|
133
128
|
if (options?.hasMcpTools) {
|
|
134
129
|
parts.push("");
|
|
135
130
|
parts.push("## External Tools (MCP)");
|
package/src/daemon/tick.ts
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
} from "../db/tasks.ts";
|
|
9
9
|
import { createThread, endThread, logInteraction } from "../db/threads.ts";
|
|
10
10
|
import { logger } from "../utils/logger.ts";
|
|
11
|
+
import { generateThreadTitle } from "../utils/title.ts";
|
|
11
12
|
import { runAgentLoop } from "./llm.ts";
|
|
12
13
|
import { buildSystemPrompt } from "./prompt.ts";
|
|
13
14
|
import { processSchedules } from "./schedules.ts";
|
|
@@ -82,6 +83,14 @@ export async function tick(
|
|
|
82
83
|
});
|
|
83
84
|
|
|
84
85
|
logger.info(`Task ${task.id} -> ${result.status}`);
|
|
86
|
+
|
|
87
|
+
// Generate a descriptive title for the thread
|
|
88
|
+
void generateThreadTitle(
|
|
89
|
+
config,
|
|
90
|
+
conn,
|
|
91
|
+
threadId,
|
|
92
|
+
`Task: ${task.name}\nDescription: ${task.description}\nOutcome: ${result.status}${result.reason ? ` — ${result.reason}` : ""}`,
|
|
93
|
+
);
|
|
85
94
|
} catch (err) {
|
|
86
95
|
await updateTaskStatus(conn, task.id, "failed", String(err));
|
|
87
96
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_context_items_context_path ON context_items(context_path);
|
package/src/db/threads.ts
CHANGED
|
@@ -154,6 +154,14 @@ export async function reopenThread(
|
|
|
154
154
|
db.query("UPDATE threads SET ended_at = NULL WHERE id = ?1").run(threadId);
|
|
155
155
|
}
|
|
156
156
|
|
|
157
|
+
/**
 * Set the title of the thread with the given ID.
 *
 * @param db       database connection
 * @param threadId ID of the thread to update (bound to `?1`)
 * @param title    new title (bound to `?2`)
 */
export async function updateThreadTitle(
  db: DbConnection,
  threadId: string,
  title: string,
): Promise<void> {
  const statement = db.query("UPDATE threads SET title = ?2 WHERE id = ?1");
  statement.run(threadId, title);
}
|
|
164
|
+
|
|
157
165
|
export async function getThread(
|
|
158
166
|
db: DbConnection,
|
|
159
167
|
threadId: string,
|
|
@@ -175,6 +183,15 @@ export async function getThread(
|
|
|
175
183
|
};
|
|
176
184
|
}
|
|
177
185
|
|
|
186
|
+
/**
 * Delete a thread along with its interactions.
 *
 * The thread's interactions are deleted first, then the thread row itself.
 *
 * @returns `true` when a thread row was deleted, `false` when no thread with
 *          that ID existed.
 */
export async function deleteThread(
  db: DbConnection,
  threadId: string,
): Promise<boolean> {
  const removeInteractions = db.query(
    "DELETE FROM interactions WHERE thread_id = ?1",
  );
  removeInteractions.run(threadId);

  const removeThread = db.query("DELETE FROM threads WHERE id = ?1");
  const { changes } = removeThread.run(threadId);
  return changes > 0;
}
|
|
194
|
+
|
|
178
195
|
export async function listThreads(
|
|
179
196
|
db: DbConnection,
|
|
180
197
|
filters?: {
|