botholomew 0.12.5 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -68
- package/package.json +2 -2
- package/src/chat/agent.ts +59 -86
- package/src/chat/session.ts +29 -25
- package/src/commands/capabilities.ts +1 -1
- package/src/commands/context.ts +178 -926
- package/src/commands/db.ts +9 -13
- package/src/commands/init.ts +4 -1
- package/src/commands/nuke.ts +57 -90
- package/src/commands/schedule.ts +103 -124
- package/src/commands/skill.ts +2 -2
- package/src/commands/task.ts +86 -95
- package/src/commands/thread.ts +107 -112
- package/src/commands/worker.ts +88 -88
- package/src/constants.ts +93 -16
- package/src/context/capabilities.ts +10 -10
- package/src/context/fetcher.ts +9 -10
- package/src/context/reindex.ts +189 -0
- package/src/context/store.ts +803 -0
- package/src/db/doctor.ts +1 -8
- package/src/db/embeddings.ts +227 -175
- package/src/db/sql/19-disk_backed_index.sql +36 -0
- package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
- package/src/fs/atomic.ts +217 -0
- package/src/fs/compat.ts +86 -0
- package/src/fs/sandbox.ts +293 -0
- package/src/init/index.ts +69 -52
- package/src/init/templates.ts +1 -1
- package/src/mcpx/client.ts +1 -1
- package/src/schedules/schema.ts +19 -0
- package/src/schedules/store.ts +296 -0
- package/src/skills/commands.ts +1 -3
- package/src/tasks/schema.ts +47 -0
- package/src/tasks/store.ts +486 -0
- package/src/threads/store.ts +559 -0
- package/src/tools/capabilities/refresh.ts +42 -21
- package/src/tools/context/pipe.ts +15 -71
- package/src/tools/context/update-beliefs.ts +3 -3
- package/src/tools/context/update-goals.ts +3 -3
- package/src/tools/dir/create.ts +26 -23
- package/src/tools/dir/size.ts +46 -17
- package/src/tools/dir/tree.ts +74 -279
- package/src/tools/file/copy.ts +50 -24
- package/src/tools/file/count-lines.ts +34 -10
- package/src/tools/file/delete.ts +53 -23
- package/src/tools/file/edit.ts +39 -14
- package/src/tools/file/exists.ts +12 -26
- package/src/tools/file/info.ts +27 -85
- package/src/tools/file/move.ts +39 -24
- package/src/tools/file/read.ts +32 -80
- package/src/tools/file/write.ts +14 -91
- package/src/tools/registry.ts +8 -7
- package/src/tools/schedule/create.ts +2 -2
- package/src/tools/schedule/list.ts +7 -3
- package/src/tools/search/fuse.ts +12 -33
- package/src/tools/search/index.ts +36 -43
- package/src/tools/search/regexp.ts +29 -17
- package/src/tools/search/semantic.ts +137 -51
- package/src/tools/skill/delete.ts +1 -1
- package/src/tools/skill/list.ts +1 -1
- package/src/tools/skill/write.ts +1 -1
- package/src/tools/task/create.ts +41 -16
- package/src/tools/task/delete.ts +3 -3
- package/src/tools/task/list.ts +6 -3
- package/src/tools/task/update.ts +31 -9
- package/src/tools/task/view.ts +6 -6
- package/src/tools/thread/list.ts +2 -2
- package/src/tools/thread/search.ts +208 -0
- package/src/tools/thread/view.ts +50 -5
- package/src/tools/tool.ts +5 -0
- package/src/tools/util/sleep.ts +77 -0
- package/src/tools/worker/spawn.ts +28 -14
- package/src/tui/App.tsx +12 -19
- package/src/tui/components/ContextPanel.tsx +83 -316
- package/src/tui/components/SchedulePanel.tsx +34 -48
- package/src/tui/components/SleepProgress.tsx +70 -0
- package/src/tui/components/StatusBar.tsx +15 -15
- package/src/tui/components/TaskPanel.tsx +34 -38
- package/src/tui/components/ThreadPanel.tsx +29 -38
- package/src/tui/components/ToolCall.tsx +10 -0
- package/src/tui/components/WorkerPanel.tsx +21 -19
- package/src/tui/markdown.ts +2 -8
- package/src/utils/title.ts +5 -7
- package/src/utils/v7-date.ts +47 -0
- package/src/worker/heartbeat.ts +46 -24
- package/src/worker/index.ts +13 -15
- package/src/worker/llm.ts +30 -37
- package/src/worker/prompt.ts +19 -41
- package/src/worker/schedules.ts +48 -69
- package/src/worker/spawn.ts +11 -11
- package/src/worker/tick.ts +39 -43
- package/src/workers/store.ts +247 -0
- package/src/commands/tools.ts +0 -367
- package/src/context/describer.ts +0 -140
- package/src/context/drives.ts +0 -110
- package/src/context/ingest.ts +0 -162
- package/src/context/refresh.ts +0 -183
- package/src/db/context.ts +0 -637
- package/src/db/daemon-state.ts +0 -6
- package/src/db/reembed.ts +0 -113
- package/src/db/schedules.ts +0 -213
- package/src/db/tasks.ts +0 -347
- package/src/db/threads.ts +0 -276
- package/src/db/workers.ts +0 -212
- package/src/tools/context/list-drives.ts +0 -36
- package/src/tools/context/refresh.ts +0 -165
- package/src/tools/context/search.ts +0 -54
package/src/context/ingest.ts
DELETED
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
2
|
-
import type { DbConnection } from "../db/connection.ts";
|
|
3
|
-
import { getContextItem, getContextItemById } from "../db/context.ts";
|
|
4
|
-
import {
|
|
5
|
-
createEmbedding,
|
|
6
|
-
deleteEmbeddingsForItem,
|
|
7
|
-
rebuildSearchIndex,
|
|
8
|
-
} from "../db/embeddings.ts";
|
|
9
|
-
import { logger } from "../utils/logger.ts";
|
|
10
|
-
import { chunk } from "./chunker.ts";
|
|
11
|
-
import { type DriveTarget, formatDriveRef } from "./drives.ts";
|
|
12
|
-
import { embed as defaultEmbed } from "./embedder.ts";
|
|
13
|
-
|
|
14
|
-
/**
 * Embedding function used during ingestion. Receives a batch of texts and
 * resolves to numeric vectors; the store step pairs vectors with chunks by
 * array position, so one vector per input text is expected.
 */
type IngestEmbedFn = (texts: string[]) => Promise<number[][]>;

/**
 * Everything computed for one context item before any embedding rows are
 * written: identifying metadata copied from the item plus its chunks and the
 * vectors produced for them.
 */
export interface PreparedIngestion {
  /** Id of the context item being ingested. */
  itemId: string;
  /** Title copied from the context item. */
  title: string;
  /** Description copied from the context item. */
  description: string;
  /** Drive the item lives on. */
  drive: string;
  /** Path of the item within its drive. */
  path: string;
  /** Chunks produced by the chunker, each with its own index. */
  chunks: Array<{ index: number; content: string }>;
  /** Embedding vectors; `vectors[i]` is stored alongside `chunks[i]`. */
  vectors: number[][];
}
|
|
25
|
-
|
|
26
|
-
/**
 * Build the full ingestion payload for one context item without writing
 * anything to the database.
 *
 * The item is loaded by id, its content is chunked, and every chunk is
 * embedded together with a short header (title and description when present,
 * then the drive reference). This is the expensive, parallelizable half of
 * ingestion; persisting the result is a separate step.
 *
 * @param conn - Connection used to look the item up.
 * @param itemId - Id of the context item to prepare.
 * @param config - Passed to the chunker and to the default embedder.
 * @param embedFn - Optional replacement for the default embedder.
 * @returns The prepared payload, or `null` when the item does not exist, is
 *   not textual / has no content, or chunking yields no chunks.
 */
export async function prepareIngestion(
  conn: DbConnection,
  itemId: string,
  config: Required<BotholomewConfig>,
  embedFn?: IngestEmbedFn,
): Promise<PreparedIngestion | null> {
  const record = await getContextItemById(conn, itemId);
  if (!record) {
    logger.warn(`ingest: context item ${itemId} not found`);
    return null;
  }

  if (!(record.is_textual && record.content)) {
    logger.debug(`ingest: skipping non-textual item ${itemId}`);
    return null;
  }

  const pieces = await chunk(record.content, record.mime_type, config);
  if (pieces.length === 0) return null;

  // The header is identical for every chunk of the item, so build it once.
  const headerLines: string[] = [];
  if (record.title) headerLines.push(`Title: ${record.title}`);
  if (record.description) {
    headerLines.push(`Description: ${record.description}`);
  }
  headerLines.push(`Source: ${formatDriveRef(record)}`);
  const header = headerLines.join("\n");

  const embeddingInputs = pieces.map((piece) =>
    [header, piece.content].join("\n"),
  );

  const vectors = embedFn
    ? await embedFn(embeddingInputs)
    : await defaultEmbed(embeddingInputs, config);

  return {
    itemId,
    title: record.title,
    description: record.description,
    drive: record.drive,
    path: record.path,
    chunks: pieces,
    vectors,
  };
}
|
|
73
|
-
|
|
74
|
-
/** Outcome of persisting one prepared ingestion. */
export interface IngestionResult {
  /** Number of chunks in the prepared payload that was stored. */
  chunks: number;
  /** `true` when existing embeddings for the item were deleted first. */
  isUpdate: boolean;
}
|
|
78
|
-
|
|
79
|
-
/**
 * Store a prepared ingestion into the database.
 *
 * Inside a single transaction this deletes the item's existing embeddings,
 * inserts one embedding row per chunk that has a vector, and stamps
 * `indexed_at` on the context item. After the commit the search index is
 * rebuilt, because the FTS index is a snapshot that does not see the new
 * rows until then.
 *
 * All statements in BEGIN/COMMIT/ROLLBACK must share one connection, so the
 * caller must pass a connection that lives long enough for the transaction
 * (the tool executor wraps each tool call in `withDb`, which satisfies this).
 *
 * @param conn - Connection that stays open for the whole transaction.
 * @param prepared - Payload with the item's chunks and vectors.
 * @returns The number of chunks in `prepared` and whether older embeddings
 *   were replaced.
 * @throws Re-throws whatever failed inside the transaction, after attempting
 *   a rollback.
 */
export async function storeIngestion(
  conn: DbConnection,
  prepared: PreparedIngestion,
): Promise<IngestionResult> {
  let isUpdate = false;
  let missingVectors = 0;

  await conn.exec("BEGIN TRANSACTION");
  try {
    const deleted = await deleteEmbeddingsForItem(conn, prepared.itemId);
    isUpdate = deleted > 0;

    for (const [i, c] of prepared.chunks.entries()) {
      const v = prepared.vectors[i];
      if (!v) {
        // No vector at this position: skip the chunk, but remember it so the
        // mismatch is reported instead of disappearing silently.
        missingVectors++;
        continue;
      }
      await createEmbedding(conn, {
        contextItemId: prepared.itemId,
        chunkIndex: c.index,
        chunkContent: c.content,
        title: prepared.title,
        description: prepared.description,
        embedding: v,
      });
    }

    await conn.queryRun(
      "UPDATE context_items SET indexed_at = current_timestamp::VARCHAR WHERE id = ?1",
      prepared.itemId,
    );

    await conn.exec("COMMIT");
  } catch (err) {
    // A rejected ROLLBACK must not replace the error that caused it; log the
    // rollback failure and surface the original error to the caller.
    try {
      await conn.exec("ROLLBACK");
    } catch (rollbackErr) {
      const detail =
        rollbackErr instanceof Error
          ? rollbackErr.message
          : String(rollbackErr);
      logger.warn(
        `ingest: rollback failed for ${prepared.itemId}: ${detail}`,
      );
    }
    throw err;
  }

  if (missingVectors > 0) {
    logger.warn(
      `ingest: ${missingVectors} of ${prepared.chunks.length} chunks for ${prepared.itemId} had no vector and were not stored`,
    );
  }

  // FTS index is a snapshot and doesn't see the writes above until rebuilt.
  await rebuildSearchIndex(conn);

  const action = isUpdate ? "updated" : "added";
  logger.info(
    `ingest: ${action} ${prepared.chunks.length} chunks for "${prepared.title}" (${prepared.itemId})`,
  );
  return { chunks: prepared.chunks.length, isUpdate };
}
|
|
128
|
-
|
|
129
|
-
/**
 * Run the whole ingestion pipeline for a single context item.
 *
 * Preparation (lookup, skip checks, chunking, embedding) happens first and
 * outside any transaction; the prepared payload is then stored, which
 * replaces old embeddings and updates `indexed_at` transactionally.
 *
 * @param conn - Connection used for both the lookup and the store step.
 * @param itemId - Id of the context item to ingest.
 * @param config - Passed through to the preparation step.
 * @param embedFn - Optional replacement for the default embedder.
 * @returns The chunk count reported by the store step, or `0` when
 *   preparation produced nothing to store.
 */
export async function ingestContextItem(
  conn: DbConnection,
  itemId: string,
  config: Required<BotholomewConfig>,
  embedFn?: IngestEmbedFn,
): Promise<number> {
  const payload = await prepareIngestion(conn, itemId, config, embedFn);
  if (payload) {
    const { chunks } = await storeIngestion(conn, payload);
    return chunks;
  }
  return 0;
}
|
|
146
|
-
|
|
147
|
-
/**
 * Ingest the context item addressed by a (drive, path) target.
 *
 * @param conn - Connection used for the lookup and the ingestion.
 * @param target - Drive/path pair identifying the item.
 * @param config - Passed through to the ingestion pipeline.
 * @param embedFn - Optional replacement for the default embedder.
 * @returns The chunk count from ingestion, or `0` (with a warning logged)
 *   when no item exists at the target.
 */
export async function ingestByPath(
  conn: DbConnection,
  target: DriveTarget,
  config: Required<BotholomewConfig>,
  embedFn?: IngestEmbedFn,
): Promise<number> {
  const found = await getContextItem(conn, target);
  if (found) {
    return ingestContextItem(conn, found.id, config, embedFn);
  }
  logger.warn(`ingest: no item at ${formatDriveRef(target)}`);
  return 0;
}
|
package/src/context/refresh.ts
DELETED
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
import type { McpxClient } from "@evantahler/mcpx";
|
|
2
|
-
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
3
|
-
import type { DbConnection } from "../db/connection.ts";
|
|
4
|
-
import { type ContextItem, updateContextItem } from "../db/context.ts";
|
|
5
|
-
import { formatDriveRef } from "./drives.ts";
|
|
6
|
-
import { type FetchedContent, fetchUrl } from "./fetcher.ts";
|
|
7
|
-
import {
|
|
8
|
-
type PreparedIngestion,
|
|
9
|
-
prepareIngestion,
|
|
10
|
-
storeIngestion,
|
|
11
|
-
} from "./ingest.ts";
|
|
12
|
-
|
|
13
|
-
/** Per-item outcome of a refresh pass. */
export type RefreshItemStatus =
  | "updated"
  | "unchanged"
  | "missing"
  | "error";

/** What happened to one context item during a refresh pass. */
export interface RefreshItemResult {
  /** Id of the context item. */
  id: string;
  /** Drive the item lives on. */
  drive: string;
  /** Path of the item within its drive. */
  path: string;
  /** Formatted drive reference for the item. */
  ref: string;
  /** Outcome for this item. */
  status: RefreshItemStatus;
  /** Failure message; set on results whose status is `"error"`. */
  error?: string;
}
|
|
23
|
-
|
|
24
|
-
/** Aggregate summary of a refresh pass over a batch of context items. */
export interface RefreshResult {
  /** Number of items considered (items on the `agent` drive are excluded). */
  checked: number;
  /** Items whose content changed and was written back. */
  updated: number;
  /** Items whose freshly read content equals the stored content. */
  unchanged: number;
  /** Disk items whose file no longer exists. */
  missing: number;
  /** Changed items for which an ingestion payload was prepared and stored. */
  reembedded: number;
  /** Total chunk count reported while storing the re-embedded items. */
  chunks: number;
  /** Whether the embedding step was skipped. */
  embeddings_skipped: boolean;
  /** Per-item results, including errors. */
  items: RefreshItemResult[];
}
|
|
34
|
-
|
|
35
|
-
/** Optional knobs and progress hooks for a refresh pass. */
export interface RefreshOptions {
  /** How many changed items are prepared for re-embedding at once (default 10). */
  concurrency?: number;
  /** Called as items are checked, with the number done so far and the total. */
  onItemProgress?: (done: number, total: number) => void;
  /** Called after each changed item has been prepared for re-embedding. */
  onEmbedProgress?: (done: number, total: number) => void;
}
|
|
40
|
-
|
|
41
|
-
/** Embedding function: a batch of texts in, numeric vectors out. */
type IngestEmbedFn = (texts: string[]) => Promise<number[][]>;

/**
 * Fetcher signature compatible with {@link fetchUrl}. Injectable so tests can
 * supply their own implementation.
 */
export type FetchUrlFn = (
  url: string,
  config: Required<BotholomewConfig>,
  mcpxClient: McpxClient | null,
) => Promise<FetchedContent>;
|
|
49
|
-
|
|
50
|
-
/**
 * Refresh a batch of context items: re-read from origin, diff, update
 * content, and re-embed only the items that changed.
 *
 * Dispatches on `drive`:
 *   disk   → read from filesystem
 *   agent  → skip (no external origin)
 *   other  → re-fetch via `item.source_url` (captured at ingest time).
 *            The built-in `url` drive stores the URL as its path so it can
 *            also refresh directly from `path`. Any other drive with no
 *            `source_url` surfaces a per-item error — the user must re-add
 *            from URL. No code here knows anything about the remote
 *            service behind a drive.
 *
 * Failures while reading or updating a single item are captured as an
 * `"error"` entry in `items` and do not stop the pass.
 *
 * @param conn - Connection used for content updates and for storing embeddings.
 * @param items - Candidate items; those on the `agent` drive are ignored.
 * @param config - Passed to the fetcher and to the ingestion step.
 * @param mcpxClient - Passed through to the fetcher; may be `null`.
 * @param opts - Concurrency and progress callbacks. `concurrency` is floored
 *   and clamped to at least 1, so `0`, negative, fractional or `NaN` values
 *   cannot stall the batching loop or re-process items.
 * @param embedFn - Optional replacement for the default embedder.
 * @param fetchFn - Fetcher used for non-disk drives; defaults to `fetchUrl`.
 * @returns Counts for the pass plus the per-item results.
 */
export async function refreshContextItems(
  conn: DbConnection,
  items: ContextItem[],
  config: Required<BotholomewConfig>,
  mcpxClient: McpxClient | null,
  opts: RefreshOptions = {},
  embedFn?: IngestEmbedFn,
  fetchFn: FetchUrlFn = fetchUrl,
): Promise<RefreshResult> {
  const refreshable = items.filter((i) => i.drive !== "agent");

  const results: RefreshItemResult[] = [];
  const toReembed: string[] = [];

  for (const [idx, item] of refreshable.entries()) {
    opts.onItemProgress?.(idx, refreshable.length);
    const base = {
      id: item.id,
      drive: item.drive,
      path: item.path,
      ref: formatDriveRef(item),
    };

    try {
      let content: string;

      if (item.drive === "disk") {
        const bunFile = Bun.file(item.path);
        if (!(await bunFile.exists())) {
          results.push({ ...base, status: "missing" });
          continue;
        }
        content = await bunFile.text();
      } else {
        // Prefer the recorded source URL; only the `url` drive can fall back
        // to its path (minus the leading slash).
        const url =
          item.source_url ??
          (item.drive === "url" ? item.path.replace(/^\//, "") : null);
        if (!url) {
          results.push({
            ...base,
            status: "error",
            error: `Cannot refresh ${formatDriveRef(item)}: no source_url recorded. Re-add from the original URL.`,
          });
          continue;
        }
        const fetched = await fetchFn(url, config, mcpxClient);
        content = fetched.content;
      }

      if (content === item.content) {
        results.push({ ...base, status: "unchanged" });
        continue;
      }

      await updateContextItem(conn, item.id, { content });
      results.push({ ...base, status: "updated" });
      toReembed.push(item.id);
    } catch (err) {
      results.push({
        ...base,
        status: "error",
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
  opts.onItemProgress?.(refreshable.length, refreshable.length);

  const updated = results.filter((r) => r.status === "updated").length;
  const unchanged = results.filter((r) => r.status === "unchanged").length;
  const missing = results.filter((r) => r.status === "missing").length;

  if (toReembed.length === 0) {
    return {
      checked: refreshable.length,
      updated,
      unchanged,
      missing,
      reembedded: 0,
      chunks: 0,
      embeddings_skipped: false,
      items: results,
    };
  }

  // The batch size is also the loop step below. A step of 0 or less would
  // never advance, and a fractional step would make `slice` hand out
  // overlapping batches, so normalise to a whole number >= 1 (NaN becomes 1).
  const requestedConcurrency = Math.floor(opts.concurrency ?? 10);
  const concurrency = requestedConcurrency >= 1 ? requestedConcurrency : 1;
  const prepared: PreparedIngestion[] = [];
  let completed = 0;

  // NOTE(review): a rejection from prepareIngestion aborts the whole call
  // after content has already been updated above — confirm callers expect that.
  for (let i = 0; i < toReembed.length; i += concurrency) {
    const batch = toReembed.slice(i, i + concurrency);
    const batchResults = await Promise.all(
      batch.map(async (id) => {
        const r = await prepareIngestion(conn, id, config, embedFn);
        completed++;
        opts.onEmbedProgress?.(completed, toReembed.length);
        return r;
      }),
    );
    for (const r of batchResults) {
      if (r) prepared.push(r);
    }
  }

  // Stores run one at a time: each opens its own transaction on `conn`.
  let chunks = 0;
  for (const p of prepared) {
    const result = await storeIngestion(conn, p);
    chunks += result.chunks;
  }

  return {
    checked: refreshable.length,
    updated,
    unchanged,
    missing,
    reembedded: prepared.length,
    chunks,
    embeddings_skipped: false,
    items: results,
  };
}
|