botholomew 0.12.5 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -68
- package/package.json +2 -2
- package/src/chat/agent.ts +59 -86
- package/src/chat/session.ts +29 -25
- package/src/commands/capabilities.ts +1 -1
- package/src/commands/context.ts +178 -926
- package/src/commands/db.ts +9 -13
- package/src/commands/init.ts +4 -1
- package/src/commands/nuke.ts +57 -90
- package/src/commands/schedule.ts +103 -124
- package/src/commands/skill.ts +2 -2
- package/src/commands/task.ts +86 -95
- package/src/commands/thread.ts +107 -112
- package/src/commands/worker.ts +88 -88
- package/src/constants.ts +93 -16
- package/src/context/capabilities.ts +10 -10
- package/src/context/fetcher.ts +9 -10
- package/src/context/reindex.ts +189 -0
- package/src/context/store.ts +803 -0
- package/src/db/doctor.ts +1 -8
- package/src/db/embeddings.ts +227 -175
- package/src/db/sql/19-disk_backed_index.sql +36 -0
- package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
- package/src/fs/atomic.ts +217 -0
- package/src/fs/compat.ts +86 -0
- package/src/fs/sandbox.ts +293 -0
- package/src/init/index.ts +69 -52
- package/src/init/templates.ts +1 -1
- package/src/mcpx/client.ts +1 -1
- package/src/schedules/schema.ts +19 -0
- package/src/schedules/store.ts +296 -0
- package/src/skills/commands.ts +1 -3
- package/src/tasks/schema.ts +47 -0
- package/src/tasks/store.ts +486 -0
- package/src/threads/store.ts +559 -0
- package/src/tools/capabilities/refresh.ts +42 -21
- package/src/tools/context/pipe.ts +15 -71
- package/src/tools/context/update-beliefs.ts +3 -3
- package/src/tools/context/update-goals.ts +3 -3
- package/src/tools/dir/create.ts +26 -23
- package/src/tools/dir/size.ts +46 -17
- package/src/tools/dir/tree.ts +74 -279
- package/src/tools/file/copy.ts +50 -24
- package/src/tools/file/count-lines.ts +34 -10
- package/src/tools/file/delete.ts +53 -23
- package/src/tools/file/edit.ts +39 -14
- package/src/tools/file/exists.ts +12 -26
- package/src/tools/file/info.ts +27 -85
- package/src/tools/file/move.ts +39 -24
- package/src/tools/file/read.ts +32 -80
- package/src/tools/file/write.ts +14 -91
- package/src/tools/registry.ts +8 -7
- package/src/tools/schedule/create.ts +2 -2
- package/src/tools/schedule/list.ts +7 -3
- package/src/tools/search/fuse.ts +12 -33
- package/src/tools/search/index.ts +36 -43
- package/src/tools/search/regexp.ts +29 -17
- package/src/tools/search/semantic.ts +137 -51
- package/src/tools/skill/delete.ts +1 -1
- package/src/tools/skill/list.ts +1 -1
- package/src/tools/skill/write.ts +1 -1
- package/src/tools/task/create.ts +41 -16
- package/src/tools/task/delete.ts +3 -3
- package/src/tools/task/list.ts +6 -3
- package/src/tools/task/update.ts +31 -9
- package/src/tools/task/view.ts +6 -6
- package/src/tools/thread/list.ts +2 -2
- package/src/tools/thread/search.ts +208 -0
- package/src/tools/thread/view.ts +50 -5
- package/src/tools/tool.ts +5 -0
- package/src/tools/util/sleep.ts +77 -0
- package/src/tools/worker/spawn.ts +28 -14
- package/src/tui/App.tsx +12 -19
- package/src/tui/components/ContextPanel.tsx +83 -316
- package/src/tui/components/SchedulePanel.tsx +34 -48
- package/src/tui/components/SleepProgress.tsx +70 -0
- package/src/tui/components/StatusBar.tsx +15 -15
- package/src/tui/components/TaskPanel.tsx +34 -38
- package/src/tui/components/ThreadPanel.tsx +29 -38
- package/src/tui/components/ToolCall.tsx +10 -0
- package/src/tui/components/WorkerPanel.tsx +21 -19
- package/src/tui/markdown.ts +2 -8
- package/src/utils/title.ts +5 -7
- package/src/utils/v7-date.ts +47 -0
- package/src/worker/heartbeat.ts +46 -24
- package/src/worker/index.ts +13 -15
- package/src/worker/llm.ts +30 -37
- package/src/worker/prompt.ts +19 -41
- package/src/worker/schedules.ts +48 -69
- package/src/worker/spawn.ts +11 -11
- package/src/worker/tick.ts +39 -43
- package/src/workers/store.ts +247 -0
- package/src/commands/tools.ts +0 -367
- package/src/context/describer.ts +0 -140
- package/src/context/drives.ts +0 -110
- package/src/context/ingest.ts +0 -162
- package/src/context/refresh.ts +0 -183
- package/src/db/context.ts +0 -637
- package/src/db/daemon-state.ts +0 -6
- package/src/db/reembed.ts +0 -113
- package/src/db/schedules.ts +0 -213
- package/src/db/tasks.ts +0 -347
- package/src/db/threads.ts +0 -276
- package/src/db/workers.ts +0 -212
- package/src/tools/context/list-drives.ts +0 -36
- package/src/tools/context/refresh.ts +0 -165
- package/src/tools/context/search.ts +0 -54
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFile, stat } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
5
|
+
import { CONTEXT_DIR } from "../constants.ts";
|
|
6
|
+
import { withDb } from "../db/connection.ts";
|
|
7
|
+
import {
|
|
8
|
+
type ChunkInput,
|
|
9
|
+
deleteIndexedPath,
|
|
10
|
+
getIndexedPath,
|
|
11
|
+
listIndexedPaths,
|
|
12
|
+
rebuildSearchIndex,
|
|
13
|
+
upsertChunksForPath,
|
|
14
|
+
} from "../db/embeddings.ts";
|
|
15
|
+
import { logger } from "../utils/logger.ts";
|
|
16
|
+
import { chunkByTextSplit } from "./chunker.ts";
|
|
17
|
+
import { embed as defaultEmbed } from "./embedder.ts";
|
|
18
|
+
import { listContextDir } from "./store.ts";
|
|
19
|
+
|
|
20
|
+
/**
 * Signature of the function that converts chunk texts into embedding vectors.
 *
 * Exported so tests can pass a fake implementation instead of the real
 * embedder. `reindexContext` pairs result `i` with input text `i`.
 */
export type EmbedFn = (
  texts: string[],
  config: Required<BotholomewConfig>,
) => Promise<number[][]>;
|
|
25
|
+
|
|
26
|
+
/**
 * Walk every textual file under `<projectDir>/context/` and reconcile the
 * disk-backed search index. Adds new files, replaces stale ones whose
 * content_hash changed, and drops index rows for files that no longer exist.
 *
 * Uses the deterministic text splitter (`chunkByTextSplit`) — never the LLM
 * chunker — so a fresh project with no API key still indexes successfully.
 *
 * @param projectDir - Project root; files are read from its context directory.
 * @param config - Forwarded to the embed function; `embedding_dimension` sizes
 *   the zero-vector fallback used when the embedder returns too few vectors.
 * @param dbPath - Database path handed to `withDb` for every index operation.
 * @param opts - `onProgress` receives human-readable status messages;
 *   `embedFn` replaces the default embedder (intended for tests).
 * @returns Counts of added / updated / unchanged / removed paths and the total
 *   number of chunks written.
 */
export async function reindexContext(
  projectDir: string,
  config: Required<BotholomewConfig>,
  dbPath: string,
  opts: {
    onProgress?: (msg: string) => void;
    /** Override embed for tests; defaults to the real WASM embedder. */
    embedFn?: EmbedFn;
  } = {},
): Promise<ReindexSummary> {
  const onProgress = opts.onProgress ?? (() => {});
  const embed = opts.embedFn ?? defaultEmbed;

  // 1. Walk context/ for every textual file along with its current
  //    (path, hash, mtime, size). Binary files are intentionally skipped —
  //    embeddings on bytes are meaningless and would just consume storage.
  onProgress("scanning files");
  const onDisk = await collectDiskFiles(projectDir);

  // 2. Read the existing index so we can decide what's add / update / skip /
  //    remove without re-embedding files that haven't changed.
  const indexed = await withDb(dbPath, listIndexedPaths);
  const indexedByPath = new Map(indexed.map((r) => [r.path, r]));

  let added = 0;
  let updated = 0;
  let unchanged = 0;
  let removed = 0;
  let chunksWritten = 0;

  // 3. For each file on disk: skip if (path, hash) is already indexed and the
  //    on-disk content hash matches; otherwise (re)embed.
  for (const file of onDisk) {
    const existing = indexedByPath.get(file.path);
    // Every on-disk path is fully handled inside this loop, so take it out of
    // the map up front; only true orphans may reach the sweep in step 4.
    indexedByPath.delete(file.path);

    if (existing && existing.content_hash === file.contentHash) {
      unchanged++;
      continue;
    }

    onProgress(`embedding ${file.path}`);
    let text: string;
    try {
      text = await readFile(join(projectDir, CONTEXT_DIR, file.path), "utf-8");
    } catch (err) {
      // The file disappeared or became unreadable after the scan. Skip it and
      // keep whatever rows exist; the next reindex will reconcile it rather
      // than one bad file aborting the whole run.
      logger.warn(`reindex: skipping ${file.path}: ${err}`);
      continue;
    }

    const chunks = chunkByTextSplit(text);
    if (chunks.length === 0) {
      // Empty/whitespace-only file. Drop any stale rows for it (counted as a
      // removal so the FTS index is rebuilt); otherwise there's nothing to
      // index.
      if (existing) {
        await withDb(dbPath, (conn) => deleteIndexedPath(conn, file.path));
        removed++;
      }
      continue;
    }

    const vectors = await embed(
      chunks.map((c) => c.content),
      config,
    );
    if (vectors.length < chunks.length) {
      // Best-effort fallback is kept, but make the short result visible.
      logger.warn(
        `reindex: embedder returned ${vectors.length} vectors for ${chunks.length} chunks in ${file.path}; filling the rest with zero vectors`,
      );
    }
    const inputs: ChunkInput[] = chunks.map((c, i) => ({
      chunk_index: c.index,
      chunk_content: c.content,
      embedding: vectors[i] ?? new Array(config.embedding_dimension).fill(0),
    }));
    await withDb(dbPath, (conn) =>
      upsertChunksForPath(conn, {
        path: file.path,
        contentHash: file.contentHash,
        mtimeMs: file.mtimeMs,
        sizeBytes: file.sizeBytes,
        chunks: inputs,
      }),
    );
    if (existing) updated++;
    else added++;
    chunksWritten += inputs.length;
  }

  // 4. Anything left in indexedByPath is in the index but not on disk →
  //    delete its rows so search results don't surface ghost files.
  for (const orphan of indexedByPath.keys()) {
    await withDb(dbPath, (conn) => deleteIndexedPath(conn, orphan));
    removed++;
  }

  // Only pay for an FTS rebuild when index rows actually changed.
  if (added + updated + removed > 0) {
    onProgress("rebuilding FTS index");
    await withDb(dbPath, rebuildSearchIndex);
  }

  return { added, updated, unchanged, removed, chunksWritten };
}
|
|
126
|
+
|
|
127
|
+
/** Outcome counters returned by `reindexContext`. */
export interface ReindexSummary {
  /** Files embedded that had no existing index row. */
  added: number;
  /** Files re-embedded because their indexed content hash differed. */
  updated: number;
  /** Files whose on-disk hash matched the index; not re-embedded. */
  unchanged: number;
  /** Indexed paths whose rows were deleted (gone from disk, or now empty). */
  removed: number;
  /** Total number of chunks upserted across added and updated files. */
  chunksWritten: number;
}
|
|
134
|
+
|
|
135
|
+
/** Snapshot of one textual context file as seen during the disk scan. */
interface DiskFile {
  /** Path of the file relative to the project's context directory. */
  path: string;
  /** Hex-encoded SHA-256 digest of the file's raw bytes. */
  contentHash: string;
  /** Modification time reported by `stat`, in milliseconds. */
  mtimeMs: number;
  /** File size reported by `stat`, in bytes. */
  sizeBytes: number;
}
|
|
141
|
+
|
|
142
|
+
/**
 * List every textual, non-directory entry under the project's context
 * directory together with its SHA-256 content hash, mtime and size.
 *
 * A file that cannot be stat'ed or read (for example, removed while the scan
 * is running) is logged and skipped rather than failing the whole scan.
 *
 * @param projectDir - Project root whose context directory is walked.
 * @returns One `DiskFile` per readable textual file.
 */
async function collectDiskFiles(projectDir: string): Promise<DiskFile[]> {
  const entries = await listContextDir(projectDir, "", { recursive: true });
  const out: DiskFile[] = [];
  for (const e of entries) {
    if (e.is_directory) continue;
    if (!e.is_textual) continue;
    const abs = join(projectDir, CONTEXT_DIR, e.path);
    try {
      // Both calls are guarded: the file can vanish or become unreadable
      // between the directory listing, the stat, and the read.
      const st = await stat(abs);
      const buf = await readFile(abs);
      out.push({
        path: e.path,
        contentHash: createHash("sha256").update(buf).digest("hex"),
        mtimeMs: st.mtimeMs,
        sizeBytes: st.size,
      });
    } catch (err) {
      logger.warn(`reindex: skipping ${e.path}: ${err}`);
    }
  }
  return out;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Remove one path's rows from the index and rebuild the search index in the
 * same database session.
 *
 * Intended for file/dir tool callers that delete or move a file and want the
 * index to reflect that right away rather than after the next reindex.
 *
 * @param dbPath - Database path handed to `withDb`.
 * @param path - Indexed path whose rows should be dropped.
 */
export async function dropIndexedPath(
  dbPath: string,
  path: string,
): Promise<void> {
  await withDb(dbPath, async (db) => {
    // Delete the rows first, then refresh the search index.
    await deleteIndexedPath(db, path);
    await rebuildSearchIndex(db);
  });
}
|
|
182
|
+
|
|
183
|
+
/**
 * Look up a single path in the index.
 *
 * @param dbPath - Database path handed to `withDb`.
 * @param path - Indexed path to look up.
 * @returns `{ chunks }` carrying the row's `chunk_count`, or `null` when the
 *   lookup yields no row.
 */
export async function getIndexEntry(
  dbPath: string,
  path: string,
): Promise<{ chunks: number } | null> {
  const entry = await withDb(dbPath, (db) => getIndexedPath(db, path));
  if (!entry) {
    return null;
  }
  return { chunks: entry.chunk_count };
}
|