botholomew 0.12.5 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +91 -68
  2. package/package.json +2 -2
  3. package/src/chat/agent.ts +42 -82
  4. package/src/chat/session.ts +29 -25
  5. package/src/commands/capabilities.ts +1 -1
  6. package/src/commands/context.ts +177 -926
  7. package/src/commands/db.ts +9 -13
  8. package/src/commands/init.ts +4 -1
  9. package/src/commands/nuke.ts +57 -90
  10. package/src/commands/schedule.ts +103 -124
  11. package/src/commands/skill.ts +2 -2
  12. package/src/commands/task.ts +86 -95
  13. package/src/commands/thread.ts +107 -112
  14. package/src/commands/worker.ts +88 -88
  15. package/src/constants.ts +93 -16
  16. package/src/context/capabilities.ts +10 -10
  17. package/src/context/fetcher.ts +9 -10
  18. package/src/context/reindex.ts +189 -0
  19. package/src/context/store.ts +630 -0
  20. package/src/db/doctor.ts +1 -8
  21. package/src/db/embeddings.ts +227 -175
  22. package/src/db/sql/19-disk_backed_index.sql +36 -0
  23. package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
  24. package/src/fs/atomic.ts +217 -0
  25. package/src/fs/compat.ts +86 -0
  26. package/src/fs/sandbox.ts +279 -0
  27. package/src/init/index.ts +69 -52
  28. package/src/init/templates.ts +1 -1
  29. package/src/mcpx/client.ts +1 -1
  30. package/src/schedules/schema.ts +19 -0
  31. package/src/schedules/store.ts +296 -0
  32. package/src/skills/commands.ts +1 -3
  33. package/src/tasks/schema.ts +47 -0
  34. package/src/tasks/store.ts +486 -0
  35. package/src/threads/store.ts +559 -0
  36. package/src/tools/capabilities/refresh.ts +42 -21
  37. package/src/tools/context/pipe.ts +15 -71
  38. package/src/tools/context/update-beliefs.ts +3 -3
  39. package/src/tools/context/update-goals.ts +3 -3
  40. package/src/tools/dir/create.ts +26 -23
  41. package/src/tools/dir/size.ts +46 -17
  42. package/src/tools/dir/tree.ts +73 -279
  43. package/src/tools/file/copy.ts +50 -24
  44. package/src/tools/file/count-lines.ts +34 -10
  45. package/src/tools/file/delete.ts +44 -23
  46. package/src/tools/file/edit.ts +39 -14
  47. package/src/tools/file/exists.ts +12 -26
  48. package/src/tools/file/info.ts +25 -85
  49. package/src/tools/file/move.ts +39 -24
  50. package/src/tools/file/read.ts +32 -80
  51. package/src/tools/file/write.ts +14 -91
  52. package/src/tools/registry.ts +3 -7
  53. package/src/tools/schedule/create.ts +2 -2
  54. package/src/tools/schedule/list.ts +7 -3
  55. package/src/tools/search/fuse.ts +12 -33
  56. package/src/tools/search/index.ts +36 -43
  57. package/src/tools/search/regexp.ts +29 -17
  58. package/src/tools/search/semantic.ts +137 -51
  59. package/src/tools/skill/delete.ts +1 -1
  60. package/src/tools/skill/list.ts +1 -1
  61. package/src/tools/skill/write.ts +1 -1
  62. package/src/tools/task/create.ts +41 -16
  63. package/src/tools/task/delete.ts +3 -3
  64. package/src/tools/task/list.ts +6 -3
  65. package/src/tools/task/update.ts +31 -9
  66. package/src/tools/task/view.ts +6 -6
  67. package/src/tools/thread/list.ts +2 -2
  68. package/src/tools/thread/search.ts +208 -0
  69. package/src/tools/thread/view.ts +50 -5
  70. package/src/tools/worker/spawn.ts +28 -14
  71. package/src/tui/App.tsx +12 -19
  72. package/src/tui/components/ContextPanel.tsx +83 -316
  73. package/src/tui/components/SchedulePanel.tsx +34 -48
  74. package/src/tui/components/StatusBar.tsx +15 -15
  75. package/src/tui/components/TaskPanel.tsx +34 -38
  76. package/src/tui/components/ThreadPanel.tsx +29 -38
  77. package/src/tui/components/WorkerPanel.tsx +21 -19
  78. package/src/tui/markdown.ts +2 -8
  79. package/src/utils/title.ts +5 -7
  80. package/src/utils/v7-date.ts +47 -0
  81. package/src/worker/heartbeat.ts +46 -24
  82. package/src/worker/index.ts +13 -15
  83. package/src/worker/llm.ts +30 -37
  84. package/src/worker/prompt.ts +19 -41
  85. package/src/worker/schedules.ts +48 -69
  86. package/src/worker/spawn.ts +11 -11
  87. package/src/worker/tick.ts +39 -43
  88. package/src/workers/store.ts +247 -0
  89. package/src/commands/tools.ts +0 -367
  90. package/src/context/describer.ts +0 -140
  91. package/src/context/drives.ts +0 -110
  92. package/src/context/ingest.ts +0 -162
  93. package/src/context/refresh.ts +0 -183
  94. package/src/db/context.ts +0 -637
  95. package/src/db/daemon-state.ts +0 -6
  96. package/src/db/reembed.ts +0 -113
  97. package/src/db/schedules.ts +0 -213
  98. package/src/db/tasks.ts +0 -347
  99. package/src/db/threads.ts +0 -276
  100. package/src/db/workers.ts +0 -212
  101. package/src/tools/context/list-drives.ts +0 -36
  102. package/src/tools/context/refresh.ts +0 -165
  103. package/src/tools/context/search.ts +0 -54
@@ -0,0 +1,189 @@
1
+ import { createHash } from "node:crypto";
2
+ import { readFile, stat } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import type { BotholomewConfig } from "../config/schemas.ts";
5
+ import { CONTEXT_DIR } from "../constants.ts";
6
+ import { withDb } from "../db/connection.ts";
7
+ import {
8
+ type ChunkInput,
9
+ deleteIndexedPath,
10
+ getIndexedPath,
11
+ listIndexedPaths,
12
+ rebuildSearchIndex,
13
+ upsertChunksForPath,
14
+ } from "../db/embeddings.ts";
15
+ import { logger } from "../utils/logger.ts";
16
+ import { chunkByTextSplit } from "./chunker.ts";
17
+ import { embed as defaultEmbed } from "./embedder.ts";
18
+ import { listContextDir } from "./store.ts";
19
+
/**
 * Shape of the embedding function used while reindexing. It receives a batch
 * of texts together with the fully-resolved config and resolves to an array
 * of numeric vectors (expected to line up index-for-index with `texts`).
 *
 * Exported so tests can inject a fake embedder.
 */
export type EmbedFn = (
  texts: string[],
  config: Required<BotholomewConfig>,
) => Promise<number[][]>;
25
+
/**
 * Walk every textual file under `<projectDir>/context/` and reconcile the
 * disk-backed search index. Adds new files, replaces stale ones whose
 * content_hash changed, and drops index rows for files that no longer exist
 * (or that exist but no longer produce any chunks).
 *
 * Uses the deterministic text splitter (`chunkByTextSplit`) — never the LLM
 * chunker — so a fresh project with no API key still indexes successfully.
 *
 * @param projectDir Project root; files are read from its context directory.
 * @param config Resolved config, forwarded to the embed function and used for
 *   the fallback vector length.
 * @param dbPath Path handed to `withDb` for every index read/write.
 * @param opts Optional progress callback and embed override (for tests).
 * @returns Counts of files added / updated / unchanged / removed and the
 *   total number of chunks written.
 */
export async function reindexContext(
  projectDir: string,
  config: Required<BotholomewConfig>,
  dbPath: string,
  opts: {
    onProgress?: (msg: string) => void;
    /** Override embed for tests; defaults to the real WASM embedder. */
    embedFn?: EmbedFn;
  } = {},
): Promise<ReindexSummary> {
  const onProgress = opts.onProgress ?? (() => {});
  const embed = opts.embedFn ?? defaultEmbed;

  // 1. Walk context/ for every textual file along with its current
  //    (path, hash, mtime, size). Binary files are intentionally skipped —
  //    embeddings on bytes are meaningless and would just consume storage.
  onProgress("scanning files");
  const onDisk = await collectDiskFiles(projectDir);

  // 2. Read the existing index so we can decide what's add / update / skip /
  //    remove without re-embedding files that haven't changed.
  const indexed = await withDb(dbPath, listIndexedPaths);
  const indexedByPath = new Map(indexed.map((r) => [r.path, r]));

  let added = 0;
  let updated = 0;
  let unchanged = 0;
  let removed = 0;
  let chunksWritten = 0;

  // 3. For each file on disk: skip if the indexed content hash matches the
  //    on-disk content hash; otherwise (re)embed. Every path handled here is
  //    removed from `indexedByPath` so that only true orphans remain for
  //    step 4.
  for (const file of onDisk) {
    const existing = indexedByPath.get(file.path);
    if (existing && existing.content_hash === file.contentHash) {
      unchanged++;
      indexedByPath.delete(file.path);
      continue;
    }

    onProgress(`embedding ${file.path}`);
    let text: string;
    try {
      text = await readFile(join(projectDir, CONTEXT_DIR, file.path), "utf-8");
    } catch (err) {
      // The file vanished or became unreadable between the scan and now.
      // Skip it instead of aborting the whole reindex. The path is left in
      // `indexedByPath`, so any stale rows are dropped in step 4 — the same
      // outcome as a file that could not be read during the scan.
      logger.warn(`reindex: skipping ${file.path}: ${err}`);
      continue;
    }

    const chunks = chunkByTextSplit(text);
    if (chunks.length === 0) {
      // Empty/whitespace-only file: nothing to index. If it used to have
      // rows, drop them exactly once and count the removal here rather than
      // letting the orphan pass delete the same path a second time.
      if (existing) {
        await withDb(dbPath, (conn) => deleteIndexedPath(conn, file.path));
        removed++;
        indexedByPath.delete(file.path);
      }
      continue;
    }

    const vectors = await embed(
      chunks.map((c) => c.content),
      config,
    );
    const inputs: ChunkInput[] = chunks.map((c, i) => ({
      chunk_index: c.index,
      chunk_content: c.content,
      // If the embedder returned fewer vectors than chunks, fall back to a
      // zero vector of the configured dimension for the missing ones.
      embedding: vectors[i] ?? new Array(config.embedding_dimension).fill(0),
    }));
    await withDb(dbPath, (conn) =>
      upsertChunksForPath(conn, {
        path: file.path,
        contentHash: file.contentHash,
        mtimeMs: file.mtimeMs,
        sizeBytes: file.sizeBytes,
        chunks: inputs,
      }),
    );
    if (existing) updated++;
    else added++;
    chunksWritten += inputs.length;
    indexedByPath.delete(file.path);
  }

  // 4. Anything left in indexedByPath is in the index but was not handled
  //    above (not on disk, or unreadable) → delete its rows so search
  //    results don't surface ghost files.
  for (const orphan of indexedByPath.keys()) {
    await withDb(dbPath, (conn) => deleteIndexedPath(conn, orphan));
    removed++;
  }

  // Only pay for the FTS rebuild when the index contents actually changed.
  if (added + updated + removed > 0) {
    onProgress("rebuilding FTS index");
    await withDb(dbPath, rebuildSearchIndex);
  }

  return { added, updated, unchanged, removed, chunksWritten };
}
126
+
/** Per-run counters returned by `reindexContext`. */
export interface ReindexSummary {
  /** Files that had no index entry and were embedded and written. */
  added: number;
  /** Files that already had an index entry and were re-embedded. */
  updated: number;
  /** Files whose indexed content hash matched the on-disk hash. */
  unchanged: number;
  /** Paths whose index rows were deleted during the run. */
  removed: number;
  /** Total number of chunks written across all added/updated files. */
  chunksWritten: number;
}
134
+
/** Snapshot of one textual file found while scanning the context directory. */
interface DiskFile {
  /** Path of the file relative to the context directory. */
  path: string;
  /** Hex-encoded SHA-256 digest of the file's bytes. */
  contentHash: string;
  /** Modification time in milliseconds, as reported by `stat`. */
  mtimeMs: number;
  /** File size in bytes, as reported by `stat`. */
  sizeBytes: number;
}
141
+
/**
 * Recursively list the project's context directory and return one
 * `DiskFile` (relative path, SHA-256 content hash, mtime, size) for every
 * textual, non-directory entry.
 *
 * A file that cannot be stat'ed or read — for example because it was removed
 * between the directory listing and the read — is logged and skipped rather
 * than failing the whole scan.
 *
 * @param projectDir Project root whose context directory is scanned.
 * @returns Snapshots of all readable textual files.
 */
async function collectDiskFiles(projectDir: string): Promise<DiskFile[]> {
  const entries = await listContextDir(projectDir, "", { recursive: true });
  const out: DiskFile[] = [];
  for (const e of entries) {
    if (e.is_directory) continue;
    if (!e.is_textual) continue;
    const abs = join(projectDir, CONTEXT_DIR, e.path);
    try {
      // Both calls sit in the same guard: the file can disappear after a
      // successful stat just as easily as before it.
      const st = await stat(abs);
      const buf = await readFile(abs);
      out.push({
        path: e.path,
        contentHash: createHash("sha256").update(buf).digest("hex"),
        mtimeMs: st.mtimeMs,
        sizeBytes: st.size,
      });
    } catch (err) {
      logger.warn(`reindex: skipping ${e.path}: ${err}`);
    }
  }
  return out;
}
167
+
/**
 * Remove one path's rows from the index and rebuild the search index right
 * away, both inside a single `withDb` callback. Intended for file/dir tool
 * callers that delete or move a file and want the index updated immediately
 * rather than on the next full reindex.
 *
 * @param dbPath Path handed to `withDb`.
 * @param path Indexed path to drop.
 */
export async function dropIndexedPath(
  dbPath: string,
  path: string,
): Promise<void> {
  await withDb(dbPath, async (db) => {
    await deleteIndexedPath(db, path);
    await rebuildSearchIndex(db);
  });
}
182
+
/**
 * Look up a single path in the index.
 *
 * @param dbPath Path handed to `withDb`.
 * @param path Indexed path to look up.
 * @returns `{ chunks }` carrying the row's `chunk_count`, or `null` when the
 *   lookup yields no row.
 */
export async function getIndexEntry(
  dbPath: string,
  path: string,
): Promise<{ chunks: number } | null> {
  const entry = await withDb(dbPath, (db) => getIndexedPath(db, path));
  if (!entry) {
    return null;
  }
  return { chunks: entry.chunk_count };
}