botholomew 0.12.5 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +91 -68
  2. package/package.json +2 -2
  3. package/src/chat/agent.ts +42 -82
  4. package/src/chat/session.ts +29 -25
  5. package/src/commands/capabilities.ts +1 -1
  6. package/src/commands/context.ts +177 -926
  7. package/src/commands/db.ts +9 -13
  8. package/src/commands/init.ts +4 -1
  9. package/src/commands/nuke.ts +57 -90
  10. package/src/commands/schedule.ts +103 -124
  11. package/src/commands/skill.ts +2 -2
  12. package/src/commands/task.ts +86 -95
  13. package/src/commands/thread.ts +107 -112
  14. package/src/commands/worker.ts +88 -88
  15. package/src/constants.ts +93 -16
  16. package/src/context/capabilities.ts +10 -10
  17. package/src/context/fetcher.ts +9 -10
  18. package/src/context/reindex.ts +189 -0
  19. package/src/context/store.ts +630 -0
  20. package/src/db/doctor.ts +1 -8
  21. package/src/db/embeddings.ts +227 -175
  22. package/src/db/sql/19-disk_backed_index.sql +36 -0
  23. package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
  24. package/src/fs/atomic.ts +217 -0
  25. package/src/fs/compat.ts +86 -0
  26. package/src/fs/sandbox.ts +279 -0
  27. package/src/init/index.ts +69 -52
  28. package/src/init/templates.ts +1 -1
  29. package/src/mcpx/client.ts +1 -1
  30. package/src/schedules/schema.ts +19 -0
  31. package/src/schedules/store.ts +296 -0
  32. package/src/skills/commands.ts +1 -3
  33. package/src/tasks/schema.ts +47 -0
  34. package/src/tasks/store.ts +486 -0
  35. package/src/threads/store.ts +559 -0
  36. package/src/tools/capabilities/refresh.ts +42 -21
  37. package/src/tools/context/pipe.ts +15 -71
  38. package/src/tools/context/update-beliefs.ts +3 -3
  39. package/src/tools/context/update-goals.ts +3 -3
  40. package/src/tools/dir/create.ts +26 -23
  41. package/src/tools/dir/size.ts +46 -17
  42. package/src/tools/dir/tree.ts +73 -279
  43. package/src/tools/file/copy.ts +50 -24
  44. package/src/tools/file/count-lines.ts +34 -10
  45. package/src/tools/file/delete.ts +44 -23
  46. package/src/tools/file/edit.ts +39 -14
  47. package/src/tools/file/exists.ts +12 -26
  48. package/src/tools/file/info.ts +25 -85
  49. package/src/tools/file/move.ts +39 -24
  50. package/src/tools/file/read.ts +32 -80
  51. package/src/tools/file/write.ts +14 -91
  52. package/src/tools/registry.ts +3 -7
  53. package/src/tools/schedule/create.ts +2 -2
  54. package/src/tools/schedule/list.ts +7 -3
  55. package/src/tools/search/fuse.ts +12 -33
  56. package/src/tools/search/index.ts +36 -43
  57. package/src/tools/search/regexp.ts +29 -17
  58. package/src/tools/search/semantic.ts +137 -51
  59. package/src/tools/skill/delete.ts +1 -1
  60. package/src/tools/skill/list.ts +1 -1
  61. package/src/tools/skill/write.ts +1 -1
  62. package/src/tools/task/create.ts +41 -16
  63. package/src/tools/task/delete.ts +3 -3
  64. package/src/tools/task/list.ts +6 -3
  65. package/src/tools/task/update.ts +31 -9
  66. package/src/tools/task/view.ts +6 -6
  67. package/src/tools/thread/list.ts +2 -2
  68. package/src/tools/thread/search.ts +208 -0
  69. package/src/tools/thread/view.ts +50 -5
  70. package/src/tools/worker/spawn.ts +28 -14
  71. package/src/tui/App.tsx +12 -19
  72. package/src/tui/components/ContextPanel.tsx +83 -316
  73. package/src/tui/components/SchedulePanel.tsx +34 -48
  74. package/src/tui/components/StatusBar.tsx +15 -15
  75. package/src/tui/components/TaskPanel.tsx +34 -38
  76. package/src/tui/components/ThreadPanel.tsx +29 -38
  77. package/src/tui/components/WorkerPanel.tsx +21 -19
  78. package/src/tui/markdown.ts +2 -8
  79. package/src/utils/title.ts +5 -7
  80. package/src/utils/v7-date.ts +47 -0
  81. package/src/worker/heartbeat.ts +46 -24
  82. package/src/worker/index.ts +13 -15
  83. package/src/worker/llm.ts +30 -37
  84. package/src/worker/prompt.ts +19 -41
  85. package/src/worker/schedules.ts +48 -69
  86. package/src/worker/spawn.ts +11 -11
  87. package/src/worker/tick.ts +39 -43
  88. package/src/workers/store.ts +247 -0
  89. package/src/commands/tools.ts +0 -367
  90. package/src/context/describer.ts +0 -140
  91. package/src/context/drives.ts +0 -110
  92. package/src/context/ingest.ts +0 -162
  93. package/src/context/refresh.ts +0 -183
  94. package/src/db/context.ts +0 -637
  95. package/src/db/daemon-state.ts +0 -6
  96. package/src/db/reembed.ts +0 -113
  97. package/src/db/schedules.ts +0 -213
  98. package/src/db/tasks.ts +0 -347
  99. package/src/db/threads.ts +0 -276
  100. package/src/db/workers.ts +0 -212
  101. package/src/tools/context/list-drives.ts +0 -36
  102. package/src/tools/context/refresh.ts +0 -165
  103. package/src/tools/context/search.ts +0 -54
@@ -1,968 +1,219 @@
1
- import { readdir, stat } from "node:fs/promises";
2
- import { basename, join, resolve } from "node:path";
1
+ import { stat } from "node:fs/promises";
2
+ import { join } from "node:path";
3
3
  import ansis from "ansis";
4
4
  import type { Command } from "commander";
5
- import { isText } from "istextorbinary";
6
5
  import { createSpinner } from "nanospinner";
7
6
  import { loadConfig } from "../config/loader.ts";
8
- import type { BotholomewConfig } from "../config/schemas.ts";
9
- import { getDbPath } from "../constants.ts";
10
- import { generateDescription } from "../context/describer.ts";
7
+ import { CONTEXT_DIR, getDbPath } from "../constants.ts";
8
+ import { fetchUrl } from "../context/fetcher.ts";
9
+ import { reindexContext } from "../context/reindex.ts";
11
10
  import {
12
- type DriveTarget,
13
- detectDriveFromUrl,
14
- formatDriveRef,
15
- parseDriveRef,
16
- } from "../context/drives.ts";
17
- import { FetchFailureError, fetchUrl } from "../context/fetcher.ts";
18
- import {
19
- type PreparedIngestion,
20
- prepareIngestion,
21
- storeIngestion,
22
- } from "../context/ingest.ts";
23
- import { refreshContextItems } from "../context/refresh.ts";
24
- import { isUrl } from "../context/url-utils.ts";
25
- import type { DbConnection } from "../db/connection.ts";
26
- import {
27
- type ContextItem,
28
- createContextItemStrict,
29
- deleteContextItemByPath,
30
- getContextItem,
31
- getDistinctDirectories,
32
- listContextItems,
33
- listContextItemsByPrefix,
34
- PathConflictError,
35
- resolveContextItem,
36
- upsertContextItem,
37
- } from "../db/context.ts";
38
- import { getEmbeddingsForItem } from "../db/embeddings.ts";
39
- import { reembedMissingVectors } from "../db/reembed.ts";
11
+ buildTree,
12
+ fileExists,
13
+ listContextDir,
14
+ type TreeNode,
15
+ writeContextFile,
16
+ } from "../context/store.ts";
17
+ import { withDb } from "../db/connection.ts";
18
+ import { indexStats } from "../db/embeddings.ts";
19
+ import { migrate } from "../db/schema.ts";
40
20
  import { createMcpxClient } from "../mcpx/client.ts";
41
- import { searchTool } from "../tools/search/index.ts";
42
- import type { ToolContext } from "../tools/tool.ts";
43
21
  import { logger } from "../utils/logger.ts";
44
- import { registerContextToolSubcommands } from "./tools.ts";
45
- import { withDb } from "./with-db.ts";
46
-
47
- function fmtDate(d: Date): string {
48
- const pad = (n: number) => String(n).padStart(2, "0");
49
- return `${d.getFullYear()}-${pad(d.getMonth() + 1)}-${pad(d.getDate())} ${pad(d.getHours())}:${pad(d.getMinutes())}`;
50
- }
51
22
 
52
23
  export function registerContextCommand(program: Command) {
53
- const ctx = program.command("context").description("Manage context");
54
-
55
- ctx
56
- .command("list")
57
- .description("List context entries")
58
- .option("--drive <drive>", "filter by drive (e.g. disk, url, agent)")
59
- .option("--path <prefix>", "filter by path prefix (requires --drive)")
60
- .option(
61
- "--non-recursive",
62
- "list only immediate children; include directories",
63
- )
64
- .option("-l, --limit <n>", "max number of items", Number.parseInt)
65
- .option("-o, --offset <n>", "skip first N items", Number.parseInt)
66
- .action((opts) =>
67
- withDb(program, async (conn) => {
68
- if (opts.path && !opts.drive) {
69
- logger.error("--path requires --drive to scope the prefix.");
70
- process.exit(1);
71
- }
72
- if (opts.nonRecursive && !opts.drive) {
73
- logger.error(
74
- "--non-recursive requires --drive to scope the listing.",
75
- );
76
- process.exit(1);
77
- }
78
-
79
- const prefix = opts.path ?? (opts.nonRecursive ? "/" : null);
80
- const items = prefix
81
- ? await listContextItemsByPrefix(conn, opts.drive, prefix, {
82
- recursive: !opts.nonRecursive,
83
- limit: opts.limit,
84
- offset: opts.offset,
85
- })
86
- : await listContextItems(conn, {
87
- drive: opts.drive,
88
- limit: opts.limit,
89
- offset: opts.offset,
90
- });
91
-
92
- const dirs = opts.nonRecursive
93
- ? await getDistinctDirectories(conn, opts.drive, opts.path ?? "/")
94
- : [];
95
-
96
- if (items.length === 0 && dirs.length === 0) {
97
- logger.dim("No context entries found.");
98
- return;
99
- }
100
-
101
- const header = `${ansis.bold("ID".padEnd(36))} ${ansis.bold("Ref".padEnd(50))} ${"Title".padEnd(20)} ${"Description".padEnd(30)} ${"Type".padEnd(15)} ${"Updated".padEnd(18)} Indexed`;
102
- console.log(header);
103
- console.log("-".repeat(header.length));
104
-
105
- const dash = ansis.dim("—");
106
- for (const dir of dirs) {
107
- const ref = formatDriveRef({ drive: opts.drive, path: `${dir}/` });
108
- console.log(
109
- `${dash.padEnd(36)} ${ansis.cyan(ref.slice(0, 49).padEnd(50))} ${dash.padEnd(20)} ${dash.padEnd(30)} ${ansis.dim("directory".padEnd(15))} ${dash.padEnd(18)} ${dash}`,
110
- );
111
- }
112
-
113
- for (const item of items) {
114
- const indexed = item.indexed_at
115
- ? ansis.green("yes")
116
- : ansis.dim("no");
117
- const updated = ansis.dim(fmtDate(item.updated_at).padEnd(18));
118
- const desc = item.description
119
- ? ansis.dim(item.description.slice(0, 29).padEnd(30))
120
- : ansis.dim("".padEnd(30));
121
- const id = ansis.dim(item.id.padEnd(36));
122
- const ref = formatDriveRef(item);
123
- console.log(
124
- `${id} ${ref.slice(0, 49).padEnd(50)} ${item.title.slice(0, 19).padEnd(20)} ${desc} ${item.mime_type.slice(0, 14).padEnd(15)} ${updated} ${indexed}`,
125
- );
126
- }
127
-
128
- const totals: string[] = [];
129
- if (dirs.length > 0) {
130
- totals.push(`${dirs.length} dir(s)`);
131
- }
132
- totals.push(`${items.length} item(s)`);
133
- console.log(`\n${ansis.dim(totals.join(", "))}`);
134
- }),
24
+ const context = program
25
+ .command("context")
26
+ .description(
27
+ "Inspect and manage the on-disk context/ tree (the agent's knowledge store)",
135
28
  );
136
29
 
137
- ctx
138
- .command("add <paths...>")
139
- .description("Add files, directories, or URLs to context")
30
+ // ---- import --------------------------------------------------------------
31
+ context
32
+ .command("import <url>")
33
+ .description(
34
+ "Fetch a URL via MCP (Google Docs, Firecrawl, GitHub, etc.) and write the result into context/.",
35
+ )
140
36
  .option(
141
- "--on-conflict <policy>",
142
- "collision policy: error | overwrite | skip",
143
- "skip",
37
+ "-p, --path <path>",
38
+ "destination path under context/ (default: derived from the URL)",
144
39
  )
145
40
  .option(
146
- "--prompt-addition <text>",
147
- "extra guidance for the URL fetcher agent (e.g., auth notes, tool hints)",
41
+ "--prompt <text>",
42
+ "extra guidance passed to the LLM-driven fetcher (e.g. 'export as markdown')",
148
43
  )
149
- .action((paths: string[], opts) =>
150
- withDb(program, async (conn, dir) => {
151
- type ConflictPolicy = "error" | "overwrite" | "skip";
152
- const policy = opts.onConflict as ConflictPolicy;
153
- if (!["error", "overwrite", "skip"].includes(policy)) {
154
- logger.error(
155
- `Invalid --on-conflict value: ${policy} (must be error, overwrite, or skip)`,
156
- );
157
- process.exit(1);
158
- }
159
-
160
- type FileToAdd = { filePath: string; target: DriveTarget };
161
- type UrlToAdd = { url: string; target: DriveTarget | null };
162
- const filesToAdd: FileToAdd[] = [];
163
- const urlsToAdd: UrlToAdd[] = [];
164
- const spinner = createSpinner("Scanning paths...").start();
165
-
166
- for (const path of paths) {
167
- if (isUrl(path)) {
168
- // We defer drive detection until after the fetch — the MCP server
169
- // name is a useful hint — but pre-compute a best-guess from the URL
170
- // alone for dedup against existing (drive, path) rows.
171
- urlsToAdd.push({
172
- url: path,
173
- target: detectDriveFromUrl(path),
174
- });
175
- } else {
176
- const resolvedPath = resolve(path);
177
- let info: Awaited<ReturnType<typeof stat>>;
178
- try {
179
- info = await stat(resolvedPath);
180
- } catch {
181
- spinner.error({ text: `Path not found: ${resolvedPath}` });
182
- process.exit(1);
183
- }
184
-
185
- if (info.isDirectory()) {
186
- const entries = await walkDirectory(resolvedPath);
187
- for (const filePath of entries) {
188
- filesToAdd.push({
189
- filePath,
190
- target: { drive: "disk", path: filePath },
191
- });
192
- }
193
- } else {
194
- filesToAdd.push({
195
- filePath: resolvedPath,
196
- target: { drive: "disk", path: resolvedPath },
197
- });
198
- }
199
- }
200
- }
201
-
202
- const totalCount = filesToAdd.length + urlsToAdd.length;
203
- spinner.success({
204
- text: `Found ${totalCount} item(s) to add (${filesToAdd.length} file(s), ${urlsToAdd.length} URL(s)).`,
44
+ .option("--overwrite", "replace an existing file at the destination path")
45
+ .action(async (url: string, opts) => {
46
+ const dir = program.opts().dir;
47
+ const config = await loadConfig(dir);
48
+ const mcpxClient = await createMcpxClient(dir);
49
+ const spinner = createSpinner(`fetching ${url}`).start();
50
+ try {
51
+ const fetched = await fetchUrl(url, config, mcpxClient, opts.prompt);
52
+ spinner.update({ text: "writing to context/" });
53
+ const dest = opts.path ?? deriveContextPath(url, fetched.source);
54
+ await writeContextFile(dir, dest, fetched.content, {
55
+ onConflict: opts.overwrite ? "overwrite" : "error",
205
56
  });
206
-
207
- const config = await loadConfig(dir);
208
- const CONCURRENCY = 10;
209
-
210
- // Phase 0: (drive, path) dedup — items already in context are routed
211
- // per --on-conflict before we pay for the describe or fetch.
212
- type AlreadyInContext = {
213
- target: DriveTarget;
214
- existing: ContextItem;
215
- };
216
- const alreadyInContext: AlreadyInContext[] = [];
217
- const remainingFiles: FileToAdd[] = [];
218
- const remainingUrls: UrlToAdd[] = [];
219
-
220
- for (const f of filesToAdd) {
221
- const existing = await getContextItem(conn, f.target);
222
- if (existing) {
223
- alreadyInContext.push({ target: f.target, existing });
224
- } else {
225
- remainingFiles.push(f);
226
- }
227
- }
228
- for (const u of urlsToAdd) {
229
- if (!u.target) {
230
- remainingUrls.push(u);
231
- continue;
232
- }
233
- const existing = await getContextItem(conn, u.target);
234
- if (existing) {
235
- alreadyInContext.push({ target: u.target, existing });
236
- } else {
237
- remainingUrls.push(u);
238
- }
239
- }
240
-
241
- let refreshedCount = 0;
242
- let refreshedChunks = 0;
243
- const dedupSkipped: string[] = [];
244
-
245
- if (alreadyInContext.length > 0) {
246
- if (policy === "error") {
247
- logger.error(
248
- `${alreadyInContext.length} item(s) already in context:`,
249
- );
250
- for (const a of alreadyInContext) {
251
- console.log(
252
- ` ${ansis.red("✗")} ${formatDriveRef(a.target)} (id: ${a.existing.id})`,
253
- );
254
- }
255
- logger.dim(
256
- "Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them.",
257
- );
258
- process.exit(1);
259
- }
260
-
261
- if (policy === "skip") {
262
- for (const a of alreadyInContext) {
263
- logger.dim(`⊘ already in context: ${formatDriveRef(a.target)}`);
264
- dedupSkipped.push(formatDriveRef(a.target));
265
- }
266
- } else {
267
- const itemsToRefresh = alreadyInContext.map((a) => a.existing);
268
- const hasUrls = itemsToRefresh.some((i) => i.drive !== "disk");
269
- const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
270
-
271
- const refreshSpinner = createSpinner(
272
- `Refreshing 0/${itemsToRefresh.length} existing item(s)...`,
273
- ).start();
274
- const refreshResult = await refreshContextItems(
275
- conn,
276
- itemsToRefresh,
277
- config,
278
- mcpxClient,
279
- {
280
- onItemProgress: (done, total) => {
281
- refreshSpinner.update({
282
- text: `Refreshing ${done}/${total} existing item(s)...`,
283
- });
284
- },
285
- },
286
- );
287
- refreshSpinner.success({
288
- text: `Refreshed ${refreshResult.checked} existing item(s): ${refreshResult.updated} updated, ${refreshResult.unchanged} unchanged, ${refreshResult.missing} missing.`,
289
- });
290
-
291
- refreshedCount = refreshResult.updated + refreshResult.unchanged;
292
- refreshedChunks = refreshResult.chunks;
293
- for (const item of refreshResult.items) {
294
- if (item.status === "missing") {
295
- logger.warn(` Missing: ${item.ref}`);
296
- } else if (item.status === "error") {
297
- logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
298
- }
299
- }
300
- }
301
- }
302
-
303
- // Phase 1: Upsert DB records (batched, parallel LLM descriptions)
304
- let addCompleted = 0;
305
- const itemIds: { id: string; target: DriveTarget }[] = [];
306
- const conflicts: { target: DriveTarget; existingId: string }[] = [];
307
- const skipped: string[] = [];
308
-
309
- if (remainingFiles.length > 0) {
310
- const fileSpinner = createSpinner(
311
- `Adding and describing 0/${remainingFiles.length} file(s)...`,
312
- ).start();
313
-
314
- for (let i = 0; i < remainingFiles.length; i += CONCURRENCY) {
315
- const batch = remainingFiles.slice(i, i + CONCURRENCY);
316
- const results = await Promise.all(
317
- batch.map(async ({ filePath, target }) => {
318
- const result = await addFile(
319
- conn,
320
- filePath,
321
- target,
322
- config,
323
- policy,
324
- );
325
- addCompleted++;
326
- fileSpinner.update({
327
- text: `Adding and describing ${addCompleted}/${remainingFiles.length} file(s)...`,
328
- });
329
- return result;
330
- }),
331
- );
332
- for (const r of results) {
333
- if (!r) continue;
334
- if (r.kind === "added") {
335
- itemIds.push({ id: r.id, target: r.target });
336
- } else if (r.kind === "conflict") {
337
- conflicts.push({ target: r.target, existingId: r.existingId });
338
- } else if (r.kind === "skipped") {
339
- skipped.push(formatDriveRef(r.target));
340
- }
341
- }
342
- }
343
-
344
- fileSpinner.success({
345
- text: `Added and described ${addCompleted} file(s).`,
346
- });
347
- }
348
-
349
- if (remainingUrls.length > 0) {
350
- const mcpxClient = await createMcpxClient(dir);
351
- if (!mcpxClient) {
352
- logger.dim(
353
- "No MCP servers configured — remote fetches will use basic HTTP.",
354
- );
355
- }
356
-
357
- let urlIdx = 0;
358
- let urlAdded = 0;
359
- for (const { url } of remainingUrls) {
360
- urlIdx++;
361
- console.log(
362
- `\n${ansis.bold(`[${urlIdx}/${remainingUrls.length}]`)} ${ansis.cyan(url)}`,
363
- );
364
- const result = await addUrl(
365
- conn,
366
- config,
367
- url,
368
- mcpxClient,
369
- opts.promptAddition,
370
- policy,
371
- );
372
- if (result.ok) {
373
- urlAdded++;
374
- itemIds.push({ id: result.id, target: result.target });
375
- console.log(
376
- ` ${ansis.green("✔")} stored at ${formatDriveRef(result.target)}`,
377
- );
378
- } else if (result.kind === "conflict") {
379
- conflicts.push({
380
- target: result.target,
381
- existingId: result.existingId,
382
- });
383
- console.log(
384
- ` ${ansis.red("✗")} path already exists: ${formatDriveRef(result.target)}`,
385
- );
386
- } else if (result.kind === "skipped") {
387
- skipped.push(formatDriveRef(result.target));
388
- console.log(
389
- ` ${ansis.yellow("⊘")} skipped (path exists): ${formatDriveRef(result.target)}`,
390
- );
391
- } else if (result.actionable) {
392
- console.log(
393
- ` ${ansis.red("✗")} ${ansis.bold("action required:")}`,
394
- );
395
- for (const line of result.error.split("\n")) {
396
- console.log(` ${ansis.yellow(line)}`);
397
- }
398
- } else {
399
- console.log(
400
- ` ${ansis.red("✗")} failed to fetch: ${result.error}`,
401
- );
402
- }
403
- }
404
-
405
- const urlSummary = `Added ${urlAdded}/${remainingUrls.length} URL(s).`;
406
- if (urlAdded === remainingUrls.length) {
407
- console.log(`\n${ansis.green("✔")} ${urlSummary}`);
408
- } else if (urlAdded === 0) {
409
- console.log(`\n${ansis.red("✗")} ${urlSummary}`);
410
- } else {
411
- console.log(`\n${ansis.yellow("⚠")} ${urlSummary}`);
412
- }
413
- }
414
-
415
- if (conflicts.length > 0) {
416
- logger.error(
417
- `${conflicts.length} (drive, path) collision(s) — nothing written for these items:`,
418
- );
419
- for (const c of conflicts) {
420
- console.log(
421
- ` ${ansis.red("✗")} ${formatDriveRef(c.target)} (existing id: ${c.existingId})`,
422
- );
423
- }
424
- }
425
-
426
- skipped.push(...dedupSkipped);
427
-
428
- if (itemIds.length === 0) {
429
- const msg = buildSummary({
430
- added: itemIds.length,
431
- refreshed: refreshedCount,
432
- skipped: skipped.length,
433
- chunks: refreshedChunks,
434
- totalCount,
435
- handled: itemIds.length + refreshedCount + skipped.length,
436
- });
437
- if (conflicts.length > 0) {
438
- logger.error(msg);
439
- process.exit(1);
440
- }
441
- if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
442
- logger.success(msg);
443
- process.exit(0);
444
- } else if (itemIds.length === 0 && refreshedCount === 0) {
445
- logger.error(msg);
446
- process.exit(1);
447
- } else {
448
- logger.warn(msg);
449
- process.exit(1);
450
- }
451
- }
452
-
453
- let completed = 0;
454
- const embedSpinner = createSpinner(
455
- `Embedding 0/${itemIds.length} items...`,
456
- ).start();
457
-
458
- const prepared: PreparedIngestion[] = [];
459
- for (let i = 0; i < itemIds.length; i += CONCURRENCY) {
460
- const batch = itemIds.slice(i, i + CONCURRENCY);
461
- const results = await Promise.all(
462
- batch.map(async ({ id }) => {
463
- const result = await prepareIngestion(conn, id, config);
464
- completed++;
465
- embedSpinner.update({
466
- text: `Embedding ${completed}/${itemIds.length} items...`,
467
- });
468
- return result;
469
- }),
470
- );
471
- for (const r of results) {
472
- if (r) prepared.push(r);
473
- }
474
- }
475
- embedSpinner.success({
476
- text: `Embedded ${prepared.length} item(s).`,
57
+ spinner.success({
58
+ text: `imported ${fetched.content.length} bytes → ${ansis.bold(`context/${dest}`)} (source: ${fetched.source ?? "http"})`,
477
59
  });
478
-
479
- let chunks = 0;
480
- let filesAdded = 0;
481
- let filesUpdated = 0;
482
- for (const p of prepared) {
483
- const result = await storeIngestion(conn, p);
484
- chunks += result.chunks;
485
- if (result.isUpdate) filesUpdated++;
486
- else filesAdded++;
487
- }
488
-
489
- const summary = buildSummary({
490
- added: filesAdded,
491
- updated: filesUpdated,
492
- refreshed: refreshedCount,
493
- skipped: skipped.length,
494
- chunks: chunks + refreshedChunks,
495
- totalCount,
496
- handled: itemIds.length + refreshedCount + skipped.length,
60
+ } catch (err) {
61
+ spinner.error({
62
+ text: `import failed: ${err instanceof Error ? err.message : String(err)}`,
497
63
  });
498
- if (conflicts.length > 0) {
499
- logger.error(summary);
500
- process.exit(1);
501
- }
502
- if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
503
- logger.success(summary);
504
- process.exit(0);
505
- } else {
506
- logger.warn(summary);
507
- process.exit(1);
508
- }
509
- }),
510
- );
64
+ process.exit(1);
65
+ } finally {
66
+ await mcpxClient?.close();
67
+ }
68
+ });
511
69
 
512
- const search = ctx
513
- .command("search")
514
- .description("Search context entries (hybrid regexp + semantic)")
515
- .argument(
516
- "[query]",
517
- "natural-language query (semantic + BM25). Combine with --pattern for fused regexp + semantic ranking.",
518
- )
519
- .option("-k, --top-k <n>", "max results", Number.parseInt, 20)
520
- .option(
521
- "--pattern <regex>",
522
- "regex pattern (regexp side). May be combined with [query] to fuse signals.",
70
+ // ---- reindex -------------------------------------------------------------
71
+ context
72
+ .command("reindex")
73
+ .description(
74
+ "Walk context/ and reconcile the search index: embed new files, re-embed changed ones, drop rows for removed ones.",
523
75
  )
524
- .option("--drive <drive>", "restrict to a single drive")
525
- .option("--path <path>", "directory prefix within drive (requires --drive)")
526
- .option("--glob <glob>", "filter results to files whose basename matches")
527
- .option("--ignore-case", "case-insensitive regex")
76
+ .action(async () => {
77
+ const dir = program.opts().dir;
78
+ const config = await loadConfig(dir);
79
+ const dbPath = getDbPath(dir);
80
+ // The migrate() call ensures the index DB is initialized, including
81
+ // the context_index table from migration 19, before we try to write.
82
+ await withDb(dbPath, migrate);
83
+ const spinner = createSpinner("reindexing").start();
84
+ const summary = await reindexContext(dir, config, dbPath, {
85
+ onProgress: (msg) => spinner.update({ text: msg }),
86
+ });
87
+ const parts = [
88
+ `${summary.added} added`,
89
+ `${summary.updated} updated`,
90
+ `${summary.unchanged} unchanged`,
91
+ `${summary.removed} removed`,
92
+ `${summary.chunksWritten} chunks written`,
93
+ ];
94
+ spinner.success({ text: parts.join(", ") });
95
+ });
96
+
97
+ // ---- tree ---------------------------------------------------------------
98
+ context
99
+ .command("tree [path]")
100
+ .description("Render the context/ tree (or a subdirectory).")
528
101
  .option(
529
- "--context <n>",
530
- "context lines around each regexp hit",
102
+ "-d, --max-depth <n>",
103
+ "max directory depth to render",
531
104
  Number.parseInt,
105
+ 10,
532
106
  )
533
- .action((query, opts) =>
534
- withDb(program, async (conn, dir) => {
535
- if (!query && !opts.pattern) {
536
- search.help();
537
- return;
538
- }
539
- const config = await loadConfig(dir);
540
- const toolCtx: ToolContext = {
541
- conn,
542
- dbPath: getDbPath(dir),
543
- projectDir: dir,
544
- config,
545
- mcpxClient: null,
546
- };
547
- const result = await searchTool.execute(
548
- {
549
- query,
550
- pattern: opts.pattern,
551
- drive: opts.drive,
552
- path: opts.path,
553
- glob: opts.glob,
554
- ignore_case: opts.ignoreCase,
555
- context: opts.context,
556
- max_results: opts.topK,
557
- },
558
- toolCtx,
559
- );
560
-
561
- if (result.is_error) {
562
- logger.error(result.message ?? "Search failed");
563
- process.exit(1);
564
- }
565
-
566
- if (result.matches.length === 0) {
567
- logger.dim("No results found.");
568
- return;
569
- }
570
-
571
- for (const [i, m] of result.matches.entries()) {
572
- const tagColor =
573
- m.match_type === "both"
574
- ? ansis.green
575
- : m.match_type === "regexp"
576
- ? ansis.yellow
577
- : ansis.cyan;
578
- const tag = tagColor(`[${m.match_type}]`);
579
- const location = m.line != null ? `${m.ref}:${m.line}` : m.ref;
580
- console.log(
581
- `${ansis.bold(`${i + 1}.`)} ${tag} ${ansis.cyan(location)} ${ansis.dim(`score=${m.score.toFixed(4)}`)}`,
582
- );
583
- const snippet = m.content.slice(0, 200).replace(/\n/g, " ");
584
- if (snippet) console.log(` ${snippet}`);
585
- console.log("");
586
- }
587
- }),
588
- );
589
-
590
- ctx
591
- .command("delete <ref>")
592
- .description("Delete a context entry (UUID or drive:/path)")
593
- .action((ref: string) =>
594
- withDb(program, async (conn) => {
595
- const item = await resolveContextItem(conn, ref);
596
- if (!item) {
597
- logger.error(`Context entry not found: ${ref}`);
598
- process.exit(1);
599
- }
600
- await deleteContextItemByPath(conn, {
601
- drive: item.drive,
602
- path: item.path,
603
- });
604
- logger.success(`Deleted context entry: ${formatDriveRef(item)}`);
605
- }),
606
- );
607
- ctx
608
- .command("chunks <ref>")
609
- .description("Show chunks and embeddings for a context entry")
610
- .action((ref: string) =>
611
- withDb(program, async (conn) => {
612
- const item = await resolveContextItem(conn, ref);
613
- if (!item) {
614
- logger.error(`Context entry not found: ${ref}`);
615
- process.exit(1);
616
- }
617
-
618
- if (!item.indexed_at) {
619
- logger.dim("Item has not been indexed yet.");
620
- return;
621
- }
622
-
623
- const embeddings = await getEmbeddingsForItem(conn, item.id);
624
-
625
- console.log(ansis.bold(item.title));
626
- console.log(` Ref: ${formatDriveRef(item)}`);
627
- console.log(` Indexed: ${fmtDate(item.indexed_at)}`);
628
- console.log(` Chunks: ${embeddings.length}`);
629
- console.log("");
630
-
631
- for (const emb of embeddings) {
632
- const preview = emb.chunk_content
633
- ? emb.chunk_content.slice(0, 200).replace(/\n/g, " ") +
634
- (emb.chunk_content.length > 200 ? "..." : "")
635
- : ansis.dim("(no content)");
636
- const chars = emb.chunk_content?.length ?? 0;
637
-
638
- console.log(
639
- `${ansis.bold(`Chunk ${emb.chunk_index}`)} ${ansis.dim(`${chars} chars, ${emb.embedding.length} dims`)}`,
640
- );
641
- console.log(` ${preview}`);
642
- console.log("");
643
- }
644
-
645
- const totalChars = embeddings.reduce(
646
- (sum, e) => sum + (e.chunk_content?.length ?? 0),
647
- 0,
648
- );
649
- console.log(
650
- ansis.dim(`${embeddings.length} chunk(s), ${totalChars} total chars`),
651
- );
652
- }),
653
- );
107
+ .action(async (path: string | undefined, opts) => {
108
+ const dir = program.opts().dir;
109
+ const node = await buildTree(dir, path ?? "", opts.maxDepth);
110
+ console.log(renderTreeAnsi(node));
111
+ });
654
112
 
655
- ctx
656
- .command("refresh [refs...]")
113
+ // ---- stats --------------------------------------------------------------
114
+ context
115
+ .command("stats")
657
116
  .description(
658
- "Re-import items from their origin (disk / URL / MCP) and re-embed if content changed",
117
+ "Counts and sizes for files under context/ and rows in the search index.",
659
118
  )
660
- .option("--all", "refresh every item (except those on drive=agent)")
661
- .action((refs: string[], opts: { all?: boolean }) =>
662
- withDb(program, async (conn, dir) => {
663
- const items = await resolveItems(conn, refs, !!opts.all);
664
- if (items.length === 0) {
665
- logger.error("No matching context entries found.");
666
- process.exit(1);
667
- }
668
-
669
- const refreshable = items.filter((i) => i.drive !== "agent");
670
- if (refreshable.length === 0) {
671
- logger.dim("No refreshable items (everything is on drive=agent).");
672
- return;
673
- }
674
- if (refreshable.length < items.length) {
675
- logger.dim(
676
- `Skipping ${items.length - refreshable.length} agent-drive item(s) with no external origin.`,
677
- );
119
+ .action(async () => {
120
+ const dir = program.opts().dir;
121
+ const dbPath = getDbPath(dir);
122
+ const exists = await fileExists(dir, "");
123
+ if (!exists) {
124
+ logger.dim(`context/ does not exist under ${dir}`);
125
+ return;
126
+ }
127
+ const entries = await listContextDir(dir, "", { recursive: true });
128
+ let files = 0;
129
+ let textual = 0;
130
+ let bytes = 0;
131
+ for (const e of entries) {
132
+ if (e.is_directory) continue;
133
+ files++;
134
+ if (e.is_textual) textual++;
135
+ try {
136
+ const st = await stat(join(dir, CONTEXT_DIR, e.path));
137
+ bytes += st.size;
138
+ } catch {
139
+ // file vanished mid-walk — skip
678
140
  }
679
-
680
- const config = await loadConfig(dir);
681
-
682
- const hasUrls = refreshable.some((i) => i.drive !== "disk");
683
- const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
684
-
685
- const refreshSpinner = createSpinner(
686
- `Refreshing 0/${refreshable.length} items...`,
687
- ).start();
688
- const embedSpinner = createSpinner("Embedding 0 item(s)...");
689
-
690
- const result = await refreshContextItems(
691
- conn,
692
- refreshable,
693
- config,
694
- mcpxClient,
695
- {
696
- onItemProgress: (done, total) => {
697
- refreshSpinner.update({
698
- text: `Refreshing ${done}/${total} items...`,
699
- });
700
- },
701
- onEmbedProgress: (done, total) => {
702
- if (done === 1) embedSpinner.start();
703
- embedSpinner.update({
704
- text: `Embedding ${done}/${total} item(s)...`,
705
- });
706
- },
707
- },
141
+ }
142
+ const idx = await withDb(dbPath, async (conn) => {
143
+ await migrate(conn);
144
+ return indexStats(conn);
145
+ });
146
+ const rows = [
147
+ ["files", String(files)],
148
+ ["textual", String(textual)],
149
+ ["binary", String(files - textual)],
150
+ ["bytes on disk", formatBytes(bytes)],
151
+ ["indexed paths", String(idx.paths)],
152
+ ["index chunks", String(idx.chunks)],
153
+ ["embedded chunks", String(idx.embedded)],
154
+ ];
155
+ const labelWidth = Math.max(...rows.map((r) => r[0]?.length ?? 0));
156
+ for (const [label, value] of rows) {
157
+ console.log(
158
+ ` ${ansis.dim((label ?? "").padEnd(labelWidth))} ${value}`,
708
159
  );
709
-
710
- refreshSpinner.success({
711
- text: `Checked ${result.checked} item(s): ${result.updated} updated, ${result.unchanged} unchanged, ${result.missing} missing.`,
712
- });
713
-
714
- for (const item of result.items) {
715
- if (item.status === "missing") {
716
- logger.warn(` Missing: ${item.ref}`);
717
- } else if (item.status === "error") {
718
- logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
719
- }
720
- }
721
-
722
- if (result.reembedded > 0) {
723
- embedSpinner.success({
724
- text: `Embedded ${result.reembedded} item(s).`,
725
- });
726
- logger.success(
727
- `Refreshed ${result.updated} item(s), ${result.chunks} chunk(s) re-indexed.`,
728
- );
729
- }
730
- }),
731
- );
732
-
733
- ctx
734
- .command("reembed")
735
- .description(
736
- "Recompute every embedding using the configured local model. Run this after upgrading or after changing embedding_model.",
737
- )
738
- .action(() =>
739
- withDb(program, async (_conn, dir) => {
740
- const config = await loadConfig(dir);
741
- const dbPath = getDbPath(dir);
742
- await reembedMissingVectors(dbPath, config, { mode: "all" });
743
- }),
744
- );
745
-
746
- registerContextToolSubcommands(ctx);
747
- }
748
-
749
- async function resolveItems(
750
- conn: DbConnection,
751
- refs: string[],
752
- all: boolean,
753
- ): Promise<ContextItem[]> {
754
- if (!all && refs.length === 0) {
755
- logger.error("Provide at least one ref or use --all.");
756
- process.exit(1);
757
- }
758
- if (all) return listContextItems(conn);
759
-
760
- const byId = new Map<string, ContextItem>();
761
- const unresolved: string[] = [];
762
- for (const r of refs) {
763
- const matched = await resolveOne(conn, r);
764
- if (matched.length === 0) {
765
- unresolved.push(r);
766
- continue;
767
- }
768
- for (const item of matched) byId.set(item.id, item);
769
- }
770
- for (const r of unresolved) logger.warn(` Not found: ${r}`);
771
- return [...byId.values()];
772
- }
773
-
774
- async function resolveOne(
775
- conn: DbConnection,
776
- ref: string,
777
- ): Promise<ContextItem[]> {
778
- const exact = await resolveContextItem(conn, ref);
779
- if (exact) return [exact];
780
- // Prefix expansion: only valid for `drive:/path` form.
781
- const parsed = parseDriveRef(ref);
782
- if (parsed) {
783
- return listContextItemsByPrefix(conn, parsed.drive, parsed.path, {
784
- recursive: true,
160
+ }
785
161
  });
786
- }
787
- return [];
788
162
  }
789
163
 
790
- type ConflictPolicy = "error" | "overwrite" | "skip";
791
-
792
- function buildSummary(args: {
793
- added: number;
794
- updated?: number;
795
- refreshed: number;
796
- skipped: number;
797
- chunks: number;
798
- totalCount: number;
799
- handled?: number;
800
- }): string {
801
- const parts: string[] = [];
802
- if (args.added > 0) parts.push(`${args.added} added`);
803
- if (args.updated && args.updated > 0) parts.push(`${args.updated} updated`);
804
- if (args.refreshed > 0) parts.push(`${args.refreshed} refreshed`);
805
- if (args.skipped > 0) parts.push(`${args.skipped} skipped`);
806
- const body = parts.length > 0 ? parts.join(", ") : "0 added";
807
- const handled = args.handled ?? args.added + args.refreshed + args.skipped;
808
- return `${body} — ${args.chunks} chunk(s) indexed (${handled}/${args.totalCount} item(s)).`;
164
/**
 * Choose a default destination, relative to context/, for content fetched
 * from `url` when the user did not pass --path.
 *
 * The result always has the shape `<root>/<slug>.md`:
 *   - `<slug>` is whatever `slugifyUrl(url)` returns;
 *   - `<root>` is `source` when one is given (MCP-served fetches, e.g.
 *     "google-docs"), and the literal "url" otherwise (raw HTTP fallbacks).
 *
 * @param url    The URL the content was fetched from.
 * @param source Label used as the top-level folder, or null when there is none.
 * @returns      A relative path ending in ".md".
 */
function deriveContextPath(url: string, source: string | null): string {
  // `== null` mirrors nullish-coalescing: only null/undefined fall back to "url".
  const folder = source == null ? "url" : source;
  const fileName = `${slugifyUrl(url)}.md`;
  return [folder, fileName].join("/");
}
810
175
 
811
- type AddFileResult =
812
- | { kind: "added"; id: string; target: DriveTarget }
813
- | { kind: "skipped"; target: DriveTarget }
814
- | { kind: "conflict"; target: DriveTarget; existingId: string }
815
- | { kind: "failed"; target: DriveTarget; error: string };
816
-
817
- async function addFile(
818
- conn: DbConnection,
819
- filePath: string,
820
- target: DriveTarget,
821
- config: Required<BotholomewConfig>,
822
- policy: ConflictPolicy,
823
- ): Promise<AddFileResult | null> {
176
+ function slugifyUrl(url: string): string {
177
+ let parsed: URL;
824
178
  try {
825
- if (policy !== "overwrite") {
826
- const existing = await getContextItem(conn, target);
827
- if (existing) {
828
- if (policy === "skip") {
829
- logger.dim(` ⊘ skipped (exists): ${formatDriveRef(target)}`);
830
- return { kind: "skipped", target };
831
- }
832
- return {
833
- kind: "conflict",
834
- target,
835
- existingId: existing.id,
836
- };
837
- }
838
- }
839
-
840
- const bunFile = Bun.file(filePath);
841
- const mimeType = bunFile.type.split(";")[0] || "application/octet-stream";
842
- const filename = basename(filePath);
843
- const textual = isText(filename) !== false;
844
- const content = textual ? await bunFile.text() : null;
845
-
846
- const description = await generateDescription(config, {
847
- filename,
848
- mimeType,
849
- content,
850
- filePath,
851
- });
852
-
853
- const itemParams = {
854
- title: filename,
855
- description,
856
- content: content ?? undefined,
857
- mimeType,
858
- drive: target.drive,
859
- path: target.path,
860
- isTextual: textual,
861
- } as const;
862
-
863
- const item =
864
- policy === "overwrite"
865
- ? await upsertContextItem(conn, itemParams)
866
- : await createContextItemStrict(conn, itemParams);
867
-
868
- return textual && content ? { kind: "added", id: item.id, target } : null;
869
- } catch (err) {
870
- if (err instanceof PathConflictError) {
871
- return { kind: "conflict", target, existingId: err.existingId };
872
- }
873
- logger.warn(` ! ${formatDriveRef(target)}: ${err}`);
874
- return { kind: "failed", target, error: String(err) };
179
+ parsed = new URL(url);
180
+ } catch {
181
+ return url.replace(/[^a-z0-9]+/gi, "-").slice(0, 80);
875
182
  }
183
+ const path = parsed.pathname.replace(/^\/+|\/+$/g, "").replace(/\//g, "_");
184
+ const base = path || parsed.hostname;
185
+ return `${parsed.hostname}_${base}`
186
+ .replace(/[^a-z0-9._-]+/gi, "-")
187
+ .replace(/-+/g, "-")
188
+ .slice(0, 80);
876
189
  }
877
190
 
878
- type AddUrlResult =
879
- | { ok: true; id: string; target: DriveTarget }
880
- | { ok: false; kind: "conflict"; target: DriveTarget; existingId: string }
881
- | { ok: false; kind: "skipped"; target: DriveTarget }
882
- | { ok: false; kind: "fetch-failed"; error: string; actionable: boolean };
883
-
884
- async function addUrl(
885
- conn: DbConnection,
886
- config: Required<BotholomewConfig>,
887
- url: string,
888
- mcpxClient: Awaited<ReturnType<typeof createMcpxClient>>,
889
- promptAddition: string | undefined,
890
- policy: ConflictPolicy,
891
- ): Promise<AddUrlResult> {
892
- try {
893
- const fetched = await fetchUrl(url, config, mcpxClient, promptAddition);
894
- const target: DriveTarget = { drive: fetched.drive, path: fetched.path };
895
-
896
- if (policy !== "overwrite") {
897
- const existing = await getContextItem(conn, target);
898
- if (existing) {
899
- if (policy === "skip") return { ok: false, kind: "skipped", target };
900
- return { ok: false, kind: "conflict", target, existingId: existing.id };
901
- }
902
- }
903
-
904
- const description = await generateDescription(config, {
905
- filename: new URL(url).hostname,
906
- mimeType: fetched.mimeType,
907
- content: fetched.content,
191
+ function renderTreeAnsi(
192
+ node: TreeNode,
193
+ prefix = "",
194
+ isLast = true,
195
+ isRoot = true,
196
+ ): string {
197
+ const lines: string[] = [];
198
+ const connector = isRoot ? "" : isLast ? "└── " : "├── ";
199
+ const label = node.is_directory
200
+ ? ansis.blue(node.name === "." ? "context/" : `${node.name}/`)
201
+ : node.name;
202
+ lines.push(`${prefix}${connector}${label}`);
203
+ if (node.is_directory && node.children) {
204
+ const childPrefix = isRoot ? "" : prefix + (isLast ? " " : "│ ");
205
+ const children = node.children;
206
+ children.forEach((c, i) => {
207
+ const last = i === children.length - 1;
208
+ lines.push(renderTreeAnsi(c, childPrefix, last, false));
908
209
  });
909
-
910
- const itemParams = {
911
- title: fetched.title,
912
- description,
913
- content: fetched.content,
914
- mimeType: fetched.mimeType,
915
- drive: target.drive,
916
- path: target.path,
917
- isTextual: true,
918
- sourceUrl: fetched.sourceUrl,
919
- };
920
-
921
- const item =
922
- policy === "overwrite"
923
- ? await upsertContextItem(conn, itemParams)
924
- : await createContextItemStrict(conn, itemParams);
925
-
926
- return { ok: true, id: item.id, target };
927
- } catch (err) {
928
- if (err instanceof PathConflictError) {
929
- return {
930
- ok: false,
931
- kind: "conflict",
932
- target: { drive: err.drive, path: err.path },
933
- existingId: err.existingId,
934
- };
935
- }
936
- if (err instanceof FetchFailureError) {
937
- return {
938
- ok: false,
939
- kind: "fetch-failed",
940
- error: err.userMessage,
941
- actionable: true,
942
- };
943
- }
944
- return {
945
- ok: false,
946
- kind: "fetch-failed",
947
- error: String(err),
948
- actionable: false,
949
- };
950
210
  }
211
+ return lines.join("\n");
951
212
  }
952
213
 
953
- async function walkDirectory(dirPath: string): Promise<string[]> {
954
- const files: string[] = [];
955
- const entries = await readdir(dirPath, { withFileTypes: true });
956
-
957
- for (const entry of entries) {
958
- const fullPath = join(dirPath, entry.name);
959
- if (entry.isDirectory()) {
960
- if (entry.name.startsWith(".")) continue; // skip hidden dirs
961
- files.push(...(await walkDirectory(fullPath)));
962
- } else if (entry.isFile()) {
963
- files.push(fullPath);
964
- }
965
- }
966
-
967
- return files;
214
/**
 * Render a byte count as a short human-readable string using 1024-based units.
 *
 * Values below 1024 are printed as whole bytes ("512 B"); anything larger is
 * scaled to the biggest unit that keeps the number >= 1 and printed with one
 * decimal place ("1.5 KB", "2.0 TB"). Values beyond the largest known unit
 * stay in that unit ("1024.0 PB") instead of indexing past the unit table.
 *
 * @param n Size in bytes.
 * @returns Formatted size; "0 B" for zero, negative, or NaN input.
 */
function formatBytes(n: number): string {
  const units = ["B", "KB", "MB", "GB", "TB", "PB"];
  // Covers 0, negatives and NaN, none of which has a meaningful unit.
  if (!(n > 0)) return "0 B";
  let value = n;
  let unitIndex = 0;
  // Dividing by 1024 is exact in binary floating point, so this avoids the
  // rounding hazards of Math.log(n) / Math.log(1024) and can never produce an
  // index outside the units table.
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }
  return `${value.toFixed(unitIndex > 0 ? 1 : 0)} ${units[unitIndex]}`;
}