botholomew 0.12.5 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -68
- package/package.json +2 -2
- package/src/chat/agent.ts +42 -82
- package/src/chat/session.ts +29 -25
- package/src/commands/capabilities.ts +1 -1
- package/src/commands/context.ts +177 -926
- package/src/commands/db.ts +9 -13
- package/src/commands/init.ts +4 -1
- package/src/commands/nuke.ts +57 -90
- package/src/commands/schedule.ts +103 -124
- package/src/commands/skill.ts +2 -2
- package/src/commands/task.ts +86 -95
- package/src/commands/thread.ts +107 -112
- package/src/commands/worker.ts +88 -88
- package/src/constants.ts +93 -16
- package/src/context/capabilities.ts +10 -10
- package/src/context/fetcher.ts +9 -10
- package/src/context/reindex.ts +189 -0
- package/src/context/store.ts +630 -0
- package/src/db/doctor.ts +1 -8
- package/src/db/embeddings.ts +227 -175
- package/src/db/sql/19-disk_backed_index.sql +36 -0
- package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
- package/src/fs/atomic.ts +217 -0
- package/src/fs/compat.ts +86 -0
- package/src/fs/sandbox.ts +279 -0
- package/src/init/index.ts +69 -52
- package/src/init/templates.ts +1 -1
- package/src/mcpx/client.ts +1 -1
- package/src/schedules/schema.ts +19 -0
- package/src/schedules/store.ts +296 -0
- package/src/skills/commands.ts +1 -3
- package/src/tasks/schema.ts +47 -0
- package/src/tasks/store.ts +486 -0
- package/src/threads/store.ts +559 -0
- package/src/tools/capabilities/refresh.ts +42 -21
- package/src/tools/context/pipe.ts +15 -71
- package/src/tools/context/update-beliefs.ts +3 -3
- package/src/tools/context/update-goals.ts +3 -3
- package/src/tools/dir/create.ts +26 -23
- package/src/tools/dir/size.ts +46 -17
- package/src/tools/dir/tree.ts +73 -279
- package/src/tools/file/copy.ts +50 -24
- package/src/tools/file/count-lines.ts +34 -10
- package/src/tools/file/delete.ts +44 -23
- package/src/tools/file/edit.ts +39 -14
- package/src/tools/file/exists.ts +12 -26
- package/src/tools/file/info.ts +25 -85
- package/src/tools/file/move.ts +39 -24
- package/src/tools/file/read.ts +32 -80
- package/src/tools/file/write.ts +14 -91
- package/src/tools/registry.ts +3 -7
- package/src/tools/schedule/create.ts +2 -2
- package/src/tools/schedule/list.ts +7 -3
- package/src/tools/search/fuse.ts +12 -33
- package/src/tools/search/index.ts +36 -43
- package/src/tools/search/regexp.ts +29 -17
- package/src/tools/search/semantic.ts +137 -51
- package/src/tools/skill/delete.ts +1 -1
- package/src/tools/skill/list.ts +1 -1
- package/src/tools/skill/write.ts +1 -1
- package/src/tools/task/create.ts +41 -16
- package/src/tools/task/delete.ts +3 -3
- package/src/tools/task/list.ts +6 -3
- package/src/tools/task/update.ts +31 -9
- package/src/tools/task/view.ts +6 -6
- package/src/tools/thread/list.ts +2 -2
- package/src/tools/thread/search.ts +208 -0
- package/src/tools/thread/view.ts +50 -5
- package/src/tools/worker/spawn.ts +28 -14
- package/src/tui/App.tsx +12 -19
- package/src/tui/components/ContextPanel.tsx +83 -316
- package/src/tui/components/SchedulePanel.tsx +34 -48
- package/src/tui/components/StatusBar.tsx +15 -15
- package/src/tui/components/TaskPanel.tsx +34 -38
- package/src/tui/components/ThreadPanel.tsx +29 -38
- package/src/tui/components/WorkerPanel.tsx +21 -19
- package/src/tui/markdown.ts +2 -8
- package/src/utils/title.ts +5 -7
- package/src/utils/v7-date.ts +47 -0
- package/src/worker/heartbeat.ts +46 -24
- package/src/worker/index.ts +13 -15
- package/src/worker/llm.ts +30 -37
- package/src/worker/prompt.ts +19 -41
- package/src/worker/schedules.ts +48 -69
- package/src/worker/spawn.ts +11 -11
- package/src/worker/tick.ts +39 -43
- package/src/workers/store.ts +247 -0
- package/src/commands/tools.ts +0 -367
- package/src/context/describer.ts +0 -140
- package/src/context/drives.ts +0 -110
- package/src/context/ingest.ts +0 -162
- package/src/context/refresh.ts +0 -183
- package/src/db/context.ts +0 -637
- package/src/db/daemon-state.ts +0 -6
- package/src/db/reembed.ts +0 -113
- package/src/db/schedules.ts +0 -213
- package/src/db/tasks.ts +0 -347
- package/src/db/threads.ts +0 -276
- package/src/db/workers.ts +0 -212
- package/src/tools/context/list-drives.ts +0 -36
- package/src/tools/context/refresh.ts +0 -165
- package/src/tools/context/search.ts +0 -54
package/src/commands/context.ts
CHANGED
|
@@ -1,968 +1,219 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { stat } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
3
|
import ansis from "ansis";
|
|
4
4
|
import type { Command } from "commander";
|
|
5
|
-
import { isText } from "istextorbinary";
|
|
6
5
|
import { createSpinner } from "nanospinner";
|
|
7
6
|
import { loadConfig } from "../config/loader.ts";
|
|
8
|
-
import
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
7
|
+
import { CONTEXT_DIR, getDbPath } from "../constants.ts";
|
|
8
|
+
import { fetchUrl } from "../context/fetcher.ts";
|
|
9
|
+
import { reindexContext } from "../context/reindex.ts";
|
|
11
10
|
import {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
import {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
storeIngestion,
|
|
22
|
-
} from "../context/ingest.ts";
|
|
23
|
-
import { refreshContextItems } from "../context/refresh.ts";
|
|
24
|
-
import { isUrl } from "../context/url-utils.ts";
|
|
25
|
-
import type { DbConnection } from "../db/connection.ts";
|
|
26
|
-
import {
|
|
27
|
-
type ContextItem,
|
|
28
|
-
createContextItemStrict,
|
|
29
|
-
deleteContextItemByPath,
|
|
30
|
-
getContextItem,
|
|
31
|
-
getDistinctDirectories,
|
|
32
|
-
listContextItems,
|
|
33
|
-
listContextItemsByPrefix,
|
|
34
|
-
PathConflictError,
|
|
35
|
-
resolveContextItem,
|
|
36
|
-
upsertContextItem,
|
|
37
|
-
} from "../db/context.ts";
|
|
38
|
-
import { getEmbeddingsForItem } from "../db/embeddings.ts";
|
|
39
|
-
import { reembedMissingVectors } from "../db/reembed.ts";
|
|
11
|
+
buildTree,
|
|
12
|
+
fileExists,
|
|
13
|
+
listContextDir,
|
|
14
|
+
type TreeNode,
|
|
15
|
+
writeContextFile,
|
|
16
|
+
} from "../context/store.ts";
|
|
17
|
+
import { withDb } from "../db/connection.ts";
|
|
18
|
+
import { indexStats } from "../db/embeddings.ts";
|
|
19
|
+
import { migrate } from "../db/schema.ts";
|
|
40
20
|
import { createMcpxClient } from "../mcpx/client.ts";
|
|
41
|
-
import { searchTool } from "../tools/search/index.ts";
|
|
42
|
-
import type { ToolContext } from "../tools/tool.ts";
|
|
43
21
|
import { logger } from "../utils/logger.ts";
|
|
44
|
-
import { registerContextToolSubcommands } from "./tools.ts";
|
|
45
|
-
import { withDb } from "./with-db.ts";
|
|
46
|
-
|
|
47
|
-
function fmtDate(d: Date): string {
|
|
48
|
-
const pad = (n: number) => String(n).padStart(2, "0");
|
|
49
|
-
return `${d.getFullYear()}-${pad(d.getMonth() + 1)}-${pad(d.getDate())} ${pad(d.getHours())}:${pad(d.getMinutes())}`;
|
|
50
|
-
}
|
|
51
22
|
|
|
52
23
|
export function registerContextCommand(program: Command) {
|
|
53
|
-
const
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
.description("List context entries")
|
|
58
|
-
.option("--drive <drive>", "filter by drive (e.g. disk, url, agent)")
|
|
59
|
-
.option("--path <prefix>", "filter by path prefix (requires --drive)")
|
|
60
|
-
.option(
|
|
61
|
-
"--non-recursive",
|
|
62
|
-
"list only immediate children; include directories",
|
|
63
|
-
)
|
|
64
|
-
.option("-l, --limit <n>", "max number of items", Number.parseInt)
|
|
65
|
-
.option("-o, --offset <n>", "skip first N items", Number.parseInt)
|
|
66
|
-
.action((opts) =>
|
|
67
|
-
withDb(program, async (conn) => {
|
|
68
|
-
if (opts.path && !opts.drive) {
|
|
69
|
-
logger.error("--path requires --drive to scope the prefix.");
|
|
70
|
-
process.exit(1);
|
|
71
|
-
}
|
|
72
|
-
if (opts.nonRecursive && !opts.drive) {
|
|
73
|
-
logger.error(
|
|
74
|
-
"--non-recursive requires --drive to scope the listing.",
|
|
75
|
-
);
|
|
76
|
-
process.exit(1);
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
const prefix = opts.path ?? (opts.nonRecursive ? "/" : null);
|
|
80
|
-
const items = prefix
|
|
81
|
-
? await listContextItemsByPrefix(conn, opts.drive, prefix, {
|
|
82
|
-
recursive: !opts.nonRecursive,
|
|
83
|
-
limit: opts.limit,
|
|
84
|
-
offset: opts.offset,
|
|
85
|
-
})
|
|
86
|
-
: await listContextItems(conn, {
|
|
87
|
-
drive: opts.drive,
|
|
88
|
-
limit: opts.limit,
|
|
89
|
-
offset: opts.offset,
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
const dirs = opts.nonRecursive
|
|
93
|
-
? await getDistinctDirectories(conn, opts.drive, opts.path ?? "/")
|
|
94
|
-
: [];
|
|
95
|
-
|
|
96
|
-
if (items.length === 0 && dirs.length === 0) {
|
|
97
|
-
logger.dim("No context entries found.");
|
|
98
|
-
return;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
const header = `${ansis.bold("ID".padEnd(36))} ${ansis.bold("Ref".padEnd(50))} ${"Title".padEnd(20)} ${"Description".padEnd(30)} ${"Type".padEnd(15)} ${"Updated".padEnd(18)} Indexed`;
|
|
102
|
-
console.log(header);
|
|
103
|
-
console.log("-".repeat(header.length));
|
|
104
|
-
|
|
105
|
-
const dash = ansis.dim("—");
|
|
106
|
-
for (const dir of dirs) {
|
|
107
|
-
const ref = formatDriveRef({ drive: opts.drive, path: `${dir}/` });
|
|
108
|
-
console.log(
|
|
109
|
-
`${dash.padEnd(36)} ${ansis.cyan(ref.slice(0, 49).padEnd(50))} ${dash.padEnd(20)} ${dash.padEnd(30)} ${ansis.dim("directory".padEnd(15))} ${dash.padEnd(18)} ${dash}`,
|
|
110
|
-
);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
for (const item of items) {
|
|
114
|
-
const indexed = item.indexed_at
|
|
115
|
-
? ansis.green("yes")
|
|
116
|
-
: ansis.dim("no");
|
|
117
|
-
const updated = ansis.dim(fmtDate(item.updated_at).padEnd(18));
|
|
118
|
-
const desc = item.description
|
|
119
|
-
? ansis.dim(item.description.slice(0, 29).padEnd(30))
|
|
120
|
-
: ansis.dim("".padEnd(30));
|
|
121
|
-
const id = ansis.dim(item.id.padEnd(36));
|
|
122
|
-
const ref = formatDriveRef(item);
|
|
123
|
-
console.log(
|
|
124
|
-
`${id} ${ref.slice(0, 49).padEnd(50)} ${item.title.slice(0, 19).padEnd(20)} ${desc} ${item.mime_type.slice(0, 14).padEnd(15)} ${updated} ${indexed}`,
|
|
125
|
-
);
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
const totals: string[] = [];
|
|
129
|
-
if (dirs.length > 0) {
|
|
130
|
-
totals.push(`${dirs.length} dir(s)`);
|
|
131
|
-
}
|
|
132
|
-
totals.push(`${items.length} item(s)`);
|
|
133
|
-
console.log(`\n${ansis.dim(totals.join(", "))}`);
|
|
134
|
-
}),
|
|
24
|
+
const context = program
|
|
25
|
+
.command("context")
|
|
26
|
+
.description(
|
|
27
|
+
"Inspect and manage the on-disk context/ tree (the agent's knowledge store)",
|
|
135
28
|
);
|
|
136
29
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
.
|
|
30
|
+
// ---- import --------------------------------------------------------------
|
|
31
|
+
context
|
|
32
|
+
.command("import <url>")
|
|
33
|
+
.description(
|
|
34
|
+
"Fetch a URL via MCP (Google Docs, Firecrawl, GitHub, etc.) and write the result into context/.",
|
|
35
|
+
)
|
|
140
36
|
.option(
|
|
141
|
-
"
|
|
142
|
-
"
|
|
143
|
-
"skip",
|
|
37
|
+
"-p, --path <path>",
|
|
38
|
+
"destination path under context/ (default: derived from the URL)",
|
|
144
39
|
)
|
|
145
40
|
.option(
|
|
146
|
-
"--prompt
|
|
147
|
-
"extra guidance
|
|
41
|
+
"--prompt <text>",
|
|
42
|
+
"extra guidance passed to the LLM-driven fetcher (e.g. 'export as markdown')",
|
|
148
43
|
)
|
|
149
|
-
.
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
type UrlToAdd = { url: string; target: DriveTarget | null };
|
|
162
|
-
const filesToAdd: FileToAdd[] = [];
|
|
163
|
-
const urlsToAdd: UrlToAdd[] = [];
|
|
164
|
-
const spinner = createSpinner("Scanning paths...").start();
|
|
165
|
-
|
|
166
|
-
for (const path of paths) {
|
|
167
|
-
if (isUrl(path)) {
|
|
168
|
-
// We defer drive detection until after the fetch — the MCP server
|
|
169
|
-
// name is a useful hint — but pre-compute a best-guess from the URL
|
|
170
|
-
// alone for dedup against existing (drive, path) rows.
|
|
171
|
-
urlsToAdd.push({
|
|
172
|
-
url: path,
|
|
173
|
-
target: detectDriveFromUrl(path),
|
|
174
|
-
});
|
|
175
|
-
} else {
|
|
176
|
-
const resolvedPath = resolve(path);
|
|
177
|
-
let info: Awaited<ReturnType<typeof stat>>;
|
|
178
|
-
try {
|
|
179
|
-
info = await stat(resolvedPath);
|
|
180
|
-
} catch {
|
|
181
|
-
spinner.error({ text: `Path not found: ${resolvedPath}` });
|
|
182
|
-
process.exit(1);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
if (info.isDirectory()) {
|
|
186
|
-
const entries = await walkDirectory(resolvedPath);
|
|
187
|
-
for (const filePath of entries) {
|
|
188
|
-
filesToAdd.push({
|
|
189
|
-
filePath,
|
|
190
|
-
target: { drive: "disk", path: filePath },
|
|
191
|
-
});
|
|
192
|
-
}
|
|
193
|
-
} else {
|
|
194
|
-
filesToAdd.push({
|
|
195
|
-
filePath: resolvedPath,
|
|
196
|
-
target: { drive: "disk", path: resolvedPath },
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
const totalCount = filesToAdd.length + urlsToAdd.length;
|
|
203
|
-
spinner.success({
|
|
204
|
-
text: `Found ${totalCount} item(s) to add (${filesToAdd.length} file(s), ${urlsToAdd.length} URL(s)).`,
|
|
44
|
+
.option("--overwrite", "replace an existing file at the destination path")
|
|
45
|
+
.action(async (url: string, opts) => {
|
|
46
|
+
const dir = program.opts().dir;
|
|
47
|
+
const config = await loadConfig(dir);
|
|
48
|
+
const mcpxClient = await createMcpxClient(dir);
|
|
49
|
+
const spinner = createSpinner(`fetching ${url}`).start();
|
|
50
|
+
try {
|
|
51
|
+
const fetched = await fetchUrl(url, config, mcpxClient, opts.prompt);
|
|
52
|
+
spinner.update({ text: "writing to context/" });
|
|
53
|
+
const dest = opts.path ?? deriveContextPath(url, fetched.source);
|
|
54
|
+
await writeContextFile(dir, dest, fetched.content, {
|
|
55
|
+
onConflict: opts.overwrite ? "overwrite" : "error",
|
|
205
56
|
});
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const CONCURRENCY = 10;
|
|
209
|
-
|
|
210
|
-
// Phase 0: (drive, path) dedup — items already in context are routed
|
|
211
|
-
// per --on-conflict before we pay for the describe or fetch.
|
|
212
|
-
type AlreadyInContext = {
|
|
213
|
-
target: DriveTarget;
|
|
214
|
-
existing: ContextItem;
|
|
215
|
-
};
|
|
216
|
-
const alreadyInContext: AlreadyInContext[] = [];
|
|
217
|
-
const remainingFiles: FileToAdd[] = [];
|
|
218
|
-
const remainingUrls: UrlToAdd[] = [];
|
|
219
|
-
|
|
220
|
-
for (const f of filesToAdd) {
|
|
221
|
-
const existing = await getContextItem(conn, f.target);
|
|
222
|
-
if (existing) {
|
|
223
|
-
alreadyInContext.push({ target: f.target, existing });
|
|
224
|
-
} else {
|
|
225
|
-
remainingFiles.push(f);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
for (const u of urlsToAdd) {
|
|
229
|
-
if (!u.target) {
|
|
230
|
-
remainingUrls.push(u);
|
|
231
|
-
continue;
|
|
232
|
-
}
|
|
233
|
-
const existing = await getContextItem(conn, u.target);
|
|
234
|
-
if (existing) {
|
|
235
|
-
alreadyInContext.push({ target: u.target, existing });
|
|
236
|
-
} else {
|
|
237
|
-
remainingUrls.push(u);
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
let refreshedCount = 0;
|
|
242
|
-
let refreshedChunks = 0;
|
|
243
|
-
const dedupSkipped: string[] = [];
|
|
244
|
-
|
|
245
|
-
if (alreadyInContext.length > 0) {
|
|
246
|
-
if (policy === "error") {
|
|
247
|
-
logger.error(
|
|
248
|
-
`${alreadyInContext.length} item(s) already in context:`,
|
|
249
|
-
);
|
|
250
|
-
for (const a of alreadyInContext) {
|
|
251
|
-
console.log(
|
|
252
|
-
` ${ansis.red("✗")} ${formatDriveRef(a.target)} (id: ${a.existing.id})`,
|
|
253
|
-
);
|
|
254
|
-
}
|
|
255
|
-
logger.dim(
|
|
256
|
-
"Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them.",
|
|
257
|
-
);
|
|
258
|
-
process.exit(1);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
if (policy === "skip") {
|
|
262
|
-
for (const a of alreadyInContext) {
|
|
263
|
-
logger.dim(`⊘ already in context: ${formatDriveRef(a.target)}`);
|
|
264
|
-
dedupSkipped.push(formatDriveRef(a.target));
|
|
265
|
-
}
|
|
266
|
-
} else {
|
|
267
|
-
const itemsToRefresh = alreadyInContext.map((a) => a.existing);
|
|
268
|
-
const hasUrls = itemsToRefresh.some((i) => i.drive !== "disk");
|
|
269
|
-
const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
|
|
270
|
-
|
|
271
|
-
const refreshSpinner = createSpinner(
|
|
272
|
-
`Refreshing 0/${itemsToRefresh.length} existing item(s)...`,
|
|
273
|
-
).start();
|
|
274
|
-
const refreshResult = await refreshContextItems(
|
|
275
|
-
conn,
|
|
276
|
-
itemsToRefresh,
|
|
277
|
-
config,
|
|
278
|
-
mcpxClient,
|
|
279
|
-
{
|
|
280
|
-
onItemProgress: (done, total) => {
|
|
281
|
-
refreshSpinner.update({
|
|
282
|
-
text: `Refreshing ${done}/${total} existing item(s)...`,
|
|
283
|
-
});
|
|
284
|
-
},
|
|
285
|
-
},
|
|
286
|
-
);
|
|
287
|
-
refreshSpinner.success({
|
|
288
|
-
text: `Refreshed ${refreshResult.checked} existing item(s): ${refreshResult.updated} updated, ${refreshResult.unchanged} unchanged, ${refreshResult.missing} missing.`,
|
|
289
|
-
});
|
|
290
|
-
|
|
291
|
-
refreshedCount = refreshResult.updated + refreshResult.unchanged;
|
|
292
|
-
refreshedChunks = refreshResult.chunks;
|
|
293
|
-
for (const item of refreshResult.items) {
|
|
294
|
-
if (item.status === "missing") {
|
|
295
|
-
logger.warn(` Missing: ${item.ref}`);
|
|
296
|
-
} else if (item.status === "error") {
|
|
297
|
-
logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
// Phase 1: Upsert DB records (batched, parallel LLM descriptions)
|
|
304
|
-
let addCompleted = 0;
|
|
305
|
-
const itemIds: { id: string; target: DriveTarget }[] = [];
|
|
306
|
-
const conflicts: { target: DriveTarget; existingId: string }[] = [];
|
|
307
|
-
const skipped: string[] = [];
|
|
308
|
-
|
|
309
|
-
if (remainingFiles.length > 0) {
|
|
310
|
-
const fileSpinner = createSpinner(
|
|
311
|
-
`Adding and describing 0/${remainingFiles.length} file(s)...`,
|
|
312
|
-
).start();
|
|
313
|
-
|
|
314
|
-
for (let i = 0; i < remainingFiles.length; i += CONCURRENCY) {
|
|
315
|
-
const batch = remainingFiles.slice(i, i + CONCURRENCY);
|
|
316
|
-
const results = await Promise.all(
|
|
317
|
-
batch.map(async ({ filePath, target }) => {
|
|
318
|
-
const result = await addFile(
|
|
319
|
-
conn,
|
|
320
|
-
filePath,
|
|
321
|
-
target,
|
|
322
|
-
config,
|
|
323
|
-
policy,
|
|
324
|
-
);
|
|
325
|
-
addCompleted++;
|
|
326
|
-
fileSpinner.update({
|
|
327
|
-
text: `Adding and describing ${addCompleted}/${remainingFiles.length} file(s)...`,
|
|
328
|
-
});
|
|
329
|
-
return result;
|
|
330
|
-
}),
|
|
331
|
-
);
|
|
332
|
-
for (const r of results) {
|
|
333
|
-
if (!r) continue;
|
|
334
|
-
if (r.kind === "added") {
|
|
335
|
-
itemIds.push({ id: r.id, target: r.target });
|
|
336
|
-
} else if (r.kind === "conflict") {
|
|
337
|
-
conflicts.push({ target: r.target, existingId: r.existingId });
|
|
338
|
-
} else if (r.kind === "skipped") {
|
|
339
|
-
skipped.push(formatDriveRef(r.target));
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
fileSpinner.success({
|
|
345
|
-
text: `Added and described ${addCompleted} file(s).`,
|
|
346
|
-
});
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
if (remainingUrls.length > 0) {
|
|
350
|
-
const mcpxClient = await createMcpxClient(dir);
|
|
351
|
-
if (!mcpxClient) {
|
|
352
|
-
logger.dim(
|
|
353
|
-
"No MCP servers configured — remote fetches will use basic HTTP.",
|
|
354
|
-
);
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
let urlIdx = 0;
|
|
358
|
-
let urlAdded = 0;
|
|
359
|
-
for (const { url } of remainingUrls) {
|
|
360
|
-
urlIdx++;
|
|
361
|
-
console.log(
|
|
362
|
-
`\n${ansis.bold(`[${urlIdx}/${remainingUrls.length}]`)} ${ansis.cyan(url)}`,
|
|
363
|
-
);
|
|
364
|
-
const result = await addUrl(
|
|
365
|
-
conn,
|
|
366
|
-
config,
|
|
367
|
-
url,
|
|
368
|
-
mcpxClient,
|
|
369
|
-
opts.promptAddition,
|
|
370
|
-
policy,
|
|
371
|
-
);
|
|
372
|
-
if (result.ok) {
|
|
373
|
-
urlAdded++;
|
|
374
|
-
itemIds.push({ id: result.id, target: result.target });
|
|
375
|
-
console.log(
|
|
376
|
-
` ${ansis.green("✔")} stored at ${formatDriveRef(result.target)}`,
|
|
377
|
-
);
|
|
378
|
-
} else if (result.kind === "conflict") {
|
|
379
|
-
conflicts.push({
|
|
380
|
-
target: result.target,
|
|
381
|
-
existingId: result.existingId,
|
|
382
|
-
});
|
|
383
|
-
console.log(
|
|
384
|
-
` ${ansis.red("✗")} path already exists: ${formatDriveRef(result.target)}`,
|
|
385
|
-
);
|
|
386
|
-
} else if (result.kind === "skipped") {
|
|
387
|
-
skipped.push(formatDriveRef(result.target));
|
|
388
|
-
console.log(
|
|
389
|
-
` ${ansis.yellow("⊘")} skipped (path exists): ${formatDriveRef(result.target)}`,
|
|
390
|
-
);
|
|
391
|
-
} else if (result.actionable) {
|
|
392
|
-
console.log(
|
|
393
|
-
` ${ansis.red("✗")} ${ansis.bold("action required:")}`,
|
|
394
|
-
);
|
|
395
|
-
for (const line of result.error.split("\n")) {
|
|
396
|
-
console.log(` ${ansis.yellow(line)}`);
|
|
397
|
-
}
|
|
398
|
-
} else {
|
|
399
|
-
console.log(
|
|
400
|
-
` ${ansis.red("✗")} failed to fetch: ${result.error}`,
|
|
401
|
-
);
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
const urlSummary = `Added ${urlAdded}/${remainingUrls.length} URL(s).`;
|
|
406
|
-
if (urlAdded === remainingUrls.length) {
|
|
407
|
-
console.log(`\n${ansis.green("✔")} ${urlSummary}`);
|
|
408
|
-
} else if (urlAdded === 0) {
|
|
409
|
-
console.log(`\n${ansis.red("✗")} ${urlSummary}`);
|
|
410
|
-
} else {
|
|
411
|
-
console.log(`\n${ansis.yellow("⚠")} ${urlSummary}`);
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
if (conflicts.length > 0) {
|
|
416
|
-
logger.error(
|
|
417
|
-
`${conflicts.length} (drive, path) collision(s) — nothing written for these items:`,
|
|
418
|
-
);
|
|
419
|
-
for (const c of conflicts) {
|
|
420
|
-
console.log(
|
|
421
|
-
` ${ansis.red("✗")} ${formatDriveRef(c.target)} (existing id: ${c.existingId})`,
|
|
422
|
-
);
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
skipped.push(...dedupSkipped);
|
|
427
|
-
|
|
428
|
-
if (itemIds.length === 0) {
|
|
429
|
-
const msg = buildSummary({
|
|
430
|
-
added: itemIds.length,
|
|
431
|
-
refreshed: refreshedCount,
|
|
432
|
-
skipped: skipped.length,
|
|
433
|
-
chunks: refreshedChunks,
|
|
434
|
-
totalCount,
|
|
435
|
-
handled: itemIds.length + refreshedCount + skipped.length,
|
|
436
|
-
});
|
|
437
|
-
if (conflicts.length > 0) {
|
|
438
|
-
logger.error(msg);
|
|
439
|
-
process.exit(1);
|
|
440
|
-
}
|
|
441
|
-
if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
|
|
442
|
-
logger.success(msg);
|
|
443
|
-
process.exit(0);
|
|
444
|
-
} else if (itemIds.length === 0 && refreshedCount === 0) {
|
|
445
|
-
logger.error(msg);
|
|
446
|
-
process.exit(1);
|
|
447
|
-
} else {
|
|
448
|
-
logger.warn(msg);
|
|
449
|
-
process.exit(1);
|
|
450
|
-
}
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
let completed = 0;
|
|
454
|
-
const embedSpinner = createSpinner(
|
|
455
|
-
`Embedding 0/${itemIds.length} items...`,
|
|
456
|
-
).start();
|
|
457
|
-
|
|
458
|
-
const prepared: PreparedIngestion[] = [];
|
|
459
|
-
for (let i = 0; i < itemIds.length; i += CONCURRENCY) {
|
|
460
|
-
const batch = itemIds.slice(i, i + CONCURRENCY);
|
|
461
|
-
const results = await Promise.all(
|
|
462
|
-
batch.map(async ({ id }) => {
|
|
463
|
-
const result = await prepareIngestion(conn, id, config);
|
|
464
|
-
completed++;
|
|
465
|
-
embedSpinner.update({
|
|
466
|
-
text: `Embedding ${completed}/${itemIds.length} items...`,
|
|
467
|
-
});
|
|
468
|
-
return result;
|
|
469
|
-
}),
|
|
470
|
-
);
|
|
471
|
-
for (const r of results) {
|
|
472
|
-
if (r) prepared.push(r);
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
embedSpinner.success({
|
|
476
|
-
text: `Embedded ${prepared.length} item(s).`,
|
|
57
|
+
spinner.success({
|
|
58
|
+
text: `imported ${fetched.content.length} bytes → ${ansis.bold(`context/${dest}`)} (source: ${fetched.source ?? "http"})`,
|
|
477
59
|
});
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
let filesUpdated = 0;
|
|
482
|
-
for (const p of prepared) {
|
|
483
|
-
const result = await storeIngestion(conn, p);
|
|
484
|
-
chunks += result.chunks;
|
|
485
|
-
if (result.isUpdate) filesUpdated++;
|
|
486
|
-
else filesAdded++;
|
|
487
|
-
}
|
|
488
|
-
|
|
489
|
-
const summary = buildSummary({
|
|
490
|
-
added: filesAdded,
|
|
491
|
-
updated: filesUpdated,
|
|
492
|
-
refreshed: refreshedCount,
|
|
493
|
-
skipped: skipped.length,
|
|
494
|
-
chunks: chunks + refreshedChunks,
|
|
495
|
-
totalCount,
|
|
496
|
-
handled: itemIds.length + refreshedCount + skipped.length,
|
|
60
|
+
} catch (err) {
|
|
61
|
+
spinner.error({
|
|
62
|
+
text: `import failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
497
63
|
});
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
logger.success(summary);
|
|
504
|
-
process.exit(0);
|
|
505
|
-
} else {
|
|
506
|
-
logger.warn(summary);
|
|
507
|
-
process.exit(1);
|
|
508
|
-
}
|
|
509
|
-
}),
|
|
510
|
-
);
|
|
64
|
+
process.exit(1);
|
|
65
|
+
} finally {
|
|
66
|
+
await mcpxClient?.close();
|
|
67
|
+
}
|
|
68
|
+
});
|
|
511
69
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
.
|
|
515
|
-
.
|
|
516
|
-
"
|
|
517
|
-
"natural-language query (semantic + BM25). Combine with --pattern for fused regexp + semantic ranking.",
|
|
518
|
-
)
|
|
519
|
-
.option("-k, --top-k <n>", "max results", Number.parseInt, 20)
|
|
520
|
-
.option(
|
|
521
|
-
"--pattern <regex>",
|
|
522
|
-
"regex pattern (regexp side). May be combined with [query] to fuse signals.",
|
|
70
|
+
// ---- reindex -------------------------------------------------------------
|
|
71
|
+
context
|
|
72
|
+
.command("reindex")
|
|
73
|
+
.description(
|
|
74
|
+
"Walk context/ and reconcile the search index: embed new files, re-embed changed ones, drop rows for removed ones.",
|
|
523
75
|
)
|
|
524
|
-
.
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
76
|
+
.action(async () => {
|
|
77
|
+
const dir = program.opts().dir;
|
|
78
|
+
const config = await loadConfig(dir);
|
|
79
|
+
const dbPath = getDbPath(dir);
|
|
80
|
+
// The migrate() call ensures the index DB is initialized, including
|
|
81
|
+
// the context_index table from migration 19, before we try to write.
|
|
82
|
+
await withDb(dbPath, migrate);
|
|
83
|
+
const spinner = createSpinner("reindexing").start();
|
|
84
|
+
const summary = await reindexContext(dir, config, dbPath, {
|
|
85
|
+
onProgress: (msg) => spinner.update({ text: msg }),
|
|
86
|
+
});
|
|
87
|
+
const parts = [
|
|
88
|
+
`${summary.added} added`,
|
|
89
|
+
`${summary.updated} updated`,
|
|
90
|
+
`${summary.unchanged} unchanged`,
|
|
91
|
+
`${summary.removed} removed`,
|
|
92
|
+
`${summary.chunksWritten} chunks written`,
|
|
93
|
+
];
|
|
94
|
+
spinner.success({ text: parts.join(", ") });
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
// ---- tree ---------------------------------------------------------------
|
|
98
|
+
context
|
|
99
|
+
.command("tree [path]")
|
|
100
|
+
.description("Render the context/ tree (or a subdirectory).")
|
|
528
101
|
.option(
|
|
529
|
-
"--
|
|
530
|
-
"
|
|
102
|
+
"-d, --max-depth <n>",
|
|
103
|
+
"max directory depth to render",
|
|
531
104
|
Number.parseInt,
|
|
105
|
+
10,
|
|
532
106
|
)
|
|
533
|
-
.action((
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
}
|
|
539
|
-
const config = await loadConfig(dir);
|
|
540
|
-
const toolCtx: ToolContext = {
|
|
541
|
-
conn,
|
|
542
|
-
dbPath: getDbPath(dir),
|
|
543
|
-
projectDir: dir,
|
|
544
|
-
config,
|
|
545
|
-
mcpxClient: null,
|
|
546
|
-
};
|
|
547
|
-
const result = await searchTool.execute(
|
|
548
|
-
{
|
|
549
|
-
query,
|
|
550
|
-
pattern: opts.pattern,
|
|
551
|
-
drive: opts.drive,
|
|
552
|
-
path: opts.path,
|
|
553
|
-
glob: opts.glob,
|
|
554
|
-
ignore_case: opts.ignoreCase,
|
|
555
|
-
context: opts.context,
|
|
556
|
-
max_results: opts.topK,
|
|
557
|
-
},
|
|
558
|
-
toolCtx,
|
|
559
|
-
);
|
|
560
|
-
|
|
561
|
-
if (result.is_error) {
|
|
562
|
-
logger.error(result.message ?? "Search failed");
|
|
563
|
-
process.exit(1);
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
if (result.matches.length === 0) {
|
|
567
|
-
logger.dim("No results found.");
|
|
568
|
-
return;
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
for (const [i, m] of result.matches.entries()) {
|
|
572
|
-
const tagColor =
|
|
573
|
-
m.match_type === "both"
|
|
574
|
-
? ansis.green
|
|
575
|
-
: m.match_type === "regexp"
|
|
576
|
-
? ansis.yellow
|
|
577
|
-
: ansis.cyan;
|
|
578
|
-
const tag = tagColor(`[${m.match_type}]`);
|
|
579
|
-
const location = m.line != null ? `${m.ref}:${m.line}` : m.ref;
|
|
580
|
-
console.log(
|
|
581
|
-
`${ansis.bold(`${i + 1}.`)} ${tag} ${ansis.cyan(location)} ${ansis.dim(`score=${m.score.toFixed(4)}`)}`,
|
|
582
|
-
);
|
|
583
|
-
const snippet = m.content.slice(0, 200).replace(/\n/g, " ");
|
|
584
|
-
if (snippet) console.log(` ${snippet}`);
|
|
585
|
-
console.log("");
|
|
586
|
-
}
|
|
587
|
-
}),
|
|
588
|
-
);
|
|
589
|
-
|
|
590
|
-
ctx
|
|
591
|
-
.command("delete <ref>")
|
|
592
|
-
.description("Delete a context entry (UUID or drive:/path)")
|
|
593
|
-
.action((ref: string) =>
|
|
594
|
-
withDb(program, async (conn) => {
|
|
595
|
-
const item = await resolveContextItem(conn, ref);
|
|
596
|
-
if (!item) {
|
|
597
|
-
logger.error(`Context entry not found: ${ref}`);
|
|
598
|
-
process.exit(1);
|
|
599
|
-
}
|
|
600
|
-
await deleteContextItemByPath(conn, {
|
|
601
|
-
drive: item.drive,
|
|
602
|
-
path: item.path,
|
|
603
|
-
});
|
|
604
|
-
logger.success(`Deleted context entry: ${formatDriveRef(item)}`);
|
|
605
|
-
}),
|
|
606
|
-
);
|
|
607
|
-
ctx
|
|
608
|
-
.command("chunks <ref>")
|
|
609
|
-
.description("Show chunks and embeddings for a context entry")
|
|
610
|
-
.action((ref: string) =>
|
|
611
|
-
withDb(program, async (conn) => {
|
|
612
|
-
const item = await resolveContextItem(conn, ref);
|
|
613
|
-
if (!item) {
|
|
614
|
-
logger.error(`Context entry not found: ${ref}`);
|
|
615
|
-
process.exit(1);
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
if (!item.indexed_at) {
|
|
619
|
-
logger.dim("Item has not been indexed yet.");
|
|
620
|
-
return;
|
|
621
|
-
}
|
|
622
|
-
|
|
623
|
-
const embeddings = await getEmbeddingsForItem(conn, item.id);
|
|
624
|
-
|
|
625
|
-
console.log(ansis.bold(item.title));
|
|
626
|
-
console.log(` Ref: ${formatDriveRef(item)}`);
|
|
627
|
-
console.log(` Indexed: ${fmtDate(item.indexed_at)}`);
|
|
628
|
-
console.log(` Chunks: ${embeddings.length}`);
|
|
629
|
-
console.log("");
|
|
630
|
-
|
|
631
|
-
for (const emb of embeddings) {
|
|
632
|
-
const preview = emb.chunk_content
|
|
633
|
-
? emb.chunk_content.slice(0, 200).replace(/\n/g, " ") +
|
|
634
|
-
(emb.chunk_content.length > 200 ? "..." : "")
|
|
635
|
-
: ansis.dim("(no content)");
|
|
636
|
-
const chars = emb.chunk_content?.length ?? 0;
|
|
637
|
-
|
|
638
|
-
console.log(
|
|
639
|
-
`${ansis.bold(`Chunk ${emb.chunk_index}`)} ${ansis.dim(`${chars} chars, ${emb.embedding.length} dims`)}`,
|
|
640
|
-
);
|
|
641
|
-
console.log(` ${preview}`);
|
|
642
|
-
console.log("");
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
const totalChars = embeddings.reduce(
|
|
646
|
-
(sum, e) => sum + (e.chunk_content?.length ?? 0),
|
|
647
|
-
0,
|
|
648
|
-
);
|
|
649
|
-
console.log(
|
|
650
|
-
ansis.dim(`${embeddings.length} chunk(s), ${totalChars} total chars`),
|
|
651
|
-
);
|
|
652
|
-
}),
|
|
653
|
-
);
|
|
107
|
+
.action(async (path: string | undefined, opts) => {
|
|
108
|
+
const dir = program.opts().dir;
|
|
109
|
+
const node = await buildTree(dir, path ?? "", opts.maxDepth);
|
|
110
|
+
console.log(renderTreeAnsi(node));
|
|
111
|
+
});
|
|
654
112
|
|
|
655
|
-
|
|
656
|
-
|
|
113
|
+
// ---- stats --------------------------------------------------------------
|
|
114
|
+
context
|
|
115
|
+
.command("stats")
|
|
657
116
|
.description(
|
|
658
|
-
"
|
|
117
|
+
"Counts and sizes for files under context/ and rows in the search index.",
|
|
659
118
|
)
|
|
660
|
-
.
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
);
|
|
119
|
+
.action(async () => {
|
|
120
|
+
const dir = program.opts().dir;
|
|
121
|
+
const dbPath = getDbPath(dir);
|
|
122
|
+
const exists = await fileExists(dir, "");
|
|
123
|
+
if (!exists) {
|
|
124
|
+
logger.dim(`context/ does not exist under ${dir}`);
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
const entries = await listContextDir(dir, "", { recursive: true });
|
|
128
|
+
let files = 0;
|
|
129
|
+
let textual = 0;
|
|
130
|
+
let bytes = 0;
|
|
131
|
+
for (const e of entries) {
|
|
132
|
+
if (e.is_directory) continue;
|
|
133
|
+
files++;
|
|
134
|
+
if (e.is_textual) textual++;
|
|
135
|
+
try {
|
|
136
|
+
const st = await stat(join(dir, CONTEXT_DIR, e.path));
|
|
137
|
+
bytes += st.size;
|
|
138
|
+
} catch {
|
|
139
|
+
// file vanished mid-walk — skip
|
|
678
140
|
}
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
refreshSpinner.update({
|
|
698
|
-
text: `Refreshing ${done}/${total} items...`,
|
|
699
|
-
});
|
|
700
|
-
},
|
|
701
|
-
onEmbedProgress: (done, total) => {
|
|
702
|
-
if (done === 1) embedSpinner.start();
|
|
703
|
-
embedSpinner.update({
|
|
704
|
-
text: `Embedding ${done}/${total} item(s)...`,
|
|
705
|
-
});
|
|
706
|
-
},
|
|
707
|
-
},
|
|
141
|
+
}
|
|
142
|
+
const idx = await withDb(dbPath, async (conn) => {
|
|
143
|
+
await migrate(conn);
|
|
144
|
+
return indexStats(conn);
|
|
145
|
+
});
|
|
146
|
+
const rows = [
|
|
147
|
+
["files", String(files)],
|
|
148
|
+
["textual", String(textual)],
|
|
149
|
+
["binary", String(files - textual)],
|
|
150
|
+
["bytes on disk", formatBytes(bytes)],
|
|
151
|
+
["indexed paths", String(idx.paths)],
|
|
152
|
+
["index chunks", String(idx.chunks)],
|
|
153
|
+
["embedded chunks", String(idx.embedded)],
|
|
154
|
+
];
|
|
155
|
+
const labelWidth = Math.max(...rows.map((r) => r[0]?.length ?? 0));
|
|
156
|
+
for (const [label, value] of rows) {
|
|
157
|
+
console.log(
|
|
158
|
+
` ${ansis.dim((label ?? "").padEnd(labelWidth))} ${value}`,
|
|
708
159
|
);
|
|
709
|
-
|
|
710
|
-
refreshSpinner.success({
|
|
711
|
-
text: `Checked ${result.checked} item(s): ${result.updated} updated, ${result.unchanged} unchanged, ${result.missing} missing.`,
|
|
712
|
-
});
|
|
713
|
-
|
|
714
|
-
for (const item of result.items) {
|
|
715
|
-
if (item.status === "missing") {
|
|
716
|
-
logger.warn(` Missing: ${item.ref}`);
|
|
717
|
-
} else if (item.status === "error") {
|
|
718
|
-
logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
|
|
719
|
-
}
|
|
720
|
-
}
|
|
721
|
-
|
|
722
|
-
if (result.reembedded > 0) {
|
|
723
|
-
embedSpinner.success({
|
|
724
|
-
text: `Embedded ${result.reembedded} item(s).`,
|
|
725
|
-
});
|
|
726
|
-
logger.success(
|
|
727
|
-
`Refreshed ${result.updated} item(s), ${result.chunks} chunk(s) re-indexed.`,
|
|
728
|
-
);
|
|
729
|
-
}
|
|
730
|
-
}),
|
|
731
|
-
);
|
|
732
|
-
|
|
733
|
-
ctx
|
|
734
|
-
.command("reembed")
|
|
735
|
-
.description(
|
|
736
|
-
"Recompute every embedding using the configured local model. Run this after upgrading or after changing embedding_model.",
|
|
737
|
-
)
|
|
738
|
-
.action(() =>
|
|
739
|
-
withDb(program, async (_conn, dir) => {
|
|
740
|
-
const config = await loadConfig(dir);
|
|
741
|
-
const dbPath = getDbPath(dir);
|
|
742
|
-
await reembedMissingVectors(dbPath, config, { mode: "all" });
|
|
743
|
-
}),
|
|
744
|
-
);
|
|
745
|
-
|
|
746
|
-
registerContextToolSubcommands(ctx);
|
|
747
|
-
}
|
|
748
|
-
|
|
749
|
-
async function resolveItems(
|
|
750
|
-
conn: DbConnection,
|
|
751
|
-
refs: string[],
|
|
752
|
-
all: boolean,
|
|
753
|
-
): Promise<ContextItem[]> {
|
|
754
|
-
if (!all && refs.length === 0) {
|
|
755
|
-
logger.error("Provide at least one ref or use --all.");
|
|
756
|
-
process.exit(1);
|
|
757
|
-
}
|
|
758
|
-
if (all) return listContextItems(conn);
|
|
759
|
-
|
|
760
|
-
const byId = new Map<string, ContextItem>();
|
|
761
|
-
const unresolved: string[] = [];
|
|
762
|
-
for (const r of refs) {
|
|
763
|
-
const matched = await resolveOne(conn, r);
|
|
764
|
-
if (matched.length === 0) {
|
|
765
|
-
unresolved.push(r);
|
|
766
|
-
continue;
|
|
767
|
-
}
|
|
768
|
-
for (const item of matched) byId.set(item.id, item);
|
|
769
|
-
}
|
|
770
|
-
for (const r of unresolved) logger.warn(` Not found: ${r}`);
|
|
771
|
-
return [...byId.values()];
|
|
772
|
-
}
|
|
773
|
-
|
|
774
|
-
async function resolveOne(
|
|
775
|
-
conn: DbConnection,
|
|
776
|
-
ref: string,
|
|
777
|
-
): Promise<ContextItem[]> {
|
|
778
|
-
const exact = await resolveContextItem(conn, ref);
|
|
779
|
-
if (exact) return [exact];
|
|
780
|
-
// Prefix expansion: only valid for `drive:/path` form.
|
|
781
|
-
const parsed = parseDriveRef(ref);
|
|
782
|
-
if (parsed) {
|
|
783
|
-
return listContextItemsByPrefix(conn, parsed.drive, parsed.path, {
|
|
784
|
-
recursive: true,
|
|
160
|
+
}
|
|
785
161
|
});
|
|
786
|
-
}
|
|
787
|
-
return [];
|
|
788
162
|
}
|
|
789
163
|
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
}): string {
|
|
801
|
-
const parts: string[] = [];
|
|
802
|
-
if (args.added > 0) parts.push(`${args.added} added`);
|
|
803
|
-
if (args.updated && args.updated > 0) parts.push(`${args.updated} updated`);
|
|
804
|
-
if (args.refreshed > 0) parts.push(`${args.refreshed} refreshed`);
|
|
805
|
-
if (args.skipped > 0) parts.push(`${args.skipped} skipped`);
|
|
806
|
-
const body = parts.length > 0 ? parts.join(", ") : "0 added";
|
|
807
|
-
const handled = args.handled ?? args.added + args.refreshed + args.skipped;
|
|
808
|
-
return `${body} — ${args.chunks} chunk(s) indexed (${handled}/${args.totalCount} item(s)).`;
|
|
164
|
+
/**
 * Build the default location (relative to context/) for fetched content when
 * the caller did not pass --path.
 *
 * The result is always `<folder>/<slug>.md`, where `<slug>` comes from
 * `slugifyUrl(url)` and `<folder>` is:
 *   - `source` when one is given (e.g. "google-docs" for an MCP-served fetch)
 *   - "url" when `source` is null (raw HTTP fallback)
 *
 * @param url - The URL that was fetched.
 * @param source - Name of the fetch source, or null for the raw HTTP fallback.
 * @returns A context-relative path ending in ".md".
 */
function deriveContextPath(url: string, source: string | null): string {
  const fileName = `${slugifyUrl(url)}.md`;
  const folder = source ?? "url";
  return [folder, fileName].join("/");
}
|
|
810
175
|
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
| { kind: "skipped"; target: DriveTarget }
|
|
814
|
-
| { kind: "conflict"; target: DriveTarget; existingId: string }
|
|
815
|
-
| { kind: "failed"; target: DriveTarget; error: string };
|
|
816
|
-
|
|
817
|
-
async function addFile(
|
|
818
|
-
conn: DbConnection,
|
|
819
|
-
filePath: string,
|
|
820
|
-
target: DriveTarget,
|
|
821
|
-
config: Required<BotholomewConfig>,
|
|
822
|
-
policy: ConflictPolicy,
|
|
823
|
-
): Promise<AddFileResult | null> {
|
|
176
|
+
function slugifyUrl(url: string): string {
|
|
177
|
+
let parsed: URL;
|
|
824
178
|
try {
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
if (policy === "skip") {
|
|
829
|
-
logger.dim(` ⊘ skipped (exists): ${formatDriveRef(target)}`);
|
|
830
|
-
return { kind: "skipped", target };
|
|
831
|
-
}
|
|
832
|
-
return {
|
|
833
|
-
kind: "conflict",
|
|
834
|
-
target,
|
|
835
|
-
existingId: existing.id,
|
|
836
|
-
};
|
|
837
|
-
}
|
|
838
|
-
}
|
|
839
|
-
|
|
840
|
-
const bunFile = Bun.file(filePath);
|
|
841
|
-
const mimeType = bunFile.type.split(";")[0] || "application/octet-stream";
|
|
842
|
-
const filename = basename(filePath);
|
|
843
|
-
const textual = isText(filename) !== false;
|
|
844
|
-
const content = textual ? await bunFile.text() : null;
|
|
845
|
-
|
|
846
|
-
const description = await generateDescription(config, {
|
|
847
|
-
filename,
|
|
848
|
-
mimeType,
|
|
849
|
-
content,
|
|
850
|
-
filePath,
|
|
851
|
-
});
|
|
852
|
-
|
|
853
|
-
const itemParams = {
|
|
854
|
-
title: filename,
|
|
855
|
-
description,
|
|
856
|
-
content: content ?? undefined,
|
|
857
|
-
mimeType,
|
|
858
|
-
drive: target.drive,
|
|
859
|
-
path: target.path,
|
|
860
|
-
isTextual: textual,
|
|
861
|
-
} as const;
|
|
862
|
-
|
|
863
|
-
const item =
|
|
864
|
-
policy === "overwrite"
|
|
865
|
-
? await upsertContextItem(conn, itemParams)
|
|
866
|
-
: await createContextItemStrict(conn, itemParams);
|
|
867
|
-
|
|
868
|
-
return textual && content ? { kind: "added", id: item.id, target } : null;
|
|
869
|
-
} catch (err) {
|
|
870
|
-
if (err instanceof PathConflictError) {
|
|
871
|
-
return { kind: "conflict", target, existingId: err.existingId };
|
|
872
|
-
}
|
|
873
|
-
logger.warn(` ! ${formatDriveRef(target)}: ${err}`);
|
|
874
|
-
return { kind: "failed", target, error: String(err) };
|
|
179
|
+
parsed = new URL(url);
|
|
180
|
+
} catch {
|
|
181
|
+
return url.replace(/[^a-z0-9]+/gi, "-").slice(0, 80);
|
|
875
182
|
}
|
|
183
|
+
const path = parsed.pathname.replace(/^\/+|\/+$/g, "").replace(/\//g, "_");
|
|
184
|
+
const base = path || parsed.hostname;
|
|
185
|
+
return `${parsed.hostname}_${base}`
|
|
186
|
+
.replace(/[^a-z0-9._-]+/gi, "-")
|
|
187
|
+
.replace(/-+/g, "-")
|
|
188
|
+
.slice(0, 80);
|
|
876
189
|
}
|
|
877
190
|
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
if (policy !== "overwrite") {
|
|
897
|
-
const existing = await getContextItem(conn, target);
|
|
898
|
-
if (existing) {
|
|
899
|
-
if (policy === "skip") return { ok: false, kind: "skipped", target };
|
|
900
|
-
return { ok: false, kind: "conflict", target, existingId: existing.id };
|
|
901
|
-
}
|
|
902
|
-
}
|
|
903
|
-
|
|
904
|
-
const description = await generateDescription(config, {
|
|
905
|
-
filename: new URL(url).hostname,
|
|
906
|
-
mimeType: fetched.mimeType,
|
|
907
|
-
content: fetched.content,
|
|
191
|
+
function renderTreeAnsi(
|
|
192
|
+
node: TreeNode,
|
|
193
|
+
prefix = "",
|
|
194
|
+
isLast = true,
|
|
195
|
+
isRoot = true,
|
|
196
|
+
): string {
|
|
197
|
+
const lines: string[] = [];
|
|
198
|
+
const connector = isRoot ? "" : isLast ? "└── " : "├── ";
|
|
199
|
+
const label = node.is_directory
|
|
200
|
+
? ansis.blue(node.name === "." ? "context/" : `${node.name}/`)
|
|
201
|
+
: node.name;
|
|
202
|
+
lines.push(`${prefix}${connector}${label}`);
|
|
203
|
+
if (node.is_directory && node.children) {
|
|
204
|
+
const childPrefix = isRoot ? "" : prefix + (isLast ? " " : "│ ");
|
|
205
|
+
const children = node.children;
|
|
206
|
+
children.forEach((c, i) => {
|
|
207
|
+
const last = i === children.length - 1;
|
|
208
|
+
lines.push(renderTreeAnsi(c, childPrefix, last, false));
|
|
908
209
|
});
|
|
909
|
-
|
|
910
|
-
const itemParams = {
|
|
911
|
-
title: fetched.title,
|
|
912
|
-
description,
|
|
913
|
-
content: fetched.content,
|
|
914
|
-
mimeType: fetched.mimeType,
|
|
915
|
-
drive: target.drive,
|
|
916
|
-
path: target.path,
|
|
917
|
-
isTextual: true,
|
|
918
|
-
sourceUrl: fetched.sourceUrl,
|
|
919
|
-
};
|
|
920
|
-
|
|
921
|
-
const item =
|
|
922
|
-
policy === "overwrite"
|
|
923
|
-
? await upsertContextItem(conn, itemParams)
|
|
924
|
-
: await createContextItemStrict(conn, itemParams);
|
|
925
|
-
|
|
926
|
-
return { ok: true, id: item.id, target };
|
|
927
|
-
} catch (err) {
|
|
928
|
-
if (err instanceof PathConflictError) {
|
|
929
|
-
return {
|
|
930
|
-
ok: false,
|
|
931
|
-
kind: "conflict",
|
|
932
|
-
target: { drive: err.drive, path: err.path },
|
|
933
|
-
existingId: err.existingId,
|
|
934
|
-
};
|
|
935
|
-
}
|
|
936
|
-
if (err instanceof FetchFailureError) {
|
|
937
|
-
return {
|
|
938
|
-
ok: false,
|
|
939
|
-
kind: "fetch-failed",
|
|
940
|
-
error: err.userMessage,
|
|
941
|
-
actionable: true,
|
|
942
|
-
};
|
|
943
|
-
}
|
|
944
|
-
return {
|
|
945
|
-
ok: false,
|
|
946
|
-
kind: "fetch-failed",
|
|
947
|
-
error: String(err),
|
|
948
|
-
actionable: false,
|
|
949
|
-
};
|
|
950
210
|
}
|
|
211
|
+
return lines.join("\n");
|
|
951
212
|
}
|
|
952
213
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
const
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
const fullPath = join(dirPath, entry.name);
|
|
959
|
-
if (entry.isDirectory()) {
|
|
960
|
-
if (entry.name.startsWith(".")) continue; // skip hidden dirs
|
|
961
|
-
files.push(...(await walkDirectory(fullPath)));
|
|
962
|
-
} else if (entry.isFile()) {
|
|
963
|
-
files.push(fullPath);
|
|
964
|
-
}
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
return files;
|
|
214
|
+
/**
 * Render a byte count as a short human-readable string using binary
 * (1024-based) steps, e.g. `0 B`, `512 B`, `1.5 KB`, `1.0 TB`.
 *
 * Plain bytes are printed without decimals; every larger unit is printed with
 * one decimal place.
 *
 * @param n - Size in bytes. Non-finite or non-positive values render as "0 B".
 * @returns The formatted size followed by its unit.
 */
function formatBytes(n: number): string {
  // Guard NaN / Infinity / negatives / zero: none of them has a meaningful
  // logarithm-style unit, and they previously produced "NaN undefined".
  if (!Number.isFinite(n) || n <= 0) return "0 B";

  const units = ["B", "KB", "MB", "GB", "TB", "PB"];

  // Step up one unit at a time instead of using Math.log so the index can
  // never fall outside the table (no negative index for n < 1, no overflow
  // past the largest unit) and is not subject to floating-point log error.
  let value = n;
  let unitIndex = 0;
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }

  return `${value.toFixed(unitIndex > 0 ? 1 : 0)} ${units[unitIndex]}`;
}
|