botholomew 0.8.9 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat/agent.ts +34 -31
- package/src/commands/context.ts +223 -373
- package/src/commands/tools.ts +100 -11
- package/src/context/describer.ts +3 -118
- package/src/context/drives.ts +110 -0
- package/src/context/fetcher.ts +11 -1
- package/src/context/ingest.ts +13 -10
- package/src/context/refresh.ts +39 -24
- package/src/context/url-utils.ts +0 -23
- package/src/db/context.ts +195 -119
- package/src/db/embeddings.ts +35 -16
- package/src/db/sql/13-drive-paths.sql +49 -0
- package/src/tools/context/list-drives.ts +36 -0
- package/src/tools/context/refresh.ts +41 -23
- package/src/tools/context/search.ts +8 -3
- package/src/tools/dir/create.ts +14 -11
- package/src/tools/dir/size.ts +3 -2
- package/src/tools/dir/tree.ts +57 -17
- package/src/tools/file/copy.ts +14 -8
- package/src/tools/file/count-lines.ts +6 -3
- package/src/tools/file/delete.ts +12 -5
- package/src/tools/file/edit.ts +5 -3
- package/src/tools/file/exists.ts +25 -3
- package/src/tools/file/info.ts +90 -18
- package/src/tools/file/move.ts +15 -16
- package/src/tools/file/read.ts +79 -5
- package/src/tools/file/write.ts +29 -12
- package/src/tools/registry.ts +2 -2
- package/src/tools/search/grep.ts +44 -11
- package/src/tools/search/semantic.ts +7 -3
- package/src/tui/components/ContextPanel.tsx +73 -35
- package/src/tui/markdown.ts +2 -3
- package/src/worker/prompt.ts +42 -46
- package/src/tools/dir/list.ts +0 -89
package/src/commands/context.ts
CHANGED
|
@@ -6,10 +6,13 @@ import { isText } from "istextorbinary";
|
|
|
6
6
|
import { createSpinner } from "nanospinner";
|
|
7
7
|
import { loadConfig } from "../config/loader.ts";
|
|
8
8
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
9
|
+
import { generateDescription } from "../context/describer.ts";
|
|
9
10
|
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
type DriveTarget,
|
|
12
|
+
detectDriveFromUrl,
|
|
13
|
+
formatDriveRef,
|
|
14
|
+
parseDriveRef,
|
|
15
|
+
} from "../context/drives.ts";
|
|
13
16
|
import { embedSingle } from "../context/embedder.ts";
|
|
14
17
|
import { FetchFailureError, fetchUrl } from "../context/fetcher.ts";
|
|
15
18
|
import {
|
|
@@ -18,14 +21,14 @@ import {
|
|
|
18
21
|
storeIngestion,
|
|
19
22
|
} from "../context/ingest.ts";
|
|
20
23
|
import { refreshContextItems } from "../context/refresh.ts";
|
|
21
|
-
import { isUrl
|
|
24
|
+
import { isUrl } from "../context/url-utils.ts";
|
|
22
25
|
import type { DbConnection } from "../db/connection.ts";
|
|
23
26
|
import {
|
|
24
27
|
type ContextItem,
|
|
25
28
|
createContextItemStrict,
|
|
26
29
|
deleteContextItemByPath,
|
|
27
|
-
|
|
28
|
-
|
|
30
|
+
getContextItem,
|
|
31
|
+
getDistinctDirectories,
|
|
29
32
|
listContextItems,
|
|
30
33
|
listContextItemsByPrefix,
|
|
31
34
|
PathConflictError,
|
|
@@ -52,31 +55,61 @@ export function registerContextCommand(program: Command) {
|
|
|
52
55
|
ctx
|
|
53
56
|
.command("list")
|
|
54
57
|
.description("List context entries")
|
|
55
|
-
.option("--
|
|
58
|
+
.option("--drive <drive>", "filter by drive (e.g. disk, url, agent)")
|
|
59
|
+
.option("--path <prefix>", "filter by path prefix (requires --drive)")
|
|
60
|
+
.option(
|
|
61
|
+
"--non-recursive",
|
|
62
|
+
"list only immediate children; include directories",
|
|
63
|
+
)
|
|
56
64
|
.option("-l, --limit <n>", "max number of items", Number.parseInt)
|
|
57
65
|
.option("-o, --offset <n>", "skip first N items", Number.parseInt)
|
|
58
66
|
.action((opts) =>
|
|
59
67
|
withDb(program, async (conn) => {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
68
|
+
if (opts.path && !opts.drive) {
|
|
69
|
+
logger.error("--path requires --drive to scope the prefix.");
|
|
70
|
+
process.exit(1);
|
|
71
|
+
}
|
|
72
|
+
if (opts.nonRecursive && !opts.drive) {
|
|
73
|
+
logger.error(
|
|
74
|
+
"--non-recursive requires --drive to scope the listing.",
|
|
75
|
+
);
|
|
76
|
+
process.exit(1);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const prefix = opts.path ?? (opts.nonRecursive ? "/" : null);
|
|
80
|
+
const items = prefix
|
|
81
|
+
? await listContextItemsByPrefix(conn, opts.drive, prefix, {
|
|
82
|
+
recursive: !opts.nonRecursive,
|
|
63
83
|
limit: opts.limit,
|
|
64
84
|
offset: opts.offset,
|
|
65
85
|
})
|
|
66
86
|
: await listContextItems(conn, {
|
|
87
|
+
drive: opts.drive,
|
|
67
88
|
limit: opts.limit,
|
|
68
89
|
offset: opts.offset,
|
|
69
90
|
});
|
|
70
91
|
|
|
71
|
-
|
|
92
|
+
const dirs = opts.nonRecursive
|
|
93
|
+
? await getDistinctDirectories(conn, opts.drive, opts.path ?? "/")
|
|
94
|
+
: [];
|
|
95
|
+
|
|
96
|
+
if (items.length === 0 && dirs.length === 0) {
|
|
72
97
|
logger.dim("No context entries found.");
|
|
73
98
|
return;
|
|
74
99
|
}
|
|
75
100
|
|
|
76
|
-
const header = `${ansis.bold("ID".padEnd(36))} ${ansis.bold("
|
|
101
|
+
const header = `${ansis.bold("ID".padEnd(36))} ${ansis.bold("Ref".padEnd(50))} ${"Title".padEnd(20)} ${"Description".padEnd(30)} ${"Type".padEnd(15)} ${"Updated".padEnd(18)} Indexed`;
|
|
77
102
|
console.log(header);
|
|
78
103
|
console.log("-".repeat(header.length));
|
|
79
104
|
|
|
105
|
+
const dash = ansis.dim("—");
|
|
106
|
+
for (const dir of dirs) {
|
|
107
|
+
const ref = formatDriveRef({ drive: opts.drive, path: `${dir}/` });
|
|
108
|
+
console.log(
|
|
109
|
+
`${dash.padEnd(36)} ${ansis.cyan(ref.slice(0, 49).padEnd(50))} ${dash.padEnd(20)} ${dash.padEnd(30)} ${ansis.dim("directory".padEnd(15))} ${dash.padEnd(18)} ${dash}`,
|
|
110
|
+
);
|
|
111
|
+
}
|
|
112
|
+
|
|
80
113
|
for (const item of items) {
|
|
81
114
|
const indexed = item.indexed_at
|
|
82
115
|
? ansis.green("yes")
|
|
@@ -85,36 +118,29 @@ export function registerContextCommand(program: Command) {
|
|
|
85
118
|
const desc = item.description
|
|
86
119
|
? ansis.dim(item.description.slice(0, 29).padEnd(30))
|
|
87
120
|
: ansis.dim("".padEnd(30));
|
|
88
|
-
const source =
|
|
89
|
-
item.source_type === "url"
|
|
90
|
-
? ansis.cyan("url".padEnd(6))
|
|
91
|
-
: ansis.dim("file".padEnd(6));
|
|
92
121
|
const id = ansis.dim(item.id.padEnd(36));
|
|
122
|
+
const ref = formatDriveRef(item);
|
|
93
123
|
console.log(
|
|
94
|
-
`${id} ${
|
|
124
|
+
`${id} ${ref.slice(0, 49).padEnd(50)} ${item.title.slice(0, 19).padEnd(20)} ${desc} ${item.mime_type.slice(0, 14).padEnd(15)} ${updated} ${indexed}`,
|
|
95
125
|
);
|
|
96
126
|
}
|
|
97
127
|
|
|
98
|
-
|
|
128
|
+
const totals: string[] = [];
|
|
129
|
+
if (dirs.length > 0) {
|
|
130
|
+
totals.push(`${dirs.length} dir(s)`);
|
|
131
|
+
}
|
|
132
|
+
totals.push(`${items.length} item(s)`);
|
|
133
|
+
console.log(`\n${ansis.dim(totals.join(", "))}`);
|
|
99
134
|
}),
|
|
100
135
|
);
|
|
101
136
|
|
|
102
137
|
ctx
|
|
103
138
|
.command("add <paths...>")
|
|
104
139
|
.description("Add files, directories, or URLs to context")
|
|
105
|
-
.option(
|
|
106
|
-
"--prefix <prefix>",
|
|
107
|
-
"virtual path prefix (if omitted, an LLM suggests a folder for each file)",
|
|
108
|
-
)
|
|
109
|
-
.option("--name <path>", "custom context path (single URL only)")
|
|
110
140
|
.option(
|
|
111
141
|
"--on-conflict <policy>",
|
|
112
142
|
"collision policy: error | overwrite | skip",
|
|
113
|
-
"
|
|
114
|
-
)
|
|
115
|
-
.option(
|
|
116
|
-
"--auto-place",
|
|
117
|
-
"accept all LLM-suggested paths without confirmation",
|
|
143
|
+
"skip",
|
|
118
144
|
)
|
|
119
145
|
.option(
|
|
120
146
|
"--prompt-addition <text>",
|
|
@@ -131,32 +157,21 @@ export function registerContextCommand(program: Command) {
|
|
|
131
157
|
process.exit(1);
|
|
132
158
|
}
|
|
133
159
|
|
|
134
|
-
|
|
135
|
-
type
|
|
136
|
-
filePath: string;
|
|
137
|
-
contextPath: string | null; // null = defer to LLM placement
|
|
138
|
-
};
|
|
160
|
+
type FileToAdd = { filePath: string; target: DriveTarget };
|
|
161
|
+
type UrlToAdd = { url: string; target: DriveTarget | null };
|
|
139
162
|
const filesToAdd: FileToAdd[] = [];
|
|
140
|
-
const urlsToAdd:
|
|
163
|
+
const urlsToAdd: UrlToAdd[] = [];
|
|
141
164
|
const spinner = createSpinner("Scanning paths...").start();
|
|
142
165
|
|
|
143
|
-
// Validate --name: only valid with a single URL
|
|
144
|
-
if (opts.name && (paths.length > 1 || !paths[0] || !isUrl(paths[0]))) {
|
|
145
|
-
spinner.error({
|
|
146
|
-
text: "--name can only be used with a single URL",
|
|
147
|
-
});
|
|
148
|
-
process.exit(1);
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Explicit placement: user passed --prefix (or --name for URLs).
|
|
152
|
-
// Implicit placement: LLM decides per-file.
|
|
153
|
-
const explicitPlacement = typeof opts.prefix === "string";
|
|
154
|
-
const urlPrefix = opts.prefix ?? "/";
|
|
155
|
-
|
|
156
166
|
for (const path of paths) {
|
|
157
167
|
if (isUrl(path)) {
|
|
158
|
-
|
|
159
|
-
|
|
168
|
+
// We defer drive detection until after the fetch — the MCP server
|
|
169
|
+
// name is a useful hint — but pre-compute a best-guess from the URL
|
|
170
|
+
// alone for dedup against existing (drive, path) rows.
|
|
171
|
+
urlsToAdd.push({
|
|
172
|
+
url: path,
|
|
173
|
+
target: detectDriveFromUrl(path),
|
|
174
|
+
});
|
|
160
175
|
} else {
|
|
161
176
|
const resolvedPath = resolve(path);
|
|
162
177
|
let info: Awaited<ReturnType<typeof stat>>;
|
|
@@ -170,20 +185,15 @@ export function registerContextCommand(program: Command) {
|
|
|
170
185
|
if (info.isDirectory()) {
|
|
171
186
|
const entries = await walkDirectory(resolvedPath);
|
|
172
187
|
for (const filePath of entries) {
|
|
173
|
-
const relativePath = filePath.slice(resolvedPath.length);
|
|
174
188
|
filesToAdd.push({
|
|
175
189
|
filePath,
|
|
176
|
-
|
|
177
|
-
? join(opts.prefix, relativePath)
|
|
178
|
-
: null,
|
|
190
|
+
target: { drive: "disk", path: filePath },
|
|
179
191
|
});
|
|
180
192
|
}
|
|
181
193
|
} else {
|
|
182
194
|
filesToAdd.push({
|
|
183
195
|
filePath: resolvedPath,
|
|
184
|
-
|
|
185
|
-
? join(opts.prefix, basename(resolvedPath))
|
|
186
|
-
: null,
|
|
196
|
+
target: { drive: "disk", path: resolvedPath },
|
|
187
197
|
});
|
|
188
198
|
}
|
|
189
199
|
}
|
|
@@ -197,41 +207,32 @@ export function registerContextCommand(program: Command) {
|
|
|
197
207
|
const config = await loadConfig(dir);
|
|
198
208
|
const CONCURRENCY = 10;
|
|
199
209
|
|
|
200
|
-
// Phase 0:
|
|
201
|
-
//
|
|
210
|
+
// Phase 0: (drive, path) dedup — items already in context are routed
|
|
211
|
+
// per --on-conflict before we pay for the describe or fetch.
|
|
202
212
|
type AlreadyInContext = {
|
|
203
|
-
|
|
204
|
-
sourceType: "file" | "url";
|
|
213
|
+
target: DriveTarget;
|
|
205
214
|
existing: ContextItem;
|
|
206
215
|
};
|
|
207
216
|
const alreadyInContext: AlreadyInContext[] = [];
|
|
208
217
|
const remainingFiles: FileToAdd[] = [];
|
|
209
|
-
const remainingUrls:
|
|
218
|
+
const remainingUrls: UrlToAdd[] = [];
|
|
210
219
|
|
|
211
220
|
for (const f of filesToAdd) {
|
|
212
|
-
const existing = await
|
|
213
|
-
conn,
|
|
214
|
-
f.filePath,
|
|
215
|
-
"file",
|
|
216
|
-
);
|
|
221
|
+
const existing = await getContextItem(conn, f.target);
|
|
217
222
|
if (existing) {
|
|
218
|
-
alreadyInContext.push({
|
|
219
|
-
sourcePath: f.filePath,
|
|
220
|
-
sourceType: "file",
|
|
221
|
-
existing,
|
|
222
|
-
});
|
|
223
|
+
alreadyInContext.push({ target: f.target, existing });
|
|
223
224
|
} else {
|
|
224
225
|
remainingFiles.push(f);
|
|
225
226
|
}
|
|
226
227
|
}
|
|
227
228
|
for (const u of urlsToAdd) {
|
|
228
|
-
|
|
229
|
+
if (!u.target) {
|
|
230
|
+
remainingUrls.push(u);
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
233
|
+
const existing = await getContextItem(conn, u.target);
|
|
229
234
|
if (existing) {
|
|
230
|
-
alreadyInContext.push({
|
|
231
|
-
sourcePath: u.url,
|
|
232
|
-
sourceType: "url",
|
|
233
|
-
existing,
|
|
234
|
-
});
|
|
235
|
+
alreadyInContext.push({ target: u.target, existing });
|
|
235
236
|
} else {
|
|
236
237
|
remainingUrls.push(u);
|
|
237
238
|
}
|
|
@@ -244,31 +245,27 @@ export function registerContextCommand(program: Command) {
|
|
|
244
245
|
if (alreadyInContext.length > 0) {
|
|
245
246
|
if (policy === "error") {
|
|
246
247
|
logger.error(
|
|
247
|
-
`${alreadyInContext.length} item(s) already in context
|
|
248
|
+
`${alreadyInContext.length} item(s) already in context:`,
|
|
248
249
|
);
|
|
249
250
|
for (const a of alreadyInContext) {
|
|
250
251
|
console.log(
|
|
251
|
-
` ${ansis.red("✗")} ${a.
|
|
252
|
+
` ${ansis.red("✗")} ${formatDriveRef(a.target)} (id: ${a.existing.id})`,
|
|
252
253
|
);
|
|
253
254
|
}
|
|
254
255
|
logger.dim(
|
|
255
|
-
"Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them
|
|
256
|
+
"Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them.",
|
|
256
257
|
);
|
|
257
258
|
process.exit(1);
|
|
258
259
|
}
|
|
259
260
|
|
|
260
261
|
if (policy === "skip") {
|
|
261
262
|
for (const a of alreadyInContext) {
|
|
262
|
-
logger.dim(
|
|
263
|
-
|
|
264
|
-
);
|
|
265
|
-
dedupSkipped.push(a.existing.context_path);
|
|
263
|
+
logger.dim(`⊘ already in context: ${formatDriveRef(a.target)}`);
|
|
264
|
+
dedupSkipped.push(formatDriveRef(a.target));
|
|
266
265
|
}
|
|
267
266
|
} else {
|
|
268
|
-
// overwrite: refresh existing items (diff + selective re-embed),
|
|
269
|
-
// preserving their original context_path.
|
|
270
267
|
const itemsToRefresh = alreadyInContext.map((a) => a.existing);
|
|
271
|
-
const hasUrls = itemsToRefresh.some((i) => i.
|
|
268
|
+
const hasUrls = itemsToRefresh.some((i) => i.drive !== "disk");
|
|
272
269
|
const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
|
|
273
270
|
|
|
274
271
|
const refreshSpinner = createSpinner(
|
|
@@ -291,121 +288,43 @@ export function registerContextCommand(program: Command) {
|
|
|
291
288
|
text: `Refreshed ${refreshResult.checked} existing item(s): ${refreshResult.updated} updated, ${refreshResult.unchanged} unchanged, ${refreshResult.missing} missing.`,
|
|
292
289
|
});
|
|
293
290
|
|
|
294
|
-
// Count everything we processed OK (updated + unchanged) as
|
|
295
|
-
// "refreshed" for the summary. Missing/error items are reported
|
|
296
|
-
// inline below and don't count toward success.
|
|
297
291
|
refreshedCount = refreshResult.updated + refreshResult.unchanged;
|
|
298
292
|
refreshedChunks = refreshResult.chunks;
|
|
299
293
|
for (const item of refreshResult.items) {
|
|
300
294
|
if (item.status === "missing") {
|
|
301
|
-
logger.warn(` Missing: ${item.
|
|
295
|
+
logger.warn(` Missing: ${item.ref}`);
|
|
302
296
|
} else if (item.status === "error") {
|
|
303
|
-
logger.warn(
|
|
304
|
-
` Error refreshing ${item.source_path}: ${item.error}`,
|
|
305
|
-
);
|
|
297
|
+
logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
|
|
306
298
|
}
|
|
307
299
|
}
|
|
308
300
|
}
|
|
309
301
|
}
|
|
310
302
|
|
|
311
|
-
//
|
|
312
|
-
// (LLM placement, description, insert, embed) see only truly-new items.
|
|
313
|
-
filesToAdd.splice(0, filesToAdd.length, ...remainingFiles);
|
|
314
|
-
urlsToAdd.splice(0, urlsToAdd.length, ...remainingUrls);
|
|
315
|
-
|
|
316
|
-
// Phase 1.5: LLM placement for files without an explicit path
|
|
317
|
-
const needsPlacement = filesToAdd.filter((f) => f.contextPath === null);
|
|
318
|
-
// description cache keyed by filePath — populated when LLM placement runs,
|
|
319
|
-
// reused in addFile to avoid a second describe call.
|
|
320
|
-
const descriptionCache = new Map<string, string>();
|
|
321
|
-
|
|
322
|
-
if (needsPlacement.length > 0) {
|
|
323
|
-
if (!config.anthropic_api_key) {
|
|
324
|
-
logger.error(
|
|
325
|
-
"No anthropic_api_key configured — cannot auto-place files. Pass --prefix to specify a folder.",
|
|
326
|
-
);
|
|
327
|
-
process.exit(1);
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
const existingTree = await renderExistingTree(conn);
|
|
331
|
-
const placeSpinner = createSpinner(
|
|
332
|
-
`Choosing paths for 0/${needsPlacement.length} file(s)...`,
|
|
333
|
-
).start();
|
|
334
|
-
let placed = 0;
|
|
335
|
-
|
|
336
|
-
for (let i = 0; i < needsPlacement.length; i += CONCURRENCY) {
|
|
337
|
-
const batch = needsPlacement.slice(i, i + CONCURRENCY);
|
|
338
|
-
await Promise.all(
|
|
339
|
-
batch.map(async (entry) => {
|
|
340
|
-
const suggestion = await suggestPathForFile(
|
|
341
|
-
entry.filePath,
|
|
342
|
-
config,
|
|
343
|
-
existingTree,
|
|
344
|
-
);
|
|
345
|
-
entry.contextPath =
|
|
346
|
-
suggestion?.suggested_path ?? `/${basename(entry.filePath)}`;
|
|
347
|
-
if (suggestion?.description) {
|
|
348
|
-
descriptionCache.set(entry.filePath, suggestion.description);
|
|
349
|
-
}
|
|
350
|
-
placed++;
|
|
351
|
-
placeSpinner.update({
|
|
352
|
-
text: `Choosing paths for ${placed}/${needsPlacement.length} file(s)...`,
|
|
353
|
-
});
|
|
354
|
-
}),
|
|
355
|
-
);
|
|
356
|
-
}
|
|
357
|
-
placeSpinner.success({
|
|
358
|
-
text: `Chose paths for ${placed} file(s).`,
|
|
359
|
-
});
|
|
360
|
-
|
|
361
|
-
// Confirm in TTY unless --auto-place
|
|
362
|
-
const isTTY = Boolean(process.stdin.isTTY && process.stdout.isTTY);
|
|
363
|
-
if (isTTY && !opts.autoPlace) {
|
|
364
|
-
console.log("");
|
|
365
|
-
console.log(ansis.bold("Suggested paths:"));
|
|
366
|
-
for (const entry of needsPlacement) {
|
|
367
|
-
console.log(
|
|
368
|
-
` ${ansis.dim(entry.filePath)} → ${ansis.cyan(entry.contextPath ?? "")}`,
|
|
369
|
-
);
|
|
370
|
-
}
|
|
371
|
-
const accepted = await confirmYesNo("Accept these paths? (Y/n): ");
|
|
372
|
-
if (!accepted) {
|
|
373
|
-
logger.warn(
|
|
374
|
-
"Aborted. Re-run with --prefix to place files manually, or --auto-place to skip this prompt.",
|
|
375
|
-
);
|
|
376
|
-
process.exit(1);
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
// Phase 2: Upsert DB records (batched, parallel LLM descriptions)
|
|
303
|
+
// Phase 1: Upsert DB records (batched, parallel LLM descriptions)
|
|
382
304
|
let addCompleted = 0;
|
|
383
|
-
const itemIds: { id: string;
|
|
384
|
-
const conflicts: {
|
|
305
|
+
const itemIds: { id: string; target: DriveTarget }[] = [];
|
|
306
|
+
const conflicts: { target: DriveTarget; existingId: string }[] = [];
|
|
385
307
|
const skipped: string[] = [];
|
|
386
308
|
|
|
387
|
-
|
|
388
|
-
if (filesToAdd.length > 0) {
|
|
309
|
+
if (remainingFiles.length > 0) {
|
|
389
310
|
const fileSpinner = createSpinner(
|
|
390
|
-
`Adding and describing 0/${
|
|
311
|
+
`Adding and describing 0/${remainingFiles.length} file(s)...`,
|
|
391
312
|
).start();
|
|
392
313
|
|
|
393
|
-
for (let i = 0; i <
|
|
394
|
-
const batch =
|
|
314
|
+
for (let i = 0; i < remainingFiles.length; i += CONCURRENCY) {
|
|
315
|
+
const batch = remainingFiles.slice(i, i + CONCURRENCY);
|
|
395
316
|
const results = await Promise.all(
|
|
396
|
-
batch.map(async ({ filePath,
|
|
397
|
-
if (contextPath === null) return null; // unreachable — placement filled it
|
|
317
|
+
batch.map(async ({ filePath, target }) => {
|
|
398
318
|
const result = await addFile(
|
|
399
319
|
conn,
|
|
400
320
|
filePath,
|
|
401
|
-
|
|
321
|
+
target,
|
|
402
322
|
config,
|
|
403
323
|
policy,
|
|
404
|
-
descriptionCache.get(filePath),
|
|
405
324
|
);
|
|
406
325
|
addCompleted++;
|
|
407
326
|
fileSpinner.update({
|
|
408
|
-
text: `Adding and describing ${addCompleted}/${
|
|
327
|
+
text: `Adding and describing ${addCompleted}/${remainingFiles.length} file(s)...`,
|
|
409
328
|
});
|
|
410
329
|
return result;
|
|
411
330
|
}),
|
|
@@ -413,14 +332,11 @@ export function registerContextCommand(program: Command) {
|
|
|
413
332
|
for (const r of results) {
|
|
414
333
|
if (!r) continue;
|
|
415
334
|
if (r.kind === "added") {
|
|
416
|
-
itemIds.push({ id: r.id,
|
|
335
|
+
itemIds.push({ id: r.id, target: r.target });
|
|
417
336
|
} else if (r.kind === "conflict") {
|
|
418
|
-
conflicts.push({
|
|
419
|
-
contextPath: r.contextPath,
|
|
420
|
-
existingId: r.existingId,
|
|
421
|
-
});
|
|
337
|
+
conflicts.push({ target: r.target, existingId: r.existingId });
|
|
422
338
|
} else if (r.kind === "skipped") {
|
|
423
|
-
skipped.push(r.
|
|
339
|
+
skipped.push(formatDriveRef(r.target));
|
|
424
340
|
}
|
|
425
341
|
}
|
|
426
342
|
}
|
|
@@ -430,8 +346,7 @@ export function registerContextCommand(program: Command) {
|
|
|
430
346
|
});
|
|
431
347
|
}
|
|
432
348
|
|
|
433
|
-
|
|
434
|
-
if (urlsToAdd.length > 0) {
|
|
349
|
+
if (remainingUrls.length > 0) {
|
|
435
350
|
const mcpxClient = await createMcpxClient(dir);
|
|
436
351
|
if (!mcpxClient) {
|
|
437
352
|
logger.dim(
|
|
@@ -441,36 +356,37 @@ export function registerContextCommand(program: Command) {
|
|
|
441
356
|
|
|
442
357
|
let urlIdx = 0;
|
|
443
358
|
let urlAdded = 0;
|
|
444
|
-
for (const { url
|
|
359
|
+
for (const { url } of remainingUrls) {
|
|
445
360
|
urlIdx++;
|
|
446
361
|
console.log(
|
|
447
|
-
`\n${ansis.bold(`[${urlIdx}/${
|
|
362
|
+
`\n${ansis.bold(`[${urlIdx}/${remainingUrls.length}]`)} ${ansis.cyan(url)}`,
|
|
448
363
|
);
|
|
449
364
|
const result = await addUrl(
|
|
450
365
|
conn,
|
|
451
366
|
config,
|
|
452
367
|
url,
|
|
453
|
-
contextPath,
|
|
454
368
|
mcpxClient,
|
|
455
369
|
opts.promptAddition,
|
|
456
370
|
policy,
|
|
457
371
|
);
|
|
458
372
|
if (result.ok) {
|
|
459
373
|
urlAdded++;
|
|
460
|
-
itemIds.push({ id: result.id,
|
|
461
|
-
console.log(
|
|
374
|
+
itemIds.push({ id: result.id, target: result.target });
|
|
375
|
+
console.log(
|
|
376
|
+
` ${ansis.green("✔")} stored at ${formatDriveRef(result.target)}`,
|
|
377
|
+
);
|
|
462
378
|
} else if (result.kind === "conflict") {
|
|
463
379
|
conflicts.push({
|
|
464
|
-
|
|
380
|
+
target: result.target,
|
|
465
381
|
existingId: result.existingId,
|
|
466
382
|
});
|
|
467
383
|
console.log(
|
|
468
|
-
` ${ansis.red("✗")} path already exists: ${
|
|
384
|
+
` ${ansis.red("✗")} path already exists: ${formatDriveRef(result.target)}`,
|
|
469
385
|
);
|
|
470
386
|
} else if (result.kind === "skipped") {
|
|
471
|
-
skipped.push(
|
|
387
|
+
skipped.push(formatDriveRef(result.target));
|
|
472
388
|
console.log(
|
|
473
|
-
` ${ansis.yellow("⊘")} skipped (path exists): ${
|
|
389
|
+
` ${ansis.yellow("⊘")} skipped (path exists): ${formatDriveRef(result.target)}`,
|
|
474
390
|
);
|
|
475
391
|
} else if (result.actionable) {
|
|
476
392
|
console.log(
|
|
@@ -486,8 +402,8 @@ export function registerContextCommand(program: Command) {
|
|
|
486
402
|
}
|
|
487
403
|
}
|
|
488
404
|
|
|
489
|
-
const urlSummary = `Added ${urlAdded}/${
|
|
490
|
-
if (urlAdded ===
|
|
405
|
+
const urlSummary = `Added ${urlAdded}/${remainingUrls.length} URL(s).`;
|
|
406
|
+
if (urlAdded === remainingUrls.length) {
|
|
491
407
|
console.log(`\n${ansis.green("✔")} ${urlSummary}`);
|
|
492
408
|
} else if (urlAdded === 0) {
|
|
493
409
|
console.log(`\n${ansis.red("✗")} ${urlSummary}`);
|
|
@@ -496,28 +412,19 @@ export function registerContextCommand(program: Command) {
|
|
|
496
412
|
}
|
|
497
413
|
}
|
|
498
414
|
|
|
499
|
-
// Report conflicts before embeddings so the user sees them prominently.
|
|
500
|
-
// Phase 0 already handled source-path matches, so anything here is a
|
|
501
|
-
// target-path collision — an LLM-suggested (or explicit) path that
|
|
502
|
-
// another unrelated item already occupies.
|
|
503
415
|
if (conflicts.length > 0) {
|
|
504
416
|
logger.error(
|
|
505
|
-
`${conflicts.length}
|
|
417
|
+
`${conflicts.length} (drive, path) collision(s) — nothing written for these items:`,
|
|
506
418
|
);
|
|
507
419
|
for (const c of conflicts) {
|
|
508
420
|
console.log(
|
|
509
|
-
` ${ansis.red("✗")} ${c.
|
|
421
|
+
` ${ansis.red("✗")} ${formatDriveRef(c.target)} (existing id: ${c.existingId})`,
|
|
510
422
|
);
|
|
511
423
|
}
|
|
512
|
-
logger.dim(
|
|
513
|
-
"The suggested path is already in use by a different source. Re-run with --prefix to place these items elsewhere, or delete the existing item first.",
|
|
514
|
-
);
|
|
515
424
|
}
|
|
516
425
|
|
|
517
|
-
// Merge Phase 0 skips into the skip list used by the final summary.
|
|
518
426
|
skipped.push(...dedupSkipped);
|
|
519
427
|
|
|
520
|
-
// Phase 3: Chunk + embed in parallel (network I/O)
|
|
521
428
|
if (itemIds.length === 0 || !config.openai_api_key) {
|
|
522
429
|
if (!config.openai_api_key) {
|
|
523
430
|
logger.dim("Skipping embeddings (no OpenAI API key configured).");
|
|
@@ -572,7 +479,6 @@ export function registerContextCommand(program: Command) {
|
|
|
572
479
|
text: `Embedded ${prepared.length} item(s).`,
|
|
573
480
|
});
|
|
574
481
|
|
|
575
|
-
// Phase 4: Store embeddings (sequential, fast DB writes)
|
|
576
482
|
let chunks = 0;
|
|
577
483
|
let filesAdded = 0;
|
|
578
484
|
let filesUpdated = 0;
|
|
@@ -631,8 +537,12 @@ export function registerContextCommand(program: Command) {
|
|
|
631
537
|
console.log(
|
|
632
538
|
`${ansis.bold(`${i + 1}.`)} ${ansis.cyan(r.title)} ${ansis.dim(`(${score}%)`)}`,
|
|
633
539
|
);
|
|
540
|
+
const ref =
|
|
541
|
+
r.drive && r.path
|
|
542
|
+
? formatDriveRef({ drive: r.drive, path: r.path })
|
|
543
|
+
: r.context_item_id;
|
|
634
544
|
console.log(
|
|
635
|
-
` ${ansis.dim(
|
|
545
|
+
` ${ansis.dim(ref)} ${ansis.dim(fmtDate(r.created_at))}`,
|
|
636
546
|
);
|
|
637
547
|
if (r.chunk_content) {
|
|
638
548
|
const snippet = r.chunk_content.slice(0, 120).replace(/\n/g, " ");
|
|
@@ -645,26 +555,30 @@ export function registerContextCommand(program: Command) {
|
|
|
645
555
|
|
|
646
556
|
registerSearchToolSubcommands(search);
|
|
647
557
|
ctx
|
|
648
|
-
.command("delete <
|
|
649
|
-
.description("Delete a context entry
|
|
650
|
-
.action((
|
|
558
|
+
.command("delete <ref>")
|
|
559
|
+
.description("Delete a context entry (UUID or drive:/path)")
|
|
560
|
+
.action((ref: string) =>
|
|
651
561
|
withDb(program, async (conn) => {
|
|
652
|
-
const
|
|
653
|
-
if (!
|
|
654
|
-
logger.error(`Context entry not found: ${
|
|
562
|
+
const item = await resolveContextItem(conn, ref);
|
|
563
|
+
if (!item) {
|
|
564
|
+
logger.error(`Context entry not found: ${ref}`);
|
|
655
565
|
process.exit(1);
|
|
656
566
|
}
|
|
657
|
-
|
|
567
|
+
await deleteContextItemByPath(conn, {
|
|
568
|
+
drive: item.drive,
|
|
569
|
+
path: item.path,
|
|
570
|
+
});
|
|
571
|
+
logger.success(`Deleted context entry: ${formatDriveRef(item)}`);
|
|
658
572
|
}),
|
|
659
573
|
);
|
|
660
574
|
ctx
|
|
661
|
-
.command("chunks <
|
|
575
|
+
.command("chunks <ref>")
|
|
662
576
|
.description("Show chunks and embeddings for a context entry")
|
|
663
|
-
.action((
|
|
577
|
+
.action((ref: string) =>
|
|
664
578
|
withDb(program, async (conn) => {
|
|
665
|
-
const item = await resolveContextItem(conn,
|
|
579
|
+
const item = await resolveContextItem(conn, ref);
|
|
666
580
|
if (!item) {
|
|
667
|
-
logger.error(`Context entry not found: ${
|
|
581
|
+
logger.error(`Context entry not found: ${ref}`);
|
|
668
582
|
process.exit(1);
|
|
669
583
|
}
|
|
670
584
|
|
|
@@ -676,7 +590,7 @@ export function registerContextCommand(program: Command) {
|
|
|
676
590
|
const embeddings = await getEmbeddingsForItem(conn, item.id);
|
|
677
591
|
|
|
678
592
|
console.log(ansis.bold(item.title));
|
|
679
|
-
console.log(`
|
|
593
|
+
console.log(` Ref: ${formatDriveRef(item)}`);
|
|
680
594
|
console.log(` Indexed: ${fmtDate(item.indexed_at)}`);
|
|
681
595
|
console.log(` Chunks: ${embeddings.length}`);
|
|
682
596
|
console.log("");
|
|
@@ -706,44 +620,43 @@ export function registerContextCommand(program: Command) {
|
|
|
706
620
|
);
|
|
707
621
|
|
|
708
622
|
ctx
|
|
709
|
-
.command("refresh [
|
|
623
|
+
.command("refresh [refs...]")
|
|
710
624
|
.description(
|
|
711
|
-
"Re-import
|
|
625
|
+
"Re-import items from their origin (disk / URL / MCP) and re-embed if content changed",
|
|
712
626
|
)
|
|
713
|
-
.option("--all", "refresh
|
|
714
|
-
.action((
|
|
627
|
+
.option("--all", "refresh every item (except those on drive=agent)")
|
|
628
|
+
.action((refs: string[], opts: { all?: boolean }) =>
|
|
715
629
|
withDb(program, async (conn, dir) => {
|
|
716
|
-
const items = await resolveItems(conn,
|
|
630
|
+
const items = await resolveItems(conn, refs, !!opts.all);
|
|
717
631
|
if (items.length === 0) {
|
|
718
632
|
logger.error("No matching context entries found.");
|
|
719
633
|
process.exit(1);
|
|
720
634
|
}
|
|
721
635
|
|
|
722
|
-
const
|
|
723
|
-
if (
|
|
724
|
-
logger.dim("No items
|
|
636
|
+
const refreshable = items.filter((i) => i.drive !== "agent");
|
|
637
|
+
if (refreshable.length === 0) {
|
|
638
|
+
logger.dim("No refreshable items (everything is on drive=agent).");
|
|
725
639
|
return;
|
|
726
640
|
}
|
|
727
|
-
if (
|
|
641
|
+
if (refreshable.length < items.length) {
|
|
728
642
|
logger.dim(
|
|
729
|
-
`Skipping ${items.length -
|
|
643
|
+
`Skipping ${items.length - refreshable.length} agent-drive item(s) with no external origin.`,
|
|
730
644
|
);
|
|
731
645
|
}
|
|
732
646
|
|
|
733
647
|
const config = await loadConfig(dir);
|
|
734
648
|
|
|
735
|
-
|
|
736
|
-
const hasUrls = sourced.some((i) => i.source_type === "url");
|
|
649
|
+
const hasUrls = refreshable.some((i) => i.drive !== "disk");
|
|
737
650
|
const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
|
|
738
651
|
|
|
739
652
|
const refreshSpinner = createSpinner(
|
|
740
|
-
`Refreshing 0/${
|
|
653
|
+
`Refreshing 0/${refreshable.length} items...`,
|
|
741
654
|
).start();
|
|
742
655
|
const embedSpinner = createSpinner("Embedding 0 item(s)...");
|
|
743
656
|
|
|
744
657
|
const result = await refreshContextItems(
|
|
745
658
|
conn,
|
|
746
|
-
|
|
659
|
+
refreshable,
|
|
747
660
|
config,
|
|
748
661
|
mcpxClient,
|
|
749
662
|
{
|
|
@@ -767,11 +680,9 @@ export function registerContextCommand(program: Command) {
|
|
|
767
680
|
|
|
768
681
|
for (const item of result.items) {
|
|
769
682
|
if (item.status === "missing") {
|
|
770
|
-
logger.warn(` Missing: ${item.
|
|
683
|
+
logger.warn(` Missing: ${item.ref}`);
|
|
771
684
|
} else if (item.status === "error") {
|
|
772
|
-
logger.warn(
|
|
773
|
-
` Error refreshing ${item.source_path}: ${item.error}`,
|
|
774
|
-
);
|
|
685
|
+
logger.warn(` Error refreshing ${item.ref}: ${item.error}`);
|
|
775
686
|
}
|
|
776
687
|
}
|
|
777
688
|
|
|
@@ -788,30 +699,52 @@ export function registerContextCommand(program: Command) {
|
|
|
788
699
|
}),
|
|
789
700
|
);
|
|
790
701
|
|
|
791
|
-
// Register context tool subcommands (read, write, edit, list-dir, etc.)
|
|
792
|
-
// Must come after management subcommands so collision detection works.
|
|
793
702
|
registerContextToolSubcommands(ctx);
|
|
794
703
|
}
|
|
795
704
|
|
|
796
705
|
async function resolveItems(
|
|
797
706
|
conn: DbConnection,
|
|
798
|
-
|
|
707
|
+
refs: string[],
|
|
799
708
|
all: boolean,
|
|
800
709
|
): Promise<ContextItem[]> {
|
|
801
|
-
if (!
|
|
802
|
-
logger.error("Provide
|
|
710
|
+
if (!all && refs.length === 0) {
|
|
711
|
+
logger.error("Provide at least one ref or use --all.");
|
|
803
712
|
process.exit(1);
|
|
804
713
|
}
|
|
805
714
|
if (all) return listContextItems(conn);
|
|
806
|
-
|
|
807
|
-
const
|
|
715
|
+
|
|
716
|
+
const byId = new Map<string, ContextItem>();
|
|
717
|
+
const unresolved: string[] = [];
|
|
718
|
+
for (const r of refs) {
|
|
719
|
+
const matched = await resolveOne(conn, r);
|
|
720
|
+
if (matched.length === 0) {
|
|
721
|
+
unresolved.push(r);
|
|
722
|
+
continue;
|
|
723
|
+
}
|
|
724
|
+
for (const item of matched) byId.set(item.id, item);
|
|
725
|
+
}
|
|
726
|
+
for (const r of unresolved) logger.warn(` Not found: ${r}`);
|
|
727
|
+
return [...byId.values()];
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
async function resolveOne(
|
|
731
|
+
conn: DbConnection,
|
|
732
|
+
ref: string,
|
|
733
|
+
): Promise<ContextItem[]> {
|
|
734
|
+
const exact = await resolveContextItem(conn, ref);
|
|
808
735
|
if (exact) return [exact];
|
|
809
|
-
|
|
736
|
+
// Prefix expansion: only valid for `drive:/path` form.
|
|
737
|
+
const parsed = parseDriveRef(ref);
|
|
738
|
+
if (parsed) {
|
|
739
|
+
return listContextItemsByPrefix(conn, parsed.drive, parsed.path, {
|
|
740
|
+
recursive: true,
|
|
741
|
+
});
|
|
742
|
+
}
|
|
743
|
+
return [];
|
|
810
744
|
}
|
|
811
745
|
|
|
812
746
|
type ConflictPolicy = "error" | "overwrite" | "skip";
|
|
813
747
|
|
|
814
|
-
/** Format the final "X added, Y refreshed, Z skipped — N chunks" line. */
|
|
815
748
|
function buildSummary(args: {
|
|
816
749
|
added: number;
|
|
817
750
|
updated?: number;
|
|
@@ -832,32 +765,29 @@ function buildSummary(args: {
|
|
|
832
765
|
}
|
|
833
766
|
|
|
834
767
|
type AddFileResult =
|
|
835
|
-
| { kind: "added"; id: string;
|
|
836
|
-
| { kind: "skipped";
|
|
837
|
-
| { kind: "conflict";
|
|
838
|
-
| { kind: "failed";
|
|
768
|
+
| { kind: "added"; id: string; target: DriveTarget }
|
|
769
|
+
| { kind: "skipped"; target: DriveTarget }
|
|
770
|
+
| { kind: "conflict"; target: DriveTarget; existingId: string }
|
|
771
|
+
| { kind: "failed"; target: DriveTarget; error: string };
|
|
839
772
|
|
|
840
|
-
/** Upsert a file into context honoring the collision policy. */
|
|
841
773
|
async function addFile(
|
|
842
774
|
conn: DbConnection,
|
|
843
775
|
filePath: string,
|
|
844
|
-
|
|
776
|
+
target: DriveTarget,
|
|
845
777
|
config: Required<BotholomewConfig>,
|
|
846
778
|
policy: ConflictPolicy,
|
|
847
|
-
cachedDescription?: string,
|
|
848
779
|
): Promise<AddFileResult | null> {
|
|
849
780
|
try {
|
|
850
|
-
// Pre-flight conflict check so we don't waste a describe call.
|
|
851
781
|
if (policy !== "overwrite") {
|
|
852
|
-
const existing = await
|
|
782
|
+
const existing = await getContextItem(conn, target);
|
|
853
783
|
if (existing) {
|
|
854
784
|
if (policy === "skip") {
|
|
855
|
-
logger.dim(` ⊘ skipped (
|
|
856
|
-
return { kind: "skipped",
|
|
785
|
+
logger.dim(` ⊘ skipped (exists): ${formatDriveRef(target)}`);
|
|
786
|
+
return { kind: "skipped", target };
|
|
857
787
|
}
|
|
858
788
|
return {
|
|
859
789
|
kind: "conflict",
|
|
860
|
-
|
|
790
|
+
target,
|
|
861
791
|
existingId: existing.id,
|
|
862
792
|
};
|
|
863
793
|
}
|
|
@@ -869,22 +799,20 @@ async function addFile(
|
|
|
869
799
|
const textual = isText(filename) !== false;
|
|
870
800
|
const content = textual ? await bunFile.text() : null;
|
|
871
801
|
|
|
872
|
-
const description =
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
filePath,
|
|
879
|
-
}));
|
|
802
|
+
const description = await generateDescription(config, {
|
|
803
|
+
filename,
|
|
804
|
+
mimeType,
|
|
805
|
+
content,
|
|
806
|
+
filePath,
|
|
807
|
+
});
|
|
880
808
|
|
|
881
809
|
const itemParams = {
|
|
882
810
|
title: filename,
|
|
883
811
|
description,
|
|
884
812
|
content: content ?? undefined,
|
|
885
813
|
mimeType,
|
|
886
|
-
|
|
887
|
-
|
|
814
|
+
drive: target.drive,
|
|
815
|
+
path: target.path,
|
|
888
816
|
isTextual: textual,
|
|
889
817
|
} as const;
|
|
890
818
|
|
|
@@ -893,50 +821,41 @@ async function addFile(
|
|
|
893
821
|
? await upsertContextItem(conn, itemParams)
|
|
894
822
|
: await createContextItemStrict(conn, itemParams);
|
|
895
823
|
|
|
896
|
-
return textual && content
|
|
897
|
-
? { kind: "added", id: item.id, contextPath: item.context_path }
|
|
898
|
-
: null;
|
|
824
|
+
return textual && content ? { kind: "added", id: item.id, target } : null;
|
|
899
825
|
} catch (err) {
|
|
900
826
|
if (err instanceof PathConflictError) {
|
|
901
|
-
|
|
902
|
-
return {
|
|
903
|
-
kind: "conflict",
|
|
904
|
-
contextPath,
|
|
905
|
-
existingId: err.existingId,
|
|
906
|
-
};
|
|
827
|
+
return { kind: "conflict", target, existingId: err.existingId };
|
|
907
828
|
}
|
|
908
|
-
logger.warn(` ! ${
|
|
909
|
-
return { kind: "failed",
|
|
829
|
+
logger.warn(` ! ${formatDriveRef(target)}: ${err}`);
|
|
830
|
+
return { kind: "failed", target, error: String(err) };
|
|
910
831
|
}
|
|
911
832
|
}
|
|
912
833
|
|
|
913
|
-
/** Fetch a URL and upsert into context. */
|
|
914
834
|
type AddUrlResult =
|
|
915
|
-
| { ok: true; id: string }
|
|
916
|
-
| { ok: false; kind: "conflict"; existingId: string }
|
|
917
|
-
| { ok: false; kind: "skipped" }
|
|
835
|
+
| { ok: true; id: string; target: DriveTarget }
|
|
836
|
+
| { ok: false; kind: "conflict"; target: DriveTarget; existingId: string }
|
|
837
|
+
| { ok: false; kind: "skipped"; target: DriveTarget }
|
|
918
838
|
| { ok: false; kind: "fetch-failed"; error: string; actionable: boolean };
|
|
919
839
|
|
|
920
840
|
async function addUrl(
|
|
921
841
|
conn: DbConnection,
|
|
922
842
|
config: Required<BotholomewConfig>,
|
|
923
843
|
url: string,
|
|
924
|
-
contextPath: string,
|
|
925
844
|
mcpxClient: Awaited<ReturnType<typeof createMcpxClient>>,
|
|
926
845
|
promptAddition: string | undefined,
|
|
927
846
|
policy: ConflictPolicy,
|
|
928
847
|
): Promise<AddUrlResult> {
|
|
929
|
-
// Pre-flight conflict check — skip the expensive fetch if we'd collide.
|
|
930
|
-
if (policy !== "overwrite") {
|
|
931
|
-
const existing = await getContextItemByPath(conn, contextPath);
|
|
932
|
-
if (existing) {
|
|
933
|
-
if (policy === "skip") return { ok: false, kind: "skipped" };
|
|
934
|
-
return { ok: false, kind: "conflict", existingId: existing.id };
|
|
935
|
-
}
|
|
936
|
-
}
|
|
937
|
-
|
|
938
848
|
try {
|
|
939
849
|
const fetched = await fetchUrl(url, config, mcpxClient, promptAddition);
|
|
850
|
+
const target: DriveTarget = { drive: fetched.drive, path: fetched.path };
|
|
851
|
+
|
|
852
|
+
if (policy !== "overwrite") {
|
|
853
|
+
const existing = await getContextItem(conn, target);
|
|
854
|
+
if (existing) {
|
|
855
|
+
if (policy === "skip") return { ok: false, kind: "skipped", target };
|
|
856
|
+
return { ok: false, kind: "conflict", target, existingId: existing.id };
|
|
857
|
+
}
|
|
858
|
+
}
|
|
940
859
|
|
|
941
860
|
const description = await generateDescription(config, {
|
|
942
861
|
filename: new URL(url).hostname,
|
|
@@ -949,9 +868,8 @@ async function addUrl(
|
|
|
949
868
|
description,
|
|
950
869
|
content: fetched.content,
|
|
951
870
|
mimeType: fetched.mimeType,
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
contextPath,
|
|
871
|
+
drive: target.drive,
|
|
872
|
+
path: target.path,
|
|
955
873
|
isTextual: true,
|
|
956
874
|
};
|
|
957
875
|
|
|
@@ -960,10 +878,15 @@ async function addUrl(
|
|
|
960
878
|
? await upsertContextItem(conn, itemParams)
|
|
961
879
|
: await createContextItemStrict(conn, itemParams);
|
|
962
880
|
|
|
963
|
-
return { ok: true, id: item.id };
|
|
881
|
+
return { ok: true, id: item.id, target };
|
|
964
882
|
} catch (err) {
|
|
965
883
|
if (err instanceof PathConflictError) {
|
|
966
|
-
return {
|
|
884
|
+
return {
|
|
885
|
+
ok: false,
|
|
886
|
+
kind: "conflict",
|
|
887
|
+
target: { drive: err.drive, path: err.path },
|
|
888
|
+
existingId: err.existingId,
|
|
889
|
+
};
|
|
967
890
|
}
|
|
968
891
|
if (err instanceof FetchFailureError) {
|
|
969
892
|
return {
|
|
@@ -982,79 +905,6 @@ async function addUrl(
|
|
|
982
905
|
}
|
|
983
906
|
}
|
|
984
907
|
|
|
985
|
-
/**
|
|
986
|
-
* Build a listing of every existing path (folders + files) to feed the LLM
|
|
987
|
-
* placer. Seeing actual files in each folder helps the LLM place new content
|
|
988
|
-
* alongside similar documents instead of inventing parallel folder names.
|
|
989
|
-
*/
|
|
990
|
-
async function renderExistingTree(conn: DbConnection): Promise<string> {
|
|
991
|
-
const items = await listContextItems(conn);
|
|
992
|
-
if (items.length === 0) return "";
|
|
993
|
-
|
|
994
|
-
// Every implicit ancestor folder of every item.
|
|
995
|
-
const folders = new Set<string>();
|
|
996
|
-
for (const item of items) {
|
|
997
|
-
const parts = item.context_path.split("/").filter(Boolean);
|
|
998
|
-
const isExplicitDir = item.mime_type === "inode/directory";
|
|
999
|
-
const folderDepth = isExplicitDir ? parts.length : parts.length - 1;
|
|
1000
|
-
for (let i = 1; i <= folderDepth; i++) {
|
|
1001
|
-
folders.add(`/${parts.slice(0, i).join("/")}/`);
|
|
1002
|
-
}
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
const files = items
|
|
1006
|
-
.filter((i) => i.mime_type !== "inode/directory")
|
|
1007
|
-
.map((i) => i.context_path);
|
|
1008
|
-
|
|
1009
|
-
const all = [...folders, ...files].sort();
|
|
1010
|
-
const cap = 500;
|
|
1011
|
-
const truncated = all.slice(0, cap);
|
|
1012
|
-
const suffix =
|
|
1013
|
-
all.length > cap ? `\n (+${all.length - cap} more entries)` : "";
|
|
1014
|
-
return truncated.map((p) => ` ${p}`).join("\n") + suffix;
|
|
1015
|
-
}
|
|
1016
|
-
|
|
1017
|
-
/** Call the describer LLM to suggest a path + description for a file. */
|
|
1018
|
-
async function suggestPathForFile(
|
|
1019
|
-
filePath: string,
|
|
1020
|
-
config: Required<BotholomewConfig>,
|
|
1021
|
-
existingTree: string,
|
|
1022
|
-
): Promise<{ description: string; suggested_path: string } | null> {
|
|
1023
|
-
try {
|
|
1024
|
-
const bunFile = Bun.file(filePath);
|
|
1025
|
-
const mimeType = bunFile.type.split(";")[0] || "application/octet-stream";
|
|
1026
|
-
const filename = basename(filePath);
|
|
1027
|
-
const textual = isText(filename) !== false;
|
|
1028
|
-
const content = textual ? await bunFile.text() : null;
|
|
1029
|
-
return await generateDescriptionAndPath(config, {
|
|
1030
|
-
filename,
|
|
1031
|
-
mimeType,
|
|
1032
|
-
content,
|
|
1033
|
-
filePath,
|
|
1034
|
-
sourcePath: filePath,
|
|
1035
|
-
existingTree,
|
|
1036
|
-
});
|
|
1037
|
-
} catch {
|
|
1038
|
-
return null;
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
|
|
1042
|
-
/** Minimal stdin-based yes/no prompt, defaults to yes (empty input accepts). */
|
|
1043
|
-
async function confirmYesNo(prompt: string): Promise<boolean> {
|
|
1044
|
-
process.stdout.write(prompt);
|
|
1045
|
-
return new Promise((resolvePromise) => {
|
|
1046
|
-
const onData = (chunk: Buffer) => {
|
|
1047
|
-
const line = chunk.toString().trim().toLowerCase();
|
|
1048
|
-
process.stdin.off("data", onData);
|
|
1049
|
-
process.stdin.pause();
|
|
1050
|
-
// Empty input (just Enter) or y/yes → accept; only n/no rejects.
|
|
1051
|
-
resolvePromise(line !== "n" && line !== "no");
|
|
1052
|
-
};
|
|
1053
|
-
process.stdin.resume();
|
|
1054
|
-
process.stdin.once("data", onData);
|
|
1055
|
-
});
|
|
1056
|
-
}
|
|
1057
|
-
|
|
1058
908
|
async function walkDirectory(dirPath: string): Promise<string[]> {
|
|
1059
909
|
const files: string[] = [];
|
|
1060
910
|
const entries = await readdir(dirPath, { withFileTypes: true });
|