botholomew 0.7.9 → 0.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/context.ts +168 -12
- package/src/db/context.ts +13 -0
package/package.json
CHANGED
package/src/commands/context.ts
CHANGED
|
@@ -25,6 +25,7 @@ import {
|
|
|
25
25
|
createContextItemStrict,
|
|
26
26
|
deleteContextItemByPath,
|
|
27
27
|
getContextItemByPath,
|
|
28
|
+
getContextItemBySourcePath,
|
|
28
29
|
listContextItems,
|
|
29
30
|
listContextItemsByPrefix,
|
|
30
31
|
PathConflictError,
|
|
@@ -193,9 +194,126 @@ export function registerContextCommand(program: Command) {
|
|
|
193
194
|
text: `Found ${totalCount} item(s) to add (${filesToAdd.length} file(s), ${urlsToAdd.length} URL(s)).`,
|
|
194
195
|
});
|
|
195
196
|
|
|
196
|
-
// Phase 1.5: LLM placement for files without an explicit path
|
|
197
197
|
const config = await loadConfig(dir);
|
|
198
198
|
const CONCURRENCY = 10;
|
|
199
|
+
|
|
200
|
+
// Phase 0: Source-path dedup — items whose source_path is already in
|
|
201
|
+
// context are routed per --on-conflict before we pay for LLM placement.
|
|
202
|
+
type AlreadyInContext = {
|
|
203
|
+
sourcePath: string;
|
|
204
|
+
sourceType: "file" | "url";
|
|
205
|
+
existing: ContextItem;
|
|
206
|
+
};
|
|
207
|
+
const alreadyInContext: AlreadyInContext[] = [];
|
|
208
|
+
const remainingFiles: FileToAdd[] = [];
|
|
209
|
+
const remainingUrls: { url: string; contextPath: string }[] = [];
|
|
210
|
+
|
|
211
|
+
for (const f of filesToAdd) {
|
|
212
|
+
const existing = await getContextItemBySourcePath(
|
|
213
|
+
conn,
|
|
214
|
+
f.filePath,
|
|
215
|
+
"file",
|
|
216
|
+
);
|
|
217
|
+
if (existing) {
|
|
218
|
+
alreadyInContext.push({
|
|
219
|
+
sourcePath: f.filePath,
|
|
220
|
+
sourceType: "file",
|
|
221
|
+
existing,
|
|
222
|
+
});
|
|
223
|
+
} else {
|
|
224
|
+
remainingFiles.push(f);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
for (const u of urlsToAdd) {
|
|
228
|
+
const existing = await getContextItemBySourcePath(conn, u.url, "url");
|
|
229
|
+
if (existing) {
|
|
230
|
+
alreadyInContext.push({
|
|
231
|
+
sourcePath: u.url,
|
|
232
|
+
sourceType: "url",
|
|
233
|
+
existing,
|
|
234
|
+
});
|
|
235
|
+
} else {
|
|
236
|
+
remainingUrls.push(u);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
let refreshedCount = 0;
|
|
241
|
+
let refreshedChunks = 0;
|
|
242
|
+
const dedupSkipped: string[] = [];
|
|
243
|
+
|
|
244
|
+
if (alreadyInContext.length > 0) {
|
|
245
|
+
if (policy === "error") {
|
|
246
|
+
logger.error(
|
|
247
|
+
`${alreadyInContext.length} item(s) already in context (matched by source path):`,
|
|
248
|
+
);
|
|
249
|
+
for (const a of alreadyInContext) {
|
|
250
|
+
console.log(
|
|
251
|
+
` ${ansis.red("✗")} ${a.sourcePath} → ${a.existing.context_path} (id: ${a.existing.id})`,
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
logger.dim(
|
|
255
|
+
"Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them from disk.",
|
|
256
|
+
);
|
|
257
|
+
process.exit(1);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
if (policy === "skip") {
|
|
261
|
+
for (const a of alreadyInContext) {
|
|
262
|
+
logger.dim(
|
|
263
|
+
`⊘ already in context: ${a.sourcePath} → ${a.existing.context_path}`,
|
|
264
|
+
);
|
|
265
|
+
dedupSkipped.push(a.existing.context_path);
|
|
266
|
+
}
|
|
267
|
+
} else {
|
|
268
|
+
// overwrite: refresh existing items (diff + selective re-embed),
|
|
269
|
+
// preserving their original context_path.
|
|
270
|
+
const itemsToRefresh = alreadyInContext.map((a) => a.existing);
|
|
271
|
+
const hasUrls = itemsToRefresh.some((i) => i.source_type === "url");
|
|
272
|
+
const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
|
|
273
|
+
|
|
274
|
+
const refreshSpinner = createSpinner(
|
|
275
|
+
`Refreshing 0/${itemsToRefresh.length} existing item(s)...`,
|
|
276
|
+
).start();
|
|
277
|
+
const refreshResult = await refreshContextItems(
|
|
278
|
+
conn,
|
|
279
|
+
itemsToRefresh,
|
|
280
|
+
config,
|
|
281
|
+
mcpxClient,
|
|
282
|
+
{
|
|
283
|
+
onItemProgress: (done, total) => {
|
|
284
|
+
refreshSpinner.update({
|
|
285
|
+
text: `Refreshing ${done}/${total} existing item(s)...`,
|
|
286
|
+
});
|
|
287
|
+
},
|
|
288
|
+
},
|
|
289
|
+
);
|
|
290
|
+
refreshSpinner.success({
|
|
291
|
+
text: `Refreshed ${refreshResult.checked} existing item(s): ${refreshResult.updated} updated, ${refreshResult.unchanged} unchanged, ${refreshResult.missing} missing.`,
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// Count everything we processed OK (updated + unchanged) as
|
|
295
|
+
// "refreshed" for the summary. Missing/error items are reported
|
|
296
|
+
// inline below and don't count toward success.
|
|
297
|
+
refreshedCount = refreshResult.updated + refreshResult.unchanged;
|
|
298
|
+
refreshedChunks = refreshResult.chunks;
|
|
299
|
+
for (const item of refreshResult.items) {
|
|
300
|
+
if (item.status === "missing") {
|
|
301
|
+
logger.warn(` Missing: ${item.source_path}`);
|
|
302
|
+
} else if (item.status === "error") {
|
|
303
|
+
logger.warn(
|
|
304
|
+
` Error refreshing ${item.source_path}: ${item.error}`,
|
|
305
|
+
);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// Drop already-handled items from the work lists so downstream phases
|
|
312
|
+
// (LLM placement, description, insert, embed) see only truly-new items.
|
|
313
|
+
filesToAdd.splice(0, filesToAdd.length, ...remainingFiles);
|
|
314
|
+
urlsToAdd.splice(0, urlsToAdd.length, ...remainingUrls);
|
|
315
|
+
|
|
316
|
+
// Phase 1.5: LLM placement for files without an explicit path
|
|
199
317
|
const needsPlacement = filesToAdd.filter((f) => f.contextPath === null);
|
|
200
318
|
// description cache keyed by filePath — populated when LLM placement runs,
|
|
201
319
|
// reused in addFile to avoid a second describe call.
|
|
@@ -378,10 +496,13 @@ export function registerContextCommand(program: Command) {
|
|
|
378
496
|
}
|
|
379
497
|
}
|
|
380
498
|
|
|
381
|
-
// Report conflicts before embeddings so the user sees them prominently
|
|
499
|
+
// Report conflicts before embeddings so the user sees them prominently.
|
|
500
|
+
// Phase 0 already handled source-path matches, so anything here is a
|
|
501
|
+
// target-path collision — an LLM-suggested (or explicit) path that
|
|
502
|
+
// another unrelated item already occupies.
|
|
382
503
|
if (conflicts.length > 0) {
|
|
383
504
|
logger.error(
|
|
384
|
-
`${conflicts.length} path collision(s) — nothing written for these items:`,
|
|
505
|
+
`${conflicts.length} target-path collision(s) — nothing written for these items:`,
|
|
385
506
|
);
|
|
386
507
|
for (const c of conflicts) {
|
|
387
508
|
console.log(
|
|
@@ -389,24 +510,34 @@ export function registerContextCommand(program: Command) {
|
|
|
389
510
|
);
|
|
390
511
|
}
|
|
391
512
|
logger.dim(
|
|
392
|
-
"Re-run with --
|
|
513
|
+
"The suggested path is already in use by a different source. Re-run with --prefix to place these items elsewhere, or delete the existing item first.",
|
|
393
514
|
);
|
|
394
515
|
}
|
|
395
516
|
|
|
517
|
+
// Merge Phase 0 skips into the skip list used by the final summary.
|
|
518
|
+
skipped.push(...dedupSkipped);
|
|
519
|
+
|
|
396
520
|
// Phase 3: Chunk + embed in parallel (network I/O)
|
|
397
521
|
if (itemIds.length === 0 || !config.openai_api_key) {
|
|
398
522
|
if (!config.openai_api_key) {
|
|
399
523
|
logger.dim("Skipping embeddings (no OpenAI API key configured).");
|
|
400
524
|
}
|
|
401
|
-
const msg =
|
|
525
|
+
const msg = buildSummary({
|
|
526
|
+
added: itemIds.length,
|
|
527
|
+
refreshed: refreshedCount,
|
|
528
|
+
skipped: skipped.length,
|
|
529
|
+
chunks: refreshedChunks,
|
|
530
|
+
totalCount,
|
|
531
|
+
handled: itemIds.length + refreshedCount + skipped.length,
|
|
532
|
+
});
|
|
402
533
|
if (conflicts.length > 0) {
|
|
403
534
|
logger.error(msg);
|
|
404
535
|
process.exit(1);
|
|
405
536
|
}
|
|
406
|
-
if (itemIds.length
|
|
537
|
+
if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
|
|
407
538
|
logger.success(msg);
|
|
408
539
|
process.exit(0);
|
|
409
|
-
} else if (itemIds.length === 0) {
|
|
540
|
+
} else if (itemIds.length === 0 && refreshedCount === 0) {
|
|
410
541
|
logger.error(msg);
|
|
411
542
|
process.exit(1);
|
|
412
543
|
} else {
|
|
@@ -452,15 +583,20 @@ export function registerContextCommand(program: Command) {
|
|
|
452
583
|
else filesAdded++;
|
|
453
584
|
}
|
|
454
585
|
|
|
455
|
-
const
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
586
|
+
const summary = buildSummary({
|
|
587
|
+
added: filesAdded,
|
|
588
|
+
updated: filesUpdated,
|
|
589
|
+
refreshed: refreshedCount,
|
|
590
|
+
skipped: skipped.length,
|
|
591
|
+
chunks: chunks + refreshedChunks,
|
|
592
|
+
totalCount,
|
|
593
|
+
handled: itemIds.length + refreshedCount + skipped.length,
|
|
594
|
+
});
|
|
459
595
|
if (conflicts.length > 0) {
|
|
460
596
|
logger.error(summary);
|
|
461
597
|
process.exit(1);
|
|
462
598
|
}
|
|
463
|
-
if (itemIds.length
|
|
599
|
+
if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
|
|
464
600
|
logger.success(summary);
|
|
465
601
|
process.exit(0);
|
|
466
602
|
} else {
|
|
@@ -675,6 +811,26 @@ async function resolveItems(
|
|
|
675
811
|
|
|
676
812
|
type ConflictPolicy = "error" | "overwrite" | "skip";
|
|
677
813
|
|
|
814
|
+
/** Format the final "X added, Y refreshed, Z skipped — N chunks" line. */
|
|
815
|
+
function buildSummary(args: {
|
|
816
|
+
added: number;
|
|
817
|
+
updated?: number;
|
|
818
|
+
refreshed: number;
|
|
819
|
+
skipped: number;
|
|
820
|
+
chunks: number;
|
|
821
|
+
totalCount: number;
|
|
822
|
+
handled?: number;
|
|
823
|
+
}): string {
|
|
824
|
+
const parts: string[] = [];
|
|
825
|
+
if (args.added > 0) parts.push(`${args.added} added`);
|
|
826
|
+
if (args.updated && args.updated > 0) parts.push(`${args.updated} updated`);
|
|
827
|
+
if (args.refreshed > 0) parts.push(`${args.refreshed} refreshed`);
|
|
828
|
+
if (args.skipped > 0) parts.push(`${args.skipped} skipped`);
|
|
829
|
+
const body = parts.length > 0 ? parts.join(", ") : "0 added";
|
|
830
|
+
const handled = args.handled ?? args.added + args.refreshed + args.skipped;
|
|
831
|
+
return `${body} — ${args.chunks} chunk(s) indexed (${handled}/${args.totalCount} item(s)).`;
|
|
832
|
+
}
|
|
833
|
+
|
|
678
834
|
type AddFileResult =
|
|
679
835
|
| { kind: "added"; id: string; contextPath: string }
|
|
680
836
|
| { kind: "skipped"; contextPath: string }
|
package/src/db/context.ts
CHANGED
|
@@ -179,6 +179,19 @@ export async function getContextItemByPath(
|
|
|
179
179
|
return row ? rowToContextItem(row) : null;
|
|
180
180
|
}
|
|
181
181
|
|
|
182
|
+
export async function getContextItemBySourcePath(
|
|
183
|
+
db: DbConnection,
|
|
184
|
+
sourcePath: string,
|
|
185
|
+
sourceType: "file" | "url",
|
|
186
|
+
): Promise<ContextItem | null> {
|
|
187
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
188
|
+
"SELECT * FROM context_items WHERE source_path = ?1 AND source_type = ?2 LIMIT 1",
|
|
189
|
+
sourcePath,
|
|
190
|
+
sourceType,
|
|
191
|
+
);
|
|
192
|
+
return row ? rowToContextItem(row) : null;
|
|
193
|
+
}
|
|
194
|
+
|
|
182
195
|
/**
|
|
183
196
|
* Look up a context item by UUID (if the value looks like one) or by context_path.
|
|
184
197
|
*/
|