botholomew 0.7.9 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botholomew",
3
- "version": "0.7.9",
3
+ "version": "0.7.10",
4
4
  "description": "Local, autonomous AI agent for knowledge work — works your task queue while you sleep.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -25,6 +25,7 @@ import {
25
25
  createContextItemStrict,
26
26
  deleteContextItemByPath,
27
27
  getContextItemByPath,
28
+ getContextItemBySourcePath,
28
29
  listContextItems,
29
30
  listContextItemsByPrefix,
30
31
  PathConflictError,
@@ -193,9 +194,126 @@ export function registerContextCommand(program: Command) {
193
194
  text: `Found ${totalCount} item(s) to add (${filesToAdd.length} file(s), ${urlsToAdd.length} URL(s)).`,
194
195
  });
195
196
 
196
- // Phase 1.5: LLM placement for files without an explicit path
197
197
  const config = await loadConfig(dir);
198
198
  const CONCURRENCY = 10;
199
+
200
+ // Phase 0: Source-path dedup — items whose source_path is already in
201
+ // context are routed per --on-conflict before we pay for LLM placement.
202
+ type AlreadyInContext = {
203
+ sourcePath: string;
204
+ sourceType: "file" | "url";
205
+ existing: ContextItem;
206
+ };
207
+ const alreadyInContext: AlreadyInContext[] = [];
208
+ const remainingFiles: FileToAdd[] = [];
209
+ const remainingUrls: { url: string; contextPath: string }[] = [];
210
+
211
+ for (const f of filesToAdd) {
212
+ const existing = await getContextItemBySourcePath(
213
+ conn,
214
+ f.filePath,
215
+ "file",
216
+ );
217
+ if (existing) {
218
+ alreadyInContext.push({
219
+ sourcePath: f.filePath,
220
+ sourceType: "file",
221
+ existing,
222
+ });
223
+ } else {
224
+ remainingFiles.push(f);
225
+ }
226
+ }
227
+ for (const u of urlsToAdd) {
228
+ const existing = await getContextItemBySourcePath(conn, u.url, "url");
229
+ if (existing) {
230
+ alreadyInContext.push({
231
+ sourcePath: u.url,
232
+ sourceType: "url",
233
+ existing,
234
+ });
235
+ } else {
236
+ remainingUrls.push(u);
237
+ }
238
+ }
239
+
240
+ let refreshedCount = 0;
241
+ let refreshedChunks = 0;
242
+ const dedupSkipped: string[] = [];
243
+
244
+ if (alreadyInContext.length > 0) {
245
+ if (policy === "error") {
246
+ logger.error(
247
+ `${alreadyInContext.length} item(s) already in context (matched by source path):`,
248
+ );
249
+ for (const a of alreadyInContext) {
250
+ console.log(
251
+ ` ${ansis.red("✗")} ${a.sourcePath} → ${a.existing.context_path} (id: ${a.existing.id})`,
252
+ );
253
+ }
254
+ logger.dim(
255
+ "Re-run with --on-conflict=skip to ignore these items or --on-conflict=overwrite to refresh them from disk.",
256
+ );
257
+ process.exit(1);
258
+ }
259
+
260
+ if (policy === "skip") {
261
+ for (const a of alreadyInContext) {
262
+ logger.dim(
263
+ `⊘ already in context: ${a.sourcePath} → ${a.existing.context_path}`,
264
+ );
265
+ dedupSkipped.push(a.existing.context_path);
266
+ }
267
+ } else {
268
+ // overwrite: refresh existing items (diff + selective re-embed),
269
+ // preserving their original context_path.
270
+ const itemsToRefresh = alreadyInContext.map((a) => a.existing);
271
+ const hasUrls = itemsToRefresh.some((i) => i.source_type === "url");
272
+ const mcpxClient = hasUrls ? await createMcpxClient(dir) : null;
273
+
274
+ const refreshSpinner = createSpinner(
275
+ `Refreshing 0/${itemsToRefresh.length} existing item(s)...`,
276
+ ).start();
277
+ const refreshResult = await refreshContextItems(
278
+ conn,
279
+ itemsToRefresh,
280
+ config,
281
+ mcpxClient,
282
+ {
283
+ onItemProgress: (done, total) => {
284
+ refreshSpinner.update({
285
+ text: `Refreshing ${done}/${total} existing item(s)...`,
286
+ });
287
+ },
288
+ },
289
+ );
290
+ refreshSpinner.success({
291
+ text: `Refreshed ${refreshResult.checked} existing item(s): ${refreshResult.updated} updated, ${refreshResult.unchanged} unchanged, ${refreshResult.missing} missing.`,
292
+ });
293
+
294
+ // Count everything we processed OK (updated + unchanged) as
295
+ // "refreshed" for the summary. Missing/error items are reported
296
+ // inline below and don't count toward success.
297
+ refreshedCount = refreshResult.updated + refreshResult.unchanged;
298
+ refreshedChunks = refreshResult.chunks;
299
+ for (const item of refreshResult.items) {
300
+ if (item.status === "missing") {
301
+ logger.warn(` Missing: ${item.source_path}`);
302
+ } else if (item.status === "error") {
303
+ logger.warn(
304
+ ` Error refreshing ${item.source_path}: ${item.error}`,
305
+ );
306
+ }
307
+ }
308
+ }
309
+ }
310
+
311
+ // Drop already-handled items from the work lists so downstream phases
312
+ // (LLM placement, description, insert, embed) see only truly-new items.
313
+ filesToAdd.splice(0, filesToAdd.length, ...remainingFiles);
314
+ urlsToAdd.splice(0, urlsToAdd.length, ...remainingUrls);
315
+
316
+ // Phase 1.5: LLM placement for files without an explicit path
199
317
  const needsPlacement = filesToAdd.filter((f) => f.contextPath === null);
200
318
  // description cache keyed by filePath — populated when LLM placement runs,
201
319
  // reused in addFile to avoid a second describe call.
@@ -378,10 +496,13 @@ export function registerContextCommand(program: Command) {
378
496
  }
379
497
  }
380
498
 
381
- // Report conflicts before embeddings so the user sees them prominently
499
+ // Report conflicts before embeddings so the user sees them prominently.
500
+ // Phase 0 already handled source-path matches, so anything here is a
501
+ // target-path collision — an LLM-suggested (or explicit) path that
502
+ // another unrelated item already occupies.
382
503
  if (conflicts.length > 0) {
383
504
  logger.error(
384
- `${conflicts.length} path collision(s) — nothing written for these items:`,
505
+ `${conflicts.length} target-path collision(s) — nothing written for these items:`,
385
506
  );
386
507
  for (const c of conflicts) {
387
508
  console.log(
@@ -389,24 +510,34 @@ export function registerContextCommand(program: Command) {
389
510
  );
390
511
  }
391
512
  logger.dim(
392
- "Re-run with --on-conflict=overwrite to replace, --on-conflict=skip to ignore, or --name / --prefix to place elsewhere.",
513
+ "The suggested path is already in use by a different source. Re-run with --prefix to place these items elsewhere, or delete the existing item first.",
393
514
  );
394
515
  }
395
516
 
517
+ // Merge Phase 0 skips into the skip list used by the final summary.
518
+ skipped.push(...dedupSkipped);
519
+
396
520
  // Phase 3: Chunk + embed in parallel (network I/O)
397
521
  if (itemIds.length === 0 || !config.openai_api_key) {
398
522
  if (!config.openai_api_key) {
399
523
  logger.dim("Skipping embeddings (no OpenAI API key configured).");
400
524
  }
401
- const msg = `Added ${itemIds.length}/${totalCount} item(s), 0 chunks indexed.`;
525
+ const msg = buildSummary({
526
+ added: itemIds.length,
527
+ refreshed: refreshedCount,
528
+ skipped: skipped.length,
529
+ chunks: refreshedChunks,
530
+ totalCount,
531
+ handled: itemIds.length + refreshedCount + skipped.length,
532
+ });
402
533
  if (conflicts.length > 0) {
403
534
  logger.error(msg);
404
535
  process.exit(1);
405
536
  }
406
- if (itemIds.length === totalCount - skipped.length) {
537
+ if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
407
538
  logger.success(msg);
408
539
  process.exit(0);
409
- } else if (itemIds.length === 0) {
540
+ } else if (itemIds.length === 0 && refreshedCount === 0) {
410
541
  logger.error(msg);
411
542
  process.exit(1);
412
543
  } else {
@@ -452,15 +583,20 @@ export function registerContextCommand(program: Command) {
452
583
  else filesAdded++;
453
584
  }
454
585
 
455
- const parts: string[] = [];
456
- if (filesAdded > 0) parts.push(`${filesAdded} added`);
457
- if (filesUpdated > 0) parts.push(`${filesUpdated} updated`);
458
- const summary = `${parts.join(", ")} — ${chunks} chunk(s) indexed (${itemIds.length}/${totalCount} item(s)).`;
586
+ const summary = buildSummary({
587
+ added: filesAdded,
588
+ updated: filesUpdated,
589
+ refreshed: refreshedCount,
590
+ skipped: skipped.length,
591
+ chunks: chunks + refreshedChunks,
592
+ totalCount,
593
+ handled: itemIds.length + refreshedCount + skipped.length,
594
+ });
459
595
  if (conflicts.length > 0) {
460
596
  logger.error(summary);
461
597
  process.exit(1);
462
598
  }
463
- if (itemIds.length === totalCount - skipped.length) {
599
+ if (itemIds.length + skipped.length + refreshedCount >= totalCount) {
464
600
  logger.success(summary);
465
601
  process.exit(0);
466
602
  } else {
@@ -675,6 +811,26 @@ async function resolveItems(
675
811
 
676
812
  type ConflictPolicy = "error" | "overwrite" | "skip";
677
813
 
814
+ /** Format the final "X added, Y refreshed, Z skipped — N chunks" line. */
815
+ function buildSummary(args: {
816
+ added: number;
817
+ updated?: number;
818
+ refreshed: number;
819
+ skipped: number;
820
+ chunks: number;
821
+ totalCount: number;
822
+ handled?: number;
823
+ }): string {
824
+ const parts: string[] = [];
825
+ if (args.added > 0) parts.push(`${args.added} added`);
826
+ if (args.updated && args.updated > 0) parts.push(`${args.updated} updated`);
827
+ if (args.refreshed > 0) parts.push(`${args.refreshed} refreshed`);
828
+ if (args.skipped > 0) parts.push(`${args.skipped} skipped`);
829
+ const body = parts.length > 0 ? parts.join(", ") : "0 added";
830
+ const handled = args.handled ?? args.added + args.refreshed + args.skipped;
831
+ return `${body} — ${args.chunks} chunk(s) indexed (${handled}/${args.totalCount} item(s)).`;
832
+ }
833
+
678
834
  type AddFileResult =
679
835
  | { kind: "added"; id: string; contextPath: string }
680
836
  | { kind: "skipped"; contextPath: string }
package/src/db/context.ts CHANGED
@@ -179,6 +179,19 @@ export async function getContextItemByPath(
179
179
  return row ? rowToContextItem(row) : null;
180
180
  }
181
181
 
182
+ export async function getContextItemBySourcePath(
183
+ db: DbConnection,
184
+ sourcePath: string,
185
+ sourceType: "file" | "url",
186
+ ): Promise<ContextItem | null> {
187
+ const row = await db.queryGet<ContextItemRow>(
188
+ "SELECT * FROM context_items WHERE source_path = ?1 AND source_type = ?2 LIMIT 1",
189
+ sourcePath,
190
+ sourceType,
191
+ );
192
+ return row ? rowToContextItem(row) : null;
193
+ }
194
+
182
195
  /**
183
196
  * Look up a context item by UUID (if the value looks like one) or by context_path.
184
197
  */