botholomew 0.7.7 → 0.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botholomew",
3
- "version": "0.7.7",
3
+ "version": "0.7.8",
4
4
  "description": "Local, autonomous AI agent for knowledge work — works your task queue while you sleep.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -6,7 +6,10 @@ import { isText } from "istextorbinary";
6
6
  import { createSpinner } from "nanospinner";
7
7
  import { loadConfig } from "../config/loader.ts";
8
8
  import type { BotholomewConfig } from "../config/schemas.ts";
9
- import { generateDescription } from "../context/describer.ts";
9
+ import {
10
+ generateDescription,
11
+ generateDescriptionAndPath,
12
+ } from "../context/describer.ts";
10
13
  import { embedSingle } from "../context/embedder.ts";
11
14
  import { FetchFailureError, fetchUrl } from "../context/fetcher.ts";
12
15
  import {
@@ -19,9 +22,12 @@ import { isUrl, urlToContextPath } from "../context/url-utils.ts";
19
22
  import type { DbConnection } from "../db/connection.ts";
20
23
  import {
21
24
  type ContextItem,
25
+ createContextItemStrict,
22
26
  deleteContextItemByPath,
27
+ getContextItemByPath,
23
28
  listContextItems,
24
29
  listContextItemsByPrefix,
30
+ PathConflictError,
25
31
  resolveContextItem,
26
32
  upsertContextItem,
27
33
  } from "../db/context.ts";
@@ -95,16 +101,41 @@ export function registerContextCommand(program: Command) {
95
101
  ctx
96
102
  .command("add <paths...>")
97
103
  .description("Add files, directories, or URLs to context")
98
- .option("--prefix <prefix>", "virtual path prefix", "/")
104
+ .option(
105
+ "--prefix <prefix>",
106
+ "virtual path prefix (if omitted, an LLM suggests a folder for each file)",
107
+ )
99
108
  .option("--name <path>", "custom context path (single URL only)")
109
+ .option(
110
+ "--on-conflict <policy>",
111
+ "collision policy: error | overwrite | skip",
112
+ "error",
113
+ )
114
+ .option(
115
+ "--auto-place",
116
+ "accept all LLM-suggested paths without confirmation",
117
+ )
100
118
  .option(
101
119
  "--prompt-addition <text>",
102
120
  "extra guidance for the URL fetcher agent (e.g., auth notes, tool hints)",
103
121
  )
104
122
  .action((paths: string[], opts) =>
105
123
  withDb(program, async (conn, dir) => {
124
+ type ConflictPolicy = "error" | "overwrite" | "skip";
125
+ const policy = opts.onConflict as ConflictPolicy;
126
+ if (!["error", "overwrite", "skip"].includes(policy)) {
127
+ logger.error(
128
+ `Invalid --on-conflict value: ${policy} (must be error, overwrite, or skip)`,
129
+ );
130
+ process.exit(1);
131
+ }
132
+
106
133
  // Phase 1: Scan all paths — separate URLs from local files
107
- const filesToAdd: { filePath: string; contextPath: string }[] = [];
134
+ type FileToAdd = {
135
+ filePath: string;
136
+ contextPath: string | null; // null = defer to LLM placement
137
+ };
138
+ const filesToAdd: FileToAdd[] = [];
108
139
  const urlsToAdd: { url: string; contextPath: string }[] = [];
109
140
  const spinner = createSpinner("Scanning paths...").start();
110
141
 
@@ -116,10 +147,14 @@ export function registerContextCommand(program: Command) {
116
147
  process.exit(1);
117
148
  }
118
149
 
150
+ // Explicit placement: user passed --prefix (or --name for URLs).
151
+ // Implicit placement: LLM decides per-file.
152
+ const explicitPlacement = typeof opts.prefix === "string";
153
+ const urlPrefix = opts.prefix ?? "/";
154
+
119
155
  for (const path of paths) {
120
156
  if (isUrl(path)) {
121
- const contextPath =
122
- opts.name || urlToContextPath(path, opts.prefix);
157
+ const contextPath = opts.name || urlToContextPath(path, urlPrefix);
123
158
  urlsToAdd.push({ url: path, contextPath });
124
159
  } else {
125
160
  const resolvedPath = resolve(path);
@@ -137,13 +172,17 @@ export function registerContextCommand(program: Command) {
137
172
  const relativePath = filePath.slice(resolvedPath.length);
138
173
  filesToAdd.push({
139
174
  filePath,
140
- contextPath: join(opts.prefix, relativePath),
175
+ contextPath: explicitPlacement
176
+ ? join(opts.prefix, relativePath)
177
+ : null,
141
178
  });
142
179
  }
143
180
  } else {
144
181
  filesToAdd.push({
145
182
  filePath: resolvedPath,
146
- contextPath: join(opts.prefix, basename(resolvedPath)),
183
+ contextPath: explicitPlacement
184
+ ? join(opts.prefix, basename(resolvedPath))
185
+ : null,
147
186
  });
148
187
  }
149
188
  }
@@ -154,11 +193,78 @@ export function registerContextCommand(program: Command) {
154
193
  text: `Found ${totalCount} item(s) to add (${filesToAdd.length} file(s), ${urlsToAdd.length} URL(s)).`,
155
194
  });
156
195
 
157
- // Phase 2: Load config and upsert DB records (batched, parallel LLM descriptions)
196
+ // Phase 1.5: LLM placement for files without an explicit path
158
197
  const config = await loadConfig(dir);
159
198
  const CONCURRENCY = 10;
199
+ const needsPlacement = filesToAdd.filter((f) => f.contextPath === null);
200
+ // description cache keyed by filePath — populated when LLM placement runs,
201
+ // reused in addFile to avoid a second describe call.
202
+ const descriptionCache = new Map<string, string>();
203
+
204
+ if (needsPlacement.length > 0) {
205
+ if (!config.anthropic_api_key) {
206
+ logger.error(
207
+ "No anthropic_api_key configured — cannot auto-place files. Pass --prefix to specify a folder.",
208
+ );
209
+ process.exit(1);
210
+ }
211
+
212
+ const existingTree = await renderExistingTree(conn);
213
+ const placeSpinner = createSpinner(
214
+ `Choosing paths for 0/${needsPlacement.length} file(s)...`,
215
+ ).start();
216
+ let placed = 0;
217
+
218
+ for (let i = 0; i < needsPlacement.length; i += CONCURRENCY) {
219
+ const batch = needsPlacement.slice(i, i + CONCURRENCY);
220
+ await Promise.all(
221
+ batch.map(async (entry) => {
222
+ const suggestion = await suggestPathForFile(
223
+ entry.filePath,
224
+ config,
225
+ existingTree,
226
+ );
227
+ entry.contextPath =
228
+ suggestion?.suggested_path ?? `/${basename(entry.filePath)}`;
229
+ if (suggestion?.description) {
230
+ descriptionCache.set(entry.filePath, suggestion.description);
231
+ }
232
+ placed++;
233
+ placeSpinner.update({
234
+ text: `Choosing paths for ${placed}/${needsPlacement.length} file(s)...`,
235
+ });
236
+ }),
237
+ );
238
+ }
239
+ placeSpinner.success({
240
+ text: `Chose paths for ${placed} file(s).`,
241
+ });
242
+
243
+ // Confirm in TTY unless --auto-place
244
+ const isTTY = Boolean(process.stdin.isTTY && process.stdout.isTTY);
245
+ if (isTTY && !opts.autoPlace) {
246
+ console.log("");
247
+ console.log(ansis.bold("Suggested paths:"));
248
+ for (const entry of needsPlacement) {
249
+ console.log(
250
+ ` ${ansis.dim(entry.filePath)} → ${ansis.cyan(entry.contextPath ?? "")}`,
251
+ );
252
+ }
253
+ const accepted = await confirmYesNo("Accept these paths? (Y/n): ");
254
+ if (!accepted) {
255
+ logger.warn(
256
+ "Aborted. Re-run with --prefix to place files manually, or --auto-place to skip this prompt.",
257
+ );
258
+ process.exit(1);
259
+ }
260
+ }
261
+ }
262
+
263
+ // Phase 2: Upsert DB records (batched, parallel LLM descriptions)
160
264
  let addCompleted = 0;
161
265
  const itemIds: { id: string; contextPath: string }[] = [];
266
+ const conflicts: { contextPath: string; existingId: string }[] = [];
267
+ const skipped: string[] = [];
162
268
 
163
269
  // Process local files (with spinner — these are quick, no chatty logs)
164
270
  if (filesToAdd.length > 0) {
@@ -170,21 +276,34 @@ export function registerContextCommand(program: Command) {
170
276
  const batch = filesToAdd.slice(i, i + CONCURRENCY);
171
277
  const results = await Promise.all(
172
278
  batch.map(async ({ filePath, contextPath }) => {
279
+ if (contextPath === null) return null; // unreachable — placement filled it
173
280
  const result = await addFile(
174
281
  conn,
175
282
  filePath,
176
283
  contextPath,
177
284
  config,
285
+ policy,
286
+ descriptionCache.get(filePath),
178
287
  );
179
288
  addCompleted++;
180
289
  fileSpinner.update({
181
290
  text: `Adding and describing ${addCompleted}/${filesToAdd.length} file(s)...`,
182
291
  });
183
- return result ? { id: result, contextPath } : null;
292
+ return result;
184
293
  }),
185
294
  );
186
295
  for (const r of results) {
187
- if (r) itemIds.push(r);
296
+ if (!r) continue;
297
+ if (r.kind === "added") {
298
+ itemIds.push({ id: r.id, contextPath: r.contextPath });
299
+ } else if (r.kind === "conflict") {
300
+ conflicts.push({
301
+ contextPath: r.contextPath,
302
+ existingId: r.existingId,
303
+ });
304
+ } else if (r.kind === "skipped") {
305
+ skipped.push(r.contextPath);
306
+ }
188
307
  }
189
308
  }
190
309
 
@@ -216,11 +335,25 @@ export function registerContextCommand(program: Command) {
216
335
  contextPath,
217
336
  mcpxClient,
218
337
  opts.promptAddition,
338
+ policy,
219
339
  );
220
340
  if (result.ok) {
221
341
  urlAdded++;
222
342
  itemIds.push({ id: result.id, contextPath });
223
343
  console.log(` ${ansis.green("✔")} stored at ${contextPath}`);
344
+ } else if (result.kind === "conflict") {
345
+ conflicts.push({
346
+ contextPath,
347
+ existingId: result.existingId,
348
+ });
349
+ console.log(
350
+ ` ${ansis.red("✗")} path already exists: ${contextPath}`,
351
+ );
352
+ } else if (result.kind === "skipped") {
353
+ skipped.push(contextPath);
354
+ console.log(
355
+ ` ${ansis.yellow("⊘")} skipped (path exists): ${contextPath}`,
356
+ );
224
357
  } else if (result.actionable) {
225
358
  console.log(
226
359
  ` ${ansis.red("✗")} ${ansis.bold("action required:")}`,
@@ -245,13 +378,32 @@ export function registerContextCommand(program: Command) {
245
378
  }
246
379
  }
247
380
 
381
+ // Report conflicts before embeddings so the user sees them prominently
382
+ if (conflicts.length > 0) {
383
+ logger.error(
384
+ `${conflicts.length} path collision(s) — nothing written for these items:`,
385
+ );
386
+ for (const c of conflicts) {
387
+ console.log(
388
+ ` ${ansis.red("✗")} ${c.contextPath} (existing id: ${c.existingId})`,
389
+ );
390
+ }
391
+ logger.dim(
392
+ "Re-run with --on-conflict=overwrite to replace, --on-conflict=skip to ignore, or --name / --prefix to place elsewhere.",
393
+ );
394
+ }
395
+
248
396
  // Phase 3: Chunk + embed in parallel (network I/O)
249
397
  if (itemIds.length === 0 || !config.openai_api_key) {
250
398
  if (!config.openai_api_key) {
251
399
  logger.dim("Skipping embeddings (no OpenAI API key configured).");
252
400
  }
253
401
  const msg = `Added ${itemIds.length}/${totalCount} item(s), 0 chunks indexed.`;
254
- if (itemIds.length === totalCount) {
402
+ if (conflicts.length > 0) {
403
+ logger.error(msg);
404
+ process.exit(1);
405
+ }
406
+ if (itemIds.length === totalCount - skipped.length) {
255
407
  logger.success(msg);
256
408
  process.exit(0);
257
409
  } else if (itemIds.length === 0) {
@@ -304,7 +456,11 @@ export function registerContextCommand(program: Command) {
304
456
  if (filesAdded > 0) parts.push(`${filesAdded} added`);
305
457
  if (filesUpdated > 0) parts.push(`${filesUpdated} updated`);
306
458
  const summary = `${parts.join(", ")} — ${chunks} chunk(s) indexed (${itemIds.length}/${totalCount} item(s)).`;
307
- if (itemIds.length === totalCount) {
459
+ if (conflicts.length > 0) {
460
+ logger.error(summary);
461
+ process.exit(1);
462
+ }
463
+ if (itemIds.length === totalCount - skipped.length) {
308
464
  logger.success(summary);
309
465
  process.exit(0);
310
466
  } else {
@@ -517,28 +673,56 @@ async function resolveItems(
517
673
  return listContextItemsByPrefix(conn, p, { recursive: true });
518
674
  }
519
675
 
520
- /** Upsert a file into context. Returns the item ID if textual, null otherwise. */
676
+ type ConflictPolicy = "error" | "overwrite" | "skip";
677
+
678
+ type AddFileResult =
679
+ | { kind: "added"; id: string; contextPath: string }
680
+ | { kind: "skipped"; contextPath: string }
681
+ | { kind: "conflict"; contextPath: string; existingId: string }
682
+ | { kind: "failed"; contextPath: string; error: string };
683
+
684
+ /** Upsert a file into context honoring the collision policy. */
521
685
  async function addFile(
522
686
  conn: DbConnection,
523
687
  filePath: string,
524
688
  contextPath: string,
525
689
  config: Required<BotholomewConfig>,
526
- ): Promise<string | null> {
690
+ policy: ConflictPolicy,
691
+ cachedDescription?: string,
692
+ ): Promise<AddFileResult | null> {
527
693
  try {
694
+ // Pre-flight conflict check so we don't waste a describe call.
695
+ if (policy !== "overwrite") {
696
+ const existing = await getContextItemByPath(conn, contextPath);
697
+ if (existing) {
698
+ if (policy === "skip") {
699
+ logger.dim(` ⊘ skipped (path exists): ${contextPath}`);
700
+ return { kind: "skipped", contextPath };
701
+ }
702
+ return {
703
+ kind: "conflict",
704
+ contextPath,
705
+ existingId: existing.id,
706
+ };
707
+ }
708
+ }
709
+
528
710
  const bunFile = Bun.file(filePath);
529
711
  const mimeType = bunFile.type.split(";")[0] || "application/octet-stream";
530
712
  const filename = basename(filePath);
531
713
  const textual = isText(filename) !== false;
532
714
  const content = textual ? await bunFile.text() : null;
533
715
 
534
- const description = await generateDescription(config, {
535
- filename,
536
- mimeType,
537
- content,
538
- filePath,
539
- });
716
+ const description =
717
+ cachedDescription ??
718
+ (await generateDescription(config, {
719
+ filename,
720
+ mimeType,
721
+ content,
722
+ filePath,
723
+ }));
540
724
 
541
- const item = await upsertContextItem(conn, {
725
+ const itemParams = {
542
726
  title: filename,
543
727
  description,
544
728
  content: content ?? undefined,
@@ -546,19 +730,36 @@ async function addFile(
546
730
  sourcePath: filePath,
547
731
  contextPath,
548
732
  isTextual: textual,
549
- });
733
+ } as const;
734
+
735
+ const item =
736
+ policy === "overwrite"
737
+ ? await upsertContextItem(conn, itemParams)
738
+ : await createContextItemStrict(conn, itemParams);
550
739
 
551
- return textual && content ? item.id : null;
740
+ return textual && content
741
+ ? { kind: "added", id: item.id, contextPath: item.context_path }
742
+ : null;
552
743
  } catch (err) {
744
+ if (err instanceof PathConflictError) {
745
+ // Race between pre-flight check and insert — still a conflict.
746
+ return {
747
+ kind: "conflict",
748
+ contextPath,
749
+ existingId: err.existingId,
750
+ };
751
+ }
553
752
  logger.warn(` ! ${contextPath}: ${err}`);
554
- return null;
753
+ return { kind: "failed", contextPath, error: String(err) };
555
754
  }
556
755
  }
557
756
 
558
- /** Fetch a URL and upsert into context. Returns the item ID, or null on failure. */
757
+ /** Fetch a URL and upsert into context. */
559
758
  type AddUrlResult =
560
759
  | { ok: true; id: string }
561
- | { ok: false; error: string; actionable: boolean };
760
+ | { ok: false; kind: "conflict"; existingId: string }
761
+ | { ok: false; kind: "skipped" }
762
+ | { ok: false; kind: "fetch-failed"; error: string; actionable: boolean };
562
763
 
563
764
  async function addUrl(
564
765
  conn: DbConnection,
@@ -566,8 +767,18 @@ async function addUrl(
566
767
  url: string,
567
768
  contextPath: string,
568
769
  mcpxClient: Awaited<ReturnType<typeof createMcpxClient>>,
569
- promptAddition?: string,
770
+ promptAddition: string | undefined,
771
+ policy: ConflictPolicy,
570
772
  ): Promise<AddUrlResult> {
773
+ // Pre-flight conflict check — skip the expensive fetch if we'd collide.
774
+ if (policy !== "overwrite") {
775
+ const existing = await getContextItemByPath(conn, contextPath);
776
+ if (existing) {
777
+ if (policy === "skip") return { ok: false, kind: "skipped" };
778
+ return { ok: false, kind: "conflict", existingId: existing.id };
779
+ }
780
+ }
781
+
571
782
  try {
572
783
  const fetched = await fetchUrl(url, config, mcpxClient, promptAddition);
573
784
 
@@ -577,24 +788,115 @@ async function addUrl(
577
788
  content: fetched.content,
578
789
  });
579
790
 
580
- const item = await upsertContextItem(conn, {
791
+ const itemParams = {
581
792
  title: fetched.title,
582
793
  description,
583
794
  content: fetched.content,
584
795
  mimeType: fetched.mimeType,
585
- sourceType: "url",
796
+ sourceType: "url" as const,
586
797
  sourcePath: url,
587
798
  contextPath,
588
799
  isTextual: true,
589
- });
800
+ };
801
+
802
+ const item =
803
+ policy === "overwrite"
804
+ ? await upsertContextItem(conn, itemParams)
805
+ : await createContextItemStrict(conn, itemParams);
590
806
 
591
807
  return { ok: true, id: item.id };
592
808
  } catch (err) {
809
+ if (err instanceof PathConflictError) {
810
+ return { ok: false, kind: "conflict", existingId: err.existingId };
811
+ }
593
812
  if (err instanceof FetchFailureError) {
594
- return { ok: false, error: err.userMessage, actionable: true };
813
+ return {
814
+ ok: false,
815
+ kind: "fetch-failed",
816
+ error: err.userMessage,
817
+ actionable: true,
818
+ };
819
+ }
820
+ return {
821
+ ok: false,
822
+ kind: "fetch-failed",
823
+ error: String(err),
824
+ actionable: false,
825
+ };
826
+ }
827
+ }
828
+
829
+ /**
830
+ * Build a listing of every existing path (folders + files) to feed the LLM
831
+ * placer. Seeing actual files in each folder helps the LLM place new content
832
+ * alongside similar documents instead of inventing parallel folder names.
833
+ */
834
+ async function renderExistingTree(conn: DbConnection): Promise<string> {
835
+ const items = await listContextItems(conn);
836
+ if (items.length === 0) return "";
837
+
838
+ // Every implicit ancestor folder of every item.
839
+ const folders = new Set<string>();
840
+ for (const item of items) {
841
+ const parts = item.context_path.split("/").filter(Boolean);
842
+ const isExplicitDir = item.mime_type === "inode/directory";
843
+ const folderDepth = isExplicitDir ? parts.length : parts.length - 1;
844
+ for (let i = 1; i <= folderDepth; i++) {
845
+ folders.add(`/${parts.slice(0, i).join("/")}/`);
595
846
  }
596
- return { ok: false, error: String(err), actionable: false };
597
847
  }
848
+
849
+ const files = items
850
+ .filter((i) => i.mime_type !== "inode/directory")
851
+ .map((i) => i.context_path);
852
+
853
+ const all = [...folders, ...files].sort();
854
+ const cap = 500;
855
+ const truncated = all.slice(0, cap);
856
+ const suffix =
857
+ all.length > cap ? `\n (+${all.length - cap} more entries)` : "";
858
+ return truncated.map((p) => ` ${p}`).join("\n") + suffix;
859
+ }
860
+
861
+ /** Call the describer LLM to suggest a path + description for a file. */
862
+ async function suggestPathForFile(
863
+ filePath: string,
864
+ config: Required<BotholomewConfig>,
865
+ existingTree: string,
866
+ ): Promise<{ description: string; suggested_path: string } | null> {
867
+ try {
868
+ const bunFile = Bun.file(filePath);
869
+ const mimeType = bunFile.type.split(";")[0] || "application/octet-stream";
870
+ const filename = basename(filePath);
871
+ const textual = isText(filename) !== false;
872
+ const content = textual ? await bunFile.text() : null;
873
+ return await generateDescriptionAndPath(config, {
874
+ filename,
875
+ mimeType,
876
+ content,
877
+ filePath,
878
+ sourcePath: filePath,
879
+ existingTree,
880
+ });
881
+ } catch {
882
+ return null;
883
+ }
884
+ }
885
+
886
+ /** Minimal stdin-based yes/no prompt, defaults to yes (empty input accepts). */
887
+ async function confirmYesNo(prompt: string): Promise<boolean> {
888
+ process.stdout.write(prompt);
889
+ return new Promise((resolvePromise) => {
890
+ const onData = (chunk: Buffer) => {
891
+ const line = chunk.toString().trim().toLowerCase();
892
+ process.stdin.off("data", onData);
893
+ process.stdin.pause();
894
+ // Empty input (just Enter) or y/yes → accept; only n/no rejects.
895
+ resolvePromise(line !== "n" && line !== "no");
896
+ };
897
+ process.stdin.resume();
898
+ process.stdin.once("data", onData);
899
+ });
598
900
  }
599
901
 
600
902
  async function walkDirectory(dirPath: string): Promise<string[]> {
@@ -3,6 +3,8 @@ import type { BotholomewConfig } from "../config/schemas.ts";
3
3
  import { logger } from "../utils/logger.ts";
4
4
 
5
5
  const DESCRIBE_TOOL_NAME = "return_description";
6
+ const DESCRIBE_AND_PLACE_TOOL_NAME = "return_description_and_path";
7
+
6
8
  const DESCRIBE_TOOL = {
7
9
  name: DESCRIBE_TOOL_NAME,
8
10
  description: "Return a one-sentence description of this content.",
@@ -19,6 +21,28 @@ const DESCRIBE_TOOL = {
19
21
  },
20
22
  };
21
23
 
24
+ const DESCRIBE_AND_PLACE_TOOL = {
25
+ name: DESCRIBE_AND_PLACE_TOOL_NAME,
26
+ description:
27
+ "Return a one-sentence description AND a suggested absolute folder path for this file.",
28
+ input_schema: {
29
+ type: "object" as const,
30
+ properties: {
31
+ description: {
32
+ type: "string",
33
+ description:
34
+ "A concise one-sentence summary of what this content is about.",
35
+ },
36
+ suggested_path: {
37
+ type: "string",
38
+ description:
39
+ "Absolute virtual-filesystem path (starts with /) where this file should live, including the filename. Prefer existing folders. Include a project/source disambiguator (e.g. /projects/<source-dir>/README.md) when the basename is likely to collide.",
40
+ },
41
+ },
42
+ required: ["description", "suggested_path"],
43
+ },
44
+ };
45
+
22
46
  const TIMEOUT_MS = 10_000;
23
47
  const MAX_CONTENT_CHARS = 8000;
24
48
  const MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
@@ -38,8 +62,27 @@ type ImageMediaType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
38
62
  */
39
63
  async function buildMessageContent(
40
64
  opts: DescriberOpts,
65
+ includePlacement: boolean,
41
66
  ): Promise<Anthropic.Messages.ContentBlockParam[]> {
42
- const textPrompt = `Describe this file in one sentence. Be specific about what it contains, not generic.\n\nFilename: ${opts.filename}\nMIME type: ${opts.mimeType}`;
67
+ const placementBlock = includePlacement
68
+ ? [
69
+ "",
70
+ "Also suggest an absolute folder path where this file should live in the virtual filesystem. Rules:",
71
+ "- Start with /",
72
+ "- Keep the basename close to the source filename",
73
+ "- STRONGLY prefer folders that already exist below — reuse them unless the new file is clearly unrelated to everything there. Do NOT invent a new folder that is a near-synonym of an existing one.",
74
+ "- Use at most 3 nested folders unless an existing folder already goes deeper",
75
+ "- If the basename is common (README.md, index.md, notes.md), include a project/source disambiguator from the source path",
76
+ opts.existingTree
77
+ ? `\nExisting filesystem (folders end with /, files are listed under the folders they live in so you can see what kinds of documents are already there):\n${opts.existingTree}`
78
+ : "\nExisting filesystem: (empty — you are placing the first file)",
79
+ opts.sourcePath ? `\nSource filesystem path: ${opts.sourcePath}` : "",
80
+ ]
81
+ .filter((s) => s.length > 0)
82
+ .join("\n")
83
+ : "";
84
+
85
+ const textPrompt = `Describe this file in one sentence. Be specific about what it contains, not generic.\n\nFilename: ${opts.filename}\nMIME type: ${opts.mimeType}${placementBlock ? `\n${placementBlock}` : ""}`;
43
86
 
44
87
  // Text file — include content inline
45
88
  if (opts.content) {
@@ -98,6 +141,20 @@ interface DescriberOpts {
98
141
  mimeType: string;
99
142
  content: string | null;
100
143
  filePath?: string;
144
+ sourcePath?: string;
145
+ existingTree?: string;
146
+ }
147
+
148
+ /** Normalize and validate an LLM-suggested path. Returns null if invalid. */
149
+ export function sanitizeSuggestedPath(raw: string): string | null {
150
+ const trimmed = raw.trim();
151
+ if (!trimmed) return null;
152
+ if (!trimmed.startsWith("/")) return null;
153
+ if (trimmed.includes("..")) return null;
154
+ // Collapse repeated slashes, strip trailing slash (unless root).
155
+ const collapsed = trimmed.replace(/\/+/g, "/");
156
+ if (collapsed === "/") return null; // needs a filename
157
+ return collapsed.endsWith("/") ? collapsed.slice(0, -1) : collapsed;
101
158
  }
102
159
 
103
160
  /**
@@ -116,7 +173,7 @@ export async function generateDescription(
116
173
  const client = new Anthropic({ apiKey: config.anthropic_api_key });
117
174
 
118
175
  try {
119
- const content = await buildMessageContent(opts);
176
+ const content = await buildMessageContent(opts, false);
120
177
 
121
178
  const response = await Promise.race([
122
179
  client.messages.create({
@@ -144,3 +201,55 @@ export async function generateDescription(
144
201
  return "";
145
202
  }
146
203
  }
204
+
205
+ /**
206
+ * Generate description + suggested_path in a single LLM call.
207
+ * Returns { description, suggested_path } on success, or null on failure.
208
+ */
209
+ export async function generateDescriptionAndPath(
210
+ config: Required<BotholomewConfig>,
211
+ opts: DescriberOpts,
212
+ ): Promise<{ description: string; suggested_path: string } | null> {
213
+ if (!config.anthropic_api_key) return null;
214
+
215
+ const client = new Anthropic({ apiKey: config.anthropic_api_key });
216
+
217
+ try {
218
+ const content = await buildMessageContent(opts, true);
219
+
220
+ const response = await Promise.race([
221
+ client.messages.create({
222
+ model: config.chunker_model,
223
+ max_tokens: 512,
224
+ tools: [DESCRIBE_AND_PLACE_TOOL],
225
+ tool_choice: { type: "tool", name: DESCRIBE_AND_PLACE_TOOL_NAME },
226
+ messages: [{ role: "user", content }],
227
+ }),
228
+ new Promise<never>((_, reject) =>
229
+ setTimeout(
230
+ () => reject(new Error("Description+path generation timeout")),
231
+ TIMEOUT_MS,
232
+ ),
233
+ ),
234
+ ]);
235
+
236
+ const toolBlock = response.content.find((b) => b.type === "tool_use");
237
+ if (!toolBlock || toolBlock.type !== "tool_use") return null;
238
+
239
+ const input = toolBlock.input as {
240
+ description?: string;
241
+ suggested_path?: string;
242
+ };
243
+ const suggested = input.suggested_path
244
+ ? sanitizeSuggestedPath(input.suggested_path)
245
+ : null;
246
+ if (!suggested) return null;
247
+ return {
248
+ description: input.description || "",
249
+ suggested_path: suggested,
250
+ };
251
+ } catch (err) {
252
+ logger.debug(`Description+path generation failed: ${err}`);
253
+ return null;
254
+ }
255
+ }
@@ -86,13 +86,13 @@ export async function tick(
86
86
  callbacks,
87
87
  });
88
88
 
89
- // Update task status and store output
89
+ const isComplete = result.status === "complete";
90
90
  await updateTaskStatus(
91
91
  conn,
92
92
  task.id,
93
93
  result.status,
94
- result.reason,
95
- result.reason,
94
+ isComplete ? null : result.reason,
95
+ isComplete ? result.reason : null,
96
96
  );
97
97
 
98
98
  // Log the status change
@@ -112,7 +112,7 @@ export async function tick(
112
112
  `Task: ${task.name}\nDescription: ${task.description}\nOutcome: ${result.status}${result.reason ? ` — ${result.reason}` : ""}`,
113
113
  );
114
114
  } catch (err) {
115
- await updateTaskStatus(conn, task.id, "failed", String(err), String(err));
115
+ await updateTaskStatus(conn, task.id, "failed", String(err), null);
116
116
 
117
117
  await logInteraction(conn, threadId, {
118
118
  role: "system",
package/src/db/context.ts CHANGED
@@ -56,6 +56,17 @@ function rowToContextItem(row: ContextItemRow): ContextItem {
56
56
  };
57
57
  }
58
58
 
59
+ export class PathConflictError extends Error {
60
+ existingId: string;
61
+ contextPath: string;
62
+ constructor(existingId: string, contextPath: string) {
63
+ super(`context_path already exists: ${contextPath}`);
64
+ this.name = "PathConflictError";
65
+ this.existingId = existingId;
66
+ this.contextPath = contextPath;
67
+ }
68
+ }
69
+
59
70
  // --- Basic CRUD ---
60
71
 
61
72
  export async function createContextItem(
@@ -124,6 +135,28 @@ export async function upsertContextItem(
124
135
  return createContextItem(db, params);
125
136
  }
126
137
 
138
+ /**
139
+ * Strict creator: throws PathConflictError if context_path already exists.
140
+ * Use when callers want to surface collisions instead of silently overwriting.
141
+ */
142
+ export async function createContextItemStrict(
143
+ db: DbConnection,
144
+ params: {
145
+ title: string;
146
+ content?: string;
147
+ mimeType?: string;
148
+ sourceType?: "file" | "url";
149
+ sourcePath?: string;
150
+ contextPath: string;
151
+ description?: string;
152
+ isTextual?: boolean;
153
+ },
154
+ ): Promise<ContextItem> {
155
+ const existing = await getContextItemByPath(db, params.contextPath);
156
+ if (existing) throw new PathConflictError(existing.id, params.contextPath);
157
+ return createContextItem(db, params);
158
+ }
159
+
127
160
  export async function getContextItem(
128
161
  db: DbConnection,
129
162
  id: string,
package/src/db/tasks.ts CHANGED
@@ -132,8 +132,8 @@ export async function updateTaskStatus(
132
132
  db: DbConnection,
133
133
  id: string,
134
134
  status: Task["status"],
135
- reason?: string,
136
- output?: string,
135
+ reason?: string | null,
136
+ output?: string | null,
137
137
  ): Promise<void> {
138
138
  await db.queryRun(
139
139
  `UPDATE tasks
@@ -1,7 +1,11 @@
1
1
  import { isText } from "istextorbinary";
2
2
  import { z } from "zod";
3
3
  import { ingestByPath } from "../../context/ingest.ts";
4
- import { upsertContextItem } from "../../db/context.ts";
4
+ import {
5
+ createContextItemStrict,
6
+ PathConflictError,
7
+ upsertContextItem,
8
+ } from "../../db/context.ts";
5
9
  import type { ToolDefinition } from "../tool.ts";
6
10
 
7
11
  function mimeFromPath(path: string): string {
@@ -30,18 +34,27 @@ const inputSchema = z.object({
30
34
  .optional()
31
35
  .describe("Title for the file (defaults to filename)"),
32
36
  description: z.string().optional().describe("Description of the file"),
37
+ on_conflict: z
38
+ .enum(["error", "overwrite"])
39
+ .optional()
40
+ .describe(
41
+ "What to do if a file already exists at this path. Defaults to 'error'. Pass 'overwrite' to replace.",
42
+ ),
33
43
  });
34
44
 
35
45
  const outputSchema = z.object({
36
- id: z.string(),
46
+ id: z.string().nullable(),
37
47
  path: z.string(),
38
48
  is_error: z.boolean(),
49
+ error_type: z.string().optional(),
50
+ message: z.string().optional(),
51
+ next_action_hint: z.string().optional(),
39
52
  });
40
53
 
41
54
  export const contextWriteTool = {
42
55
  name: "context_write",
43
56
  description:
44
- "Write content to a context item. Creates the item if it doesn't exist, or overwrites if it does.",
57
+ "Write content to a context item. By default, fails if the path already exists pass on_conflict='overwrite' to replace.",
45
58
  group: "context",
46
59
  inputSchema,
47
60
  outputSchema,
@@ -50,17 +63,43 @@ export const contextWriteTool = {
50
63
  const isTextual = isTextualPath(input.path);
51
64
  const title =
52
65
  input.title ?? input.path.split("/").filter(Boolean).pop() ?? input.path;
66
+ const onConflict = input.on_conflict ?? "error";
53
67
 
54
- const item = await upsertContextItem(ctx.conn, {
55
- title,
56
- description: input.description,
57
- content: input.content_base64 ?? input.content,
58
- contextPath: input.path,
59
- mimeType,
60
- isTextual,
61
- });
68
+ try {
69
+ const item =
70
+ onConflict === "overwrite"
71
+ ? await upsertContextItem(ctx.conn, {
72
+ title,
73
+ description: input.description,
74
+ content: input.content_base64 ?? input.content,
75
+ contextPath: input.path,
76
+ mimeType,
77
+ isTextual,
78
+ })
79
+ : await createContextItemStrict(ctx.conn, {
80
+ title,
81
+ description: input.description,
82
+ content: input.content_base64 ?? input.content,
83
+ contextPath: input.path,
84
+ mimeType,
85
+ isTextual,
86
+ });
62
87
 
63
- await ingestByPath(ctx.conn, input.path, ctx.config);
64
- return { id: item.id, path: item.context_path, is_error: false };
88
+ await ingestByPath(ctx.conn, input.path, ctx.config);
89
+ return { id: item.id, path: item.context_path, is_error: false };
90
+ } catch (err) {
91
+ if (err instanceof PathConflictError) {
92
+ return {
93
+ id: null,
94
+ path: input.path,
95
+ is_error: true,
96
+ error_type: "path_conflict",
97
+ message: `A file already exists at ${input.path} (id: ${err.existingId}).`,
98
+ next_action_hint:
99
+ "Call context_read to inspect the existing file, or retry with on_conflict='overwrite' to replace it.",
100
+ };
101
+ }
102
+ throw err;
103
+ }
65
104
  },
66
105
  } satisfies ToolDefinition<typeof inputSchema, typeof outputSchema>;