botholomew 0.8.9 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,10 @@ import type { Command } from "commander";
3
3
  import { z } from "zod";
4
4
  import { loadConfig } from "../config/loader.ts";
5
5
  import { getDbPath } from "../constants.ts";
6
+ import { parseDriveRef } from "../context/drives.ts";
7
+ import type { DbConnection } from "../db/connection.ts";
8
+ import { getContextItemById } from "../db/context.ts";
9
+ import { isUuid } from "../db/uuid.ts";
6
10
  import { registerAllTools } from "../tools/registry.ts";
7
11
  import {
8
12
  type AnyToolDefinition,
@@ -43,7 +47,7 @@ export function registerSearchToolSubcommands(parent: Command) {
43
47
  }
44
48
  }
45
49
 
46
- /** Derive CLI subcommand name from tool name: "context_read" → "read", "context_list_dir" → "list-dir" */
50
+ /** Derive CLI subcommand name from tool name: "context_read" → "read", "context_create_dir" → "create-dir" */
47
51
  function deriveSubName(toolName: string): string {
48
52
  return toolName.replace(/^[^_]+_/, "").replace(/_/g, "-");
49
53
  }
@@ -64,7 +68,7 @@ function registerToolAsCLI(parent: Command, tool: AnyToolDefinition) {
64
68
  for (const [key, schema] of Object.entries(shape)) {
65
69
  const desc = schema.description ?? key;
66
70
  const isOptional = schema.isOptional();
67
- const unwrapped = unwrapOptional(schema);
71
+ const unwrapped = unwrapSchema(schema);
68
72
 
69
73
  if (isPositionalArg(key, tool.name)) {
70
74
  positionals.push(isOptional ? `[${key}]` : `<${key}>`);
@@ -109,7 +113,14 @@ function registerToolAsCLI(parent: Command, tool: AnyToolDefinition) {
109
113
  while (root.parent) root = root.parent;
110
114
  return withDb(root, async (conn, dir) => {
111
115
  try {
112
- const input = buildInput(tool, positionals, options, shape, args);
116
+ const input = await buildInput(
117
+ tool,
118
+ positionals,
119
+ options,
120
+ shape,
121
+ args,
122
+ conn,
123
+ );
113
124
 
114
125
  const ctx: ToolContext = {
115
126
  conn,
@@ -129,7 +140,7 @@ function registerToolAsCLI(parent: Command, tool: AnyToolDefinition) {
129
140
  });
130
141
  }
131
142
 
132
- function buildInput(
143
+ async function buildInput(
133
144
  tool: AnyToolDefinition,
134
145
  positionals: string[],
135
146
  options: {
@@ -140,14 +151,36 @@ function buildInput(
140
151
  }[],
141
152
  shape: Record<string, z.ZodType>,
142
153
  args: unknown[],
143
- ): Record<string, unknown> {
154
+ conn: DbConnection,
155
+ ): Promise<Record<string, unknown>> {
144
156
  const input: Record<string, unknown> = {};
145
157
 
146
- // Positional args come first in Commander's action callback
158
+ // Positional args come first in Commander's action callback. Context tools
159
+ // carry `(drive, path)` or `(src_drive, src_path, …)` in their schema but
160
+ // accept a friendlier `drive:/path` or bare-UUID form as a single positional
161
+ // on the CLI.
147
162
  for (let i = 0; i < positionals.length; i++) {
148
163
  const key = positionals[i]?.replace(/[<>[\]]/g, "");
149
164
  const value = args[i];
150
- if (key !== undefined && value !== undefined) input[key] = value;
165
+ if (key === undefined || value === undefined) continue;
166
+ const splitTargets = driveRefSplitTargets(key, shape);
167
+ if (splitTargets && typeof value === "string") {
168
+ const parsed = parseDriveRef(value);
169
+ if (parsed) {
170
+ input[splitTargets.drive] = parsed.drive;
171
+ input[splitTargets.path] = parsed.path;
172
+ continue;
173
+ }
174
+ if (isUuid(value)) {
175
+ const item = await getContextItemById(conn, value);
176
+ if (item) {
177
+ input[splitTargets.drive] = item.drive;
178
+ input[splitTargets.path] = item.path;
179
+ continue;
180
+ }
181
+ }
182
+ }
183
+ input[key] = value;
151
184
  }
152
185
 
153
186
  // Options object is the last argument before the Command object
@@ -163,7 +196,7 @@ function buildInput(
163
196
 
164
197
  const schemaForKey = shape[opt.key];
165
198
  if (!schemaForKey) continue;
166
- const unwrapped = unwrapOptional(schemaForKey);
199
+ const unwrapped = unwrapSchema(schemaForKey);
167
200
 
168
201
  // Parse JSON for array types
169
202
  if (opt.isArray && typeof value === "string") {
@@ -192,6 +225,19 @@ function formatOutput(result: unknown, _toolName: string) {
192
225
  if (typeof result === "object") {
193
226
  const obj = result as Record<string, unknown>;
194
227
 
228
+ // Structured error shape: { is_error: true, message, next_action_hint? }
229
+ if (obj.is_error === true) {
230
+ const msg = typeof obj.message === "string" ? obj.message : "Error";
231
+ logger.error(msg);
232
+ if (
233
+ typeof obj.next_action_hint === "string" &&
234
+ obj.next_action_hint.length > 0
235
+ ) {
236
+ console.log(ansis.dim(obj.next_action_hint));
237
+ }
238
+ process.exit(1);
239
+ }
240
+
195
241
  // Special formatting for known output shapes
196
242
  if ("tree" in obj && typeof obj.tree === "string") {
197
243
  console.log(obj.tree);
@@ -217,6 +263,26 @@ function formatOutput(result: unknown, _toolName: string) {
217
263
  return;
218
264
  }
219
265
 
266
+ if ("drives" in obj && Array.isArray(obj.drives)) {
267
+ const drives = obj.drives as { drive: string; count: number }[];
268
+ if (drives.length === 0) {
269
+ if (typeof obj.hint === "string") console.log(ansis.dim(obj.hint));
270
+ return;
271
+ }
272
+ const widest = Math.max(...drives.map((d) => d.drive.length));
273
+ for (const d of drives) {
274
+ const label = `${d.drive}:/`.padEnd(widest + 2);
275
+ const plural = d.count === 1 ? "item" : "items";
276
+ console.log(
277
+ ` ${ansis.cyan(label)} ${ansis.dim(`(${d.count} ${plural})`)}`,
278
+ );
279
+ }
280
+ if (typeof obj.hint === "string") {
281
+ console.log(`\n${ansis.dim(obj.hint)}`);
282
+ }
283
+ return;
284
+ }
285
+
220
286
  if ("matches" in obj && Array.isArray(obj.matches)) {
221
287
  for (const match of obj.matches) {
222
288
  if (typeof match === "string") {
@@ -263,7 +329,6 @@ function isPositionalArg(key: string, toolName: string): boolean {
263
329
  // These keys are treated as positional arguments
264
330
  const positionalKeys: Record<string, string[]> = {
265
331
  context_create_dir: ["path"],
266
- context_list_dir: ["path"],
267
332
  context_tree: ["path"],
268
333
  context_dir_size: ["path"],
269
334
  context_read: ["path"],
@@ -282,9 +347,33 @@ function isPositionalArg(key: string, toolName: string): boolean {
282
347
  return positionalKeys[toolName]?.includes(key) ?? false;
283
348
  }
284
349
 
285
- function unwrapOptional(schema: z.ZodType): z.ZodType {
350
+ function unwrapSchema(schema: z.ZodType): z.ZodType {
286
351
  if (schema instanceof z.ZodOptional) {
287
- return schema.unwrap() as z.ZodType;
352
+ return unwrapSchema(schema.unwrap() as z.ZodType);
353
+ }
354
+ if (schema instanceof z.ZodDefault) {
355
+ return unwrapSchema(schema.unwrap() as z.ZodType);
288
356
  }
289
357
  return schema;
290
358
  }
359
+
360
+ /**
361
+ * Pick the pair of schema fields that a single positional CLI argument should be split into.
362
+ * `path` maps to `drive`/`path`, `src` to `src_drive`/`src_path`, and `dst` to `dst_drive`/`dst_path`; the argument's value is not inspected here.
363
+ * Returns null for any other positional key, or when the schema lacks either of the two target fields.
364
+ */
365
+ function driveRefSplitTargets(
366
+ positionalKey: string,
367
+ shape: Record<string, z.ZodType>,
368
+ ): { drive: string; path: string } | null {
369
+ if (positionalKey === "path" && "drive" in shape && "path" in shape) {
370
+ return { drive: "drive", path: "path" };
371
+ }
372
+ if (positionalKey === "src" && "src_drive" in shape && "src_path" in shape) {
373
+ return { drive: "src_drive", path: "src_path" };
374
+ }
375
+ if (positionalKey === "dst" && "dst_drive" in shape && "dst_path" in shape) {
376
+ return { drive: "dst_drive", path: "dst_path" };
377
+ }
378
+ return null;
379
+ }
@@ -3,7 +3,6 @@ import type { BotholomewConfig } from "../config/schemas.ts";
3
3
  import { logger } from "../utils/logger.ts";
4
4
 
5
5
  const DESCRIBE_TOOL_NAME = "return_description";
6
- const DESCRIBE_AND_PLACE_TOOL_NAME = "return_description_and_path";
7
6
 
8
7
  const DESCRIBE_TOOL = {
9
8
  name: DESCRIBE_TOOL_NAME,
@@ -21,28 +20,6 @@ const DESCRIBE_TOOL = {
21
20
  },
22
21
  };
23
22
 
24
- const DESCRIBE_AND_PLACE_TOOL = {
25
- name: DESCRIBE_AND_PLACE_TOOL_NAME,
26
- description:
27
- "Return a one-sentence description AND a suggested absolute folder path for this file.",
28
- input_schema: {
29
- type: "object" as const,
30
- properties: {
31
- description: {
32
- type: "string",
33
- description:
34
- "A concise one-sentence summary of what this content is about.",
35
- },
36
- suggested_path: {
37
- type: "string",
38
- description:
39
- "Absolute virtual-filesystem path (starts with /) where this file should live, including the filename. Prefer existing folders. Include a project/source disambiguator (e.g. /projects/<source-dir>/README.md) when the basename is likely to collide.",
40
- },
41
- },
42
- required: ["description", "suggested_path"],
43
- },
44
- };
45
-
46
23
  const TIMEOUT_MS = 10_000;
47
24
  const MAX_CONTENT_CHARS = 8000;
48
25
  const MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
@@ -56,35 +33,11 @@ const IMAGE_TYPES = new Set([
56
33
 
57
34
  type ImageMediaType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
58
35
 
59
- /**
60
- * Build the message content array for the LLM description request.
61
- * Attaches the file as an image or document block when possible.
62
- */
63
36
  async function buildMessageContent(
64
37
  opts: DescriberOpts,
65
- includePlacement: boolean,
66
38
  ): Promise<Anthropic.Messages.ContentBlockParam[]> {
67
- const placementBlock = includePlacement
68
- ? [
69
- "",
70
- "Also suggest an absolute folder path where this file should live in the virtual filesystem. Rules:",
71
- "- Start with /",
72
- "- Keep the basename close to the source filename",
73
- "- STRONGLY prefer folders that already exist below — reuse them unless the new file is clearly unrelated to everything there. Do NOT invent a new folder that is a near-synonym of an existing one.",
74
- "- Use at most 3 nested folders unless an existing folder already goes deeper",
75
- "- If the basename is common (README.md, index.md, notes.md), include a project/source disambiguator from the source path",
76
- opts.existingTree
77
- ? `\nExisting filesystem (folders end with /, files are listed under the folders they live in so you can see what kinds of documents are already there):\n${opts.existingTree}`
78
- : "\nExisting filesystem: (empty — you are placing the first file)",
79
- opts.sourcePath ? `\nSource filesystem path: ${opts.sourcePath}` : "",
80
- ]
81
- .filter((s) => s.length > 0)
82
- .join("\n")
83
- : "";
84
-
85
- const textPrompt = `Describe this file in one sentence. Be specific about what it contains, not generic.\n\nFilename: ${opts.filename}\nMIME type: ${opts.mimeType}${placementBlock ? `\n${placementBlock}` : ""}`;
86
-
87
- // Text file — include content inline
39
+ const textPrompt = `Describe this file in one sentence. Be specific about what it contains, not generic.\n\nFilename: ${opts.filename}\nMIME type: ${opts.mimeType}`;
40
+
88
41
  if (opts.content) {
89
42
  const truncated =
90
43
  opts.content.length > MAX_CONTENT_CHARS
@@ -93,7 +46,6 @@ async function buildMessageContent(
93
46
  return [{ type: "text", text: `${textPrompt}\n\nContent:\n${truncated}` }];
94
47
  }
95
48
 
96
- // Binary file — try to attach if we have a file path
97
49
  if (opts.filePath) {
98
50
  const file = Bun.file(opts.filePath);
99
51
  const size = file.size;
@@ -127,7 +79,6 @@ async function buildMessageContent(
127
79
  }
128
80
  }
129
81
 
130
- // Fallback — describe from filename and MIME type only
131
82
  return [
132
83
  {
133
84
  type: "text",
@@ -141,20 +92,6 @@ interface DescriberOpts {
141
92
  mimeType: string;
142
93
  content: string | null;
143
94
  filePath?: string;
144
- sourcePath?: string;
145
- existingTree?: string;
146
- }
147
-
148
- /** Normalize and validate an LLM-suggested path. Returns null if invalid. */
149
- export function sanitizeSuggestedPath(raw: string): string | null {
150
- const trimmed = raw.trim();
151
- if (!trimmed) return null;
152
- if (!trimmed.startsWith("/")) return null;
153
- if (trimmed.includes("..")) return null;
154
- // Collapse repeated slashes, strip trailing slash (unless root).
155
- const collapsed = trimmed.replace(/\/+/g, "/");
156
- if (collapsed === "/") return null; // needs a filename
157
- return collapsed.endsWith("/") ? collapsed.slice(0, -1) : collapsed;
158
95
  }
159
96
 
160
97
  /**
@@ -173,7 +110,7 @@ export async function generateDescription(
173
110
  const client = new Anthropic({ apiKey: config.anthropic_api_key });
174
111
 
175
112
  try {
176
- const content = await buildMessageContent(opts, false);
113
+ const content = await buildMessageContent(opts);
177
114
 
178
115
  const response = await Promise.race([
179
116
  client.messages.create({
@@ -201,55 +138,3 @@ export async function generateDescription(
201
138
  return "";
202
139
  }
203
140
  }
204
-
205
- /**
206
- * Generate description + suggested_path in a single LLM call.
207
- * Returns { description, suggested_path } on success, or null on failure.
208
- */
209
- export async function generateDescriptionAndPath(
210
- config: Required<BotholomewConfig>,
211
- opts: DescriberOpts,
212
- ): Promise<{ description: string; suggested_path: string } | null> {
213
- if (!config.anthropic_api_key) return null;
214
-
215
- const client = new Anthropic({ apiKey: config.anthropic_api_key });
216
-
217
- try {
218
- const content = await buildMessageContent(opts, true);
219
-
220
- const response = await Promise.race([
221
- client.messages.create({
222
- model: config.chunker_model,
223
- max_tokens: 512,
224
- tools: [DESCRIBE_AND_PLACE_TOOL],
225
- tool_choice: { type: "tool", name: DESCRIBE_AND_PLACE_TOOL_NAME },
226
- messages: [{ role: "user", content }],
227
- }),
228
- new Promise<never>((_, reject) =>
229
- setTimeout(
230
- () => reject(new Error("Description+path generation timeout")),
231
- TIMEOUT_MS,
232
- ),
233
- ),
234
- ]);
235
-
236
- const toolBlock = response.content.find((b) => b.type === "tool_use");
237
- if (!toolBlock || toolBlock.type !== "tool_use") return null;
238
-
239
- const input = toolBlock.input as {
240
- description?: string;
241
- suggested_path?: string;
242
- };
243
- const suggested = input.suggested_path
244
- ? sanitizeSuggestedPath(input.suggested_path)
245
- : null;
246
- if (!suggested) return null;
247
- return {
248
- description: input.description || "",
249
- suggested_path: suggested,
250
- };
251
- } catch (err) {
252
- logger.debug(`Description+path generation failed: ${err}`);
253
- return null;
254
- }
255
- }
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Drives name the origin of a context item. Every item lives at a
3
+ * `(drive, path)` pair; the `drive:/path` string form is a display and CLI
4
+ * convention (single column queries use the two columns directly).
5
+ *
6
+ * Built-in drives:
7
+ * disk — local filesystem; path is the absolute filesystem path
8
+ * url — generic HTTP(S) URL; path is `/` followed by the full URL
9
+ * agent — agent-authored scratch; path is whatever the agent chose
10
+ * google-docs — Google Docs; path is `/<docId>`
11
+ * github — GitHub content; path is `/<owner>/<repo>/<rest>` (for blob/tree/raw URLs the `blob|tree` marker and the `<ref>` are omitted)
12
+ */
13
+
14
+ export const BUILT_IN_DRIVES = [
15
+ "disk",
16
+ "url",
17
+ "agent",
18
+ "google-docs",
19
+ "github",
20
+ ] as const;
21
+
22
/** A `(drive, path)` pair identifying where a context item lives. */
export interface DriveTarget {
  /** Drive name, e.g. `disk`, `url`, `agent`. */
  drive: string;
  /** Path within the drive (the parsers in this module always produce a leading `/`). */
  path: string;
}

/**
 * Parse a `drive:/path` reference into `{ drive, path }`.
 *
 * The text before the first `:` is the drive name: a lowercase letter followed
 * by lowercase letters, digits, `_` or `-`. Everything after that `:` is the
 * path and must begin with `/`.
 *
 * @param ref - candidate reference, e.g. `disk:/home/me/notes.md`.
 * @returns the parsed target, or null when `ref` is not in drive form. Plain
 *   URLs such as `https://example.com/x` are rejected: `name://…` is URL
 *   syntax, not a drive reference, so callers can fall through to their
 *   non-drive handling instead of receiving a bogus `https` drive.
 */
export function parseDriveRef(ref: string): DriveTarget | null {
  const sep = ref.indexOf(":");
  // No colon, or nothing before it: there is no drive name to read.
  if (sep <= 0) return null;

  const drive = ref.slice(0, sep);
  const path = ref.slice(sep + 1);

  if (!path.startsWith("/")) return null;
  // `https://host/x` would otherwise parse as drive "https", path "//host/x".
  if (path.startsWith("//")) return null;
  if (!/^[a-z][a-z0-9_-]*$/.test(drive)) return null;

  return { drive, path };
}
37
+
38
+ /** Format a `(drive, path)` pair as the `drive:path` string used for display and on the CLI; when `path` starts with `/` the result reads `drive:/path`. The inputs are joined as-is, with no validation. */
39
+ export function formatDriveRef(target: DriveTarget): string {
40
+ return `${target.drive}:${target.path}`;
41
+ }
42
+
43
+ /**
44
+ * Map a URL to a `(drive, path)` target. Google Docs and GitHub are recognised by hostname, or — when `mcpxServerName` is given — by the lowercased server name containing both `google` and `doc`, or `github`.
45
+ * Everything else falls back to the `url` drive with path `/` + the original string: input that `new URL` cannot parse, unrecognised hosts, and recognised hosts from which no doc id / repo path could be extracted.
46
+ */
47
+ export function detectDriveFromUrl(
48
+ url: string,
49
+ mcpxServerName?: string | null,
50
+ ): DriveTarget {
51
+ const hint = mcpxServerName?.toLowerCase() ?? "";
52
+ let parsed: URL | null = null;
53
+ try {
54
+ parsed = new URL(url);
55
+ } catch { // `new URL` rejected the string: keep it verbatim under the url drive
56
+ return { drive: "url", path: `/${url}` };
57
+ }
58
+
59
+ const host = parsed.hostname.toLowerCase();
60
+
61
+ if (
62
+ host === "docs.google.com" ||
63
+ (hint.includes("google") && hint.includes("doc"))
64
+ ) {
65
+ const docId = extractGoogleDocId(parsed);
66
+ if (docId) return { drive: "google-docs", path: `/${docId}` }; // no id found: fall through to the checks below
67
+ }
68
+
69
+ if (
70
+ host === "github.com" ||
71
+ host === "raw.githubusercontent.com" ||
72
+ hint.includes("github")
73
+ ) {
74
+ const ghPath = extractGithubPath(parsed);
75
+ if (ghPath) return { drive: "github", path: ghPath }; // no owner/repo found: fall through to the generic url drive
76
+ }
77
+
78
+ return { drive: "url", path: `/${url}` };
79
+ }
80
+
81
+ function extractGoogleDocId(u: URL): string | null {
82
+ // Returns the path segment that follows the first "/d/" in the pathname (the document id), or null if there is none, e.g. https://docs.google.com/document/d/<docId>/edit
83
+ // The same shape matches other editors, e.g. https://docs.google.com/spreadsheets/d/<docId>/edit
84
+ const m = u.pathname.match(/\/d\/([^/]+)/);
85
+ return m?.[1] ?? null;
86
+ }
87
+
88
/**
 * Map a GitHub URL to a drive path of the form `/<owner>/<repo>[/<rest>]`.
 *
 * Recognised shapes:
 *   github.com/<owner>/<repo>                               → /<owner>/<repo>
 *   github.com/<owner>/<repo>/blob|tree/<ref>/<path…>       → /<owner>/<repo>/<path…>
 *   raw.githubusercontent.com/<owner>/<repo>/<ref>/<path…>  → /<owner>/<repo>/<path…>
 *   github.com/<owner>/<repo>/<other…> (issues, pull, …)    → /<owner>/<repo>/<other…>
 *
 * The `blob|tree` marker and the ref are dropped, so the same file on different
 * refs shares one path. Any other trailing segments are kept, so that distinct
 * pages of a repository do not all collapse onto the repository root.
 *
 * @returns the drive path, or null when the URL has fewer than two path segments.
 */
function extractGithubPath(u: URL): string | null {
  const [owner, repo, ...tail] = u.pathname.split("/").filter(Boolean);
  if (!owner || !repo) return null;

  let rest: string[];
  if (u.hostname === "raw.githubusercontent.com") {
    // tail: <ref>, <path…> — drop the ref.
    rest = tail.slice(1);
  } else if (tail[0] === "blob" || tail[0] === "tree") {
    // tail: blob|tree, <ref>, <path…> — drop the marker and the ref.
    rest = tail.slice(2);
  } else {
    // issues/123, pull/7, wiki/Home, … — keep, so each page gets its own path.
    rest = tail;
  }

  return `/${[owner, repo, ...rest].join("/")}`;
}
@@ -15,6 +15,7 @@ import { mcpSearchTool } from "../tools/mcp/search.ts";
15
15
  import type { ToolContext } from "../tools/tool.ts";
16
16
  import { type AnyToolDefinition, toAnthropicTool } from "../tools/tool.ts";
17
17
  import { logger } from "../utils/logger.ts";
18
+ import { detectDriveFromUrl } from "./drives.ts";
18
19
  import { stripHtmlTags } from "./url-utils.ts";
19
20
 
20
21
  const MAX_CONTENT_BYTES = 500_000;
@@ -28,6 +29,8 @@ export interface FetchedContent {
28
29
  content: string;
29
30
  mimeType: string;
30
31
  sourceUrl: string;
32
+ drive: string;
33
+ path: string;
31
34
  }
32
35
 
33
36
  export class FetchFailureError extends Error {
@@ -176,7 +179,8 @@ async function runFetcherLoop(
176
179
 
177
180
  // Cache of full mcp_exec results keyed by tool_use_id.
178
181
  // The LLM only sees a truncated preview; on accept_content it references
179
- // the id and the harness saves the captured content.
182
+ // the id and the harness saves the captured content. `server` is retained so
183
+ // we can attribute the save to a specific MCP service when routing to a drive.
180
184
  const execResults = new Map<
181
185
  string,
182
186
  { server: string; tool: string; content: string; mimeType: string }
@@ -289,11 +293,14 @@ async function runFetcherLoop(
289
293
  logger.dim(
290
294
  ` turn ${turn + 1}: accept_content: "${input.title}" (${cached.content.length} chars, ${mimeType}, from ${cached.server}/${cached.tool})`,
291
295
  );
296
+ const { drive, path } = detectDriveFromUrl(url, cached.server);
292
297
  return {
293
298
  title: input.title,
294
299
  content: cached.content.slice(0, MAX_CONTENT_BYTES),
295
300
  mimeType,
296
301
  sourceUrl: url,
302
+ drive,
303
+ path,
297
304
  };
298
305
  }
299
306
 
@@ -428,10 +435,13 @@ export async function httpFallback(url: string): Promise<FetchedContent> {
428
435
  ? "text/markdown"
429
436
  : contentType.split(";")[0] || "text/plain";
430
437
 
438
+ const { drive, path } = detectDriveFromUrl(url);
431
439
  return {
432
440
  title,
433
441
  content: text,
434
442
  mimeType,
435
443
  sourceUrl: url,
444
+ drive,
445
+ path,
436
446
  };
437
447
  }
@@ -1,9 +1,10 @@
1
1
  import type { BotholomewConfig } from "../config/schemas.ts";
2
2
  import type { DbConnection } from "../db/connection.ts";
3
- import { getContextItem, getContextItemByPath } from "../db/context.ts";
3
+ import { getContextItem, getContextItemById } from "../db/context.ts";
4
4
  import { createEmbedding, deleteEmbeddingsForItem } from "../db/embeddings.ts";
5
5
  import { logger } from "../utils/logger.ts";
6
6
  import { chunk } from "./chunker.ts";
7
+ import { type DriveTarget, formatDriveRef } from "./drives.ts";
7
8
  import { embed as defaultEmbed } from "./embedder.ts";
8
9
 
9
10
  type IngestEmbedFn = (texts: string[]) => Promise<number[][]>;
@@ -12,7 +13,8 @@ export interface PreparedIngestion {
12
13
  itemId: string;
13
14
  title: string;
14
15
  description: string;
15
- sourcePath: string | null;
16
+ drive: string;
17
+ path: string;
16
18
  chunks: { index: number; content: string }[];
17
19
  vectors: number[][];
18
20
  }
@@ -27,7 +29,7 @@ export async function prepareIngestion(
27
29
  config: Required<BotholomewConfig>,
28
30
  embedFn?: IngestEmbedFn,
29
31
  ): Promise<PreparedIngestion | null> {
30
- const item = await getContextItem(conn, itemId);
32
+ const item = await getContextItemById(conn, itemId);
31
33
  if (!item) {
32
34
  logger.warn(`ingest: context item ${itemId} not found`);
33
35
  return null;
@@ -52,11 +54,12 @@ export async function prepareIngestion(
52
54
  const chunks = await chunk(item.content, item.mime_type, config);
53
55
  if (chunks.length === 0) return null;
54
56
 
57
+ const ref = formatDriveRef(item);
55
58
  const textsForEmbedding = chunks.map((c) => {
56
59
  const parts: string[] = [];
57
60
  if (item.title) parts.push(`Title: ${item.title}`);
58
61
  if (item.description) parts.push(`Description: ${item.description}`);
59
- if (item.source_path) parts.push(`Source: ${item.source_path}`);
62
+ parts.push(`Source: ${ref}`);
60
63
  parts.push(c.content);
61
64
  return parts.join("\n");
62
65
  });
@@ -66,7 +69,8 @@ export async function prepareIngestion(
66
69
  itemId,
67
70
  title: item.title,
68
71
  description: item.description,
69
- sourcePath: item.source_path,
72
+ drive: item.drive,
73
+ path: item.path,
70
74
  chunks,
71
75
  vectors,
72
76
  };
@@ -102,7 +106,6 @@ export async function storeIngestion(
102
106
  chunkContent: c.content,
103
107
  title: prepared.title,
104
108
  description: prepared.description,
105
- sourcePath: prepared.sourcePath,
106
109
  embedding: v,
107
110
  });
108
111
  }
@@ -144,17 +147,17 @@ export async function ingestContextItem(
144
147
  }
145
148
 
146
149
  /**
147
- * Ingest a context item by its virtual path.
150
+ * Ingest a context item by its (drive, path) pair.
148
151
  */
149
152
  export async function ingestByPath(
150
153
  conn: DbConnection,
151
- contextPath: string,
154
+ target: DriveTarget,
152
155
  config: Required<BotholomewConfig>,
153
156
  embedFn?: IngestEmbedFn,
154
157
  ): Promise<number> {
155
- const item = await getContextItemByPath(conn, contextPath);
158
+ const item = await getContextItem(conn, target);
156
159
  if (!item) {
157
- logger.warn(`ingest: no item at path ${contextPath}`);
160
+ logger.warn(`ingest: no item at ${formatDriveRef(target)}`);
158
161
  return 0;
159
162
  }
160
163
  return ingestContextItem(conn, item.id, config, embedFn);