npm - botholomew - Versions diffs - 0.11.6 → 0.12.0 - Mend

botholomew 0.11.6 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +1 -1
package/src/chat/agent.ts +3 -4
package/src/commands/context.ts +62 -28
package/src/commands/tools.ts +0 -12
package/src/tools/registry.ts +2 -4
package/src/tools/search/fuse.ts +117 -0
package/src/tools/search/index.ts +134 -0
package/src/tools/search/regexp.ts +70 -0
package/src/tools/search/semantic.ts +74 -62
package/src/worker/prompt.ts +2 -2
package/src/tools/search/grep.ts +0 -128

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "botholomew",
-  "version": "0.11.6",
+  "version": "0.12.0",
   "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
   "type": "module",
   "bin": {

package/src/chat/agent.ts CHANGED Viewed

@@ -42,8 +42,7 @@ const CHAT_TOOL_NAMES = new Set([
   "context_read",
   "context_write",
   "context_edit",
-  "search_grep",
-  "search_semantic",
+  "search",
   "list_threads",
   "view_thread",
   "create_schedule",
@@ -134,14 +133,14 @@ Format your responses using Markdown. Use headings, bold, italic, lists, and cod
 Workflow for any "look up / find / read" intent:
-1. \`search_semantic\` (semantic) or \`context_search\` (keyword), then \`context_read\` / \`context_tree\` to drill in.
+1. \`search\` (hybrid regexp + semantic) or \`context_search\` (keyword), then \`context_read\` / \`context_tree\` to drill in.
 2. If freshness matters, call \`context_info\` and check \`indexed_at\`. To re-pull a single stale item, use \`context_refresh\` rather than going to MCP for the whole document.
 3. Only call \`mcp_exec\` for reads when the data is genuinely missing locally **or** must be real-time (e.g., "what's on my calendar right now").
 Writes always go through MCP — sending an email, creating an issue, posting to Slack. Don't search context first for those.
 Examples:
-- "What does doc X say?" → \`search_semantic\` first.
+- "What does doc X say?" → \`search\` first.
 - "Any new emails from Y?" → check the \`gmail\` drive first; only hit Gmail MCP if the freshest indexed item is too old for the question.
 - "Send an email to Y" → MCP write directly; no context lookup.

package/src/commands/context.ts CHANGED Viewed

@@ -14,7 +14,6 @@ import {
   formatDriveRef,
   parseDriveRef,
 } from "../context/drives.ts";
-import { embedSingle } from "../context/embedder.ts";
 import { FetchFailureError, fetchUrl } from "../context/fetcher.ts";
 import {
   type PreparedIngestion,
@@ -36,14 +35,13 @@ import {
   resolveContextItem,
   upsertContextItem,
 } from "../db/context.ts";
-import { getEmbeddingsForItem, hybridSearch } from "../db/embeddings.ts";
+import { getEmbeddingsForItem } from "../db/embeddings.ts";
 import { reembedMissingVectors } from "../db/reembed.ts";
 import { createMcpxClient } from "../mcpx/client.ts";
+import { searchTool } from "../tools/search/index.ts";
+import type { ToolContext } from "../tools/tool.ts";
 import { logger } from "../utils/logger.ts";
-import {
-  registerContextToolSubcommands,
-  registerSearchToolSubcommands,
-} from "./tools.ts";
+import { registerContextToolSubcommands } from "./tools.ts";
 import { withDb } from "./with-db.ts";
 function fmtDate(d: Date): string {
@@ -513,46 +511,82 @@ export function registerContextCommand(program: Command) {
   const search = ctx
     .command("search")
-    .description("Search context entries")
-    .argument("[query]", "search query (hybrid keyword + semantic)")
-    .option("-k, --top-k <n>", "max results", Number.parseInt, 10)
+    .description("Search context entries (hybrid regexp + semantic)")
+    .argument(
+      "[query]",
+      "natural-language query (semantic + BM25). Combine with --pattern for fused regexp + semantic ranking.",
+    )
+    .option("-k, --top-k <n>", "max results", Number.parseInt, 20)
+    .option(
+      "--pattern <regex>",
+      "regex pattern (regexp side). May be combined with [query] to fuse signals.",
+    )
+    .option("--drive <drive>", "restrict to a single drive")
+    .option("--path <path>", "directory prefix within drive (requires --drive)")
+    .option("--glob <glob>", "filter results to files whose basename matches")
+    .option("--ignore-case", "case-insensitive regex")
+    .option(
+      "--context <n>",
+      "context lines around each regexp hit",
+      Number.parseInt,
+    )
     .action((query, opts) =>
       withDb(program, async (conn, dir) => {
-        if (!query) {
+        if (!query && !opts.pattern) {
           search.help();
           return;
         }
         const config = await loadConfig(dir);
-        const queryVec = await embedSingle(query, config);
-        const results = await hybridSearch(conn, query, queryVec, opts.topK);
+        const toolCtx: ToolContext = {
+          conn,
+          dbPath: getDbPath(dir),
+          projectDir: dir,
+          config,
+          mcpxClient: null,
+        };
+        const result = await searchTool.execute(
+          {
+            query,
+            pattern: opts.pattern,
+            drive: opts.drive,
+            path: opts.path,
+            glob: opts.glob,
+            ignore_case: opts.ignoreCase,
+            context: opts.context,
+            max_results: opts.topK,
+          },
+          toolCtx,
+        );
-        if (results.length === 0) {
+        if (result.is_error) {
+          logger.error(result.message ?? "Search failed");
+          process.exit(1);
+        }
+        if (result.matches.length === 0) {
           logger.dim("No results found.");
           return;
         }
-        for (const [i, r] of results.entries()) {
-          const score = (r.score * 100).toFixed(1);
-          console.log(
-            `${ansis.bold(`${i + 1}.`)} ${ansis.cyan(r.title)} ${ansis.dim(`(${score}%)`)}`,
-          );
-          const ref =
-            r.drive && r.path
-              ? formatDriveRef({ drive: r.drive, path: r.path })
-              : r.context_item_id;
+        for (const [i, m] of result.matches.entries()) {
+          const tagColor =
+            m.match_type === "both"
+              ? ansis.green
+              : m.match_type === "regexp"
+                ? ansis.yellow
+                : ansis.cyan;
+          const tag = tagColor(`[${m.match_type}]`);
+          const location = m.line != null ? `${m.ref}:${m.line}` : m.ref;
           console.log(
-            `   ${ansis.dim(ref)}  ${ansis.dim(fmtDate(r.created_at))}`,
+            `${ansis.bold(`${i + 1}.`)} ${tag} ${ansis.cyan(location)}  ${ansis.dim(`score=${m.score.toFixed(4)}`)}`,
           );
-          if (r.chunk_content) {
-            const snippet = r.chunk_content.slice(0, 120).replace(/\n/g, " ");
-            console.log(`   ${snippet}...`);
-          }
+          const snippet = m.content.slice(0, 200).replace(/\n/g, " ");
+          if (snippet) console.log(`   ${snippet}`);
           console.log("");
         }
       }),
     );
-  registerSearchToolSubcommands(search);
   ctx
     .command("delete <ref>")
     .description("Delete a context entry (UUID or drive:/path)")

package/src/commands/tools.ts CHANGED Viewed

@@ -37,16 +37,6 @@ export function registerContextToolSubcommands(parent: Command) {
   }
 }
-/**
- * Register search tool subcommands (grep, semantic) onto an
- * existing Commander command (e.g. the "context search" group).
- */
-export function registerSearchToolSubcommands(parent: Command) {
-  for (const tool of getToolsByGroup("search")) {
-    registerToolAsCLI(parent, tool);
-  }
-}
 /** Derive CLI subcommand name from tool name: "context_read" → "read", "context_create_dir" → "create-dir" */
 function deriveSubName(toolName: string): string {
   return toolName.replace(/^[^_]+_/, "").replace(/_/g, "-");
@@ -341,8 +331,6 @@ function isPositionalArg(key: string, toolName: string): boolean {
     context_exists: ["path"],
     context_count_lines: ["path"],
     context_search: ["query"],
-    search_grep: ["pattern"],
-    search_semantic: ["query"],
   };
   return positionalKeys[toolName]?.includes(key) ?? false;
 }

package/src/tools/registry.ts CHANGED Viewed

@@ -31,8 +31,7 @@ import { mcpSearchTool } from "./mcp/search.ts";
 import { createScheduleTool } from "./schedule/create.ts";
 import { listSchedulesTool } from "./schedule/list.ts";
 // Search tools
-import { searchGrepTool } from "./search/grep.ts";
-import { searchSemanticTool } from "./search/semantic.ts";
+import { searchTool } from "./search/index.ts";
 // Skill tools
 import { skillDeleteTool } from "./skill/delete.ts";
 import { skillEditTool } from "./skill/edit.ts";
@@ -96,8 +95,7 @@ export function registerAllTools(): void {
   registerTool(listSchedulesTool);
   // Search
-  registerTool(searchGrepTool);
-  registerTool(searchSemanticTool);
+  registerTool(searchTool);
   // Skill
   registerTool(skillListTool);

package/src/tools/search/fuse.ts ADDED Viewed

@@ -0,0 +1,117 @@
+import type { RegexpHit } from "./regexp.ts";
+import type { SemanticHit } from "./semantic.ts";
+/** One row of fused search output, as produced by `fuseRRF`. */
+export interface FusedMatch {
+  /** Reference copied from the originating regexp or semantic hit. */
+  ref: string;
+  /** Drive of the matched item. */
+  drive: string;
+  /** Path of the matched item within its drive. */
+  path: string;
+  /** Line number of the regexp hit; `null` for semantic-only rows. */
+  line: number | null;
+  /** Matched line (regexp rows) or truncated chunk text (semantic-only rows). */
+  content: string;
+  /** Context lines carried over from the regexp hit; empty for semantic-only rows. */
+  context_lines: string[];
+  /** Which side(s) produced the row; "both" means the file matched on both sides. */
+  match_type: "regexp" | "semantic" | "both";
+  /** Score of the contributing semantic hit, rounded; `null` for regexp-only rows. */
+  semantic_score: number | null;
+  /** Fused reciprocal-rank score, rounded; rows are ordered by it, highest first. */
+  score: number;
+}
+/** Maximum number of characters of chunk text kept for a semantic-only row. */
+const SNIPPET_MAX = 300;
+/**
+ * Reciprocal rank fusion (RRF) of regexp line hits and semantic chunk hits.
+ *
+ * Each list contributes `1 / (k + rank + 1)` for a hit at zero-based `rank`
+ * (its array index).
+ *
+ * - Every regexp hit becomes its own row. If the same file (drive + path)
+ *   also has a semantic hit, the row adds the contribution of that file's
+ *   best-ranked semantic hit and is tagged `match_type: "both"`.
+ * - A semantic hit becomes its own row only when its file has no regexp hit;
+ *   otherwise the regexp rows already represent that file (and are more
+ *   locatable).
+ * - A semantic hit without a drive/path cannot be matched to any file, so it
+ *   is always emitted (with empty `drive` / `path`) and is identified by its
+ *   `ref` alone instead of being silently dropped.
+ *
+ * Rows are ordered by the unrounded fused score so that 4-decimal rounding
+ * cannot create artificial ties; scores are rounded only in the output.
+ *
+ * @param regexpHits Regexp hits; the array index is used as the rank.
+ * @param semanticHits Semantic hits; the array index is used as the rank.
+ * @param options `k` is the RRF damping constant (default 60); `limit` caps
+ *   the number of rows returned.
+ * @returns At most `limit` fused rows, highest score first.
+ */
+export function fuseRRF(
+  regexpHits: RegexpHit[],
+  semanticHits: SemanticHit[],
+  options: { k?: number; limit: number },
+): FusedMatch[] {
+  const k = options.k ?? 60;
+  const rrf = (rank: number): number => 1 / (k + rank + 1);
+
+  // Best-ranked semantic hit per file. Hits are visited in rank order, so the
+  // first one seen for a key is the best one.
+  const bestSemByPath = new Map<string, { rank: number; score: number }>();
+  semanticHits.forEach((hit, rank) => {
+    const key = pathKey(hit.drive, hit.path);
+    if (key != null && !bestSemByPath.has(key)) {
+      bestSemByPath.set(key, { rank, score: hit.score });
+    }
+  });
+
+  const regexpPaths = new Set<string>();
+  const fused: FusedMatch[] = [];
+
+  regexpHits.forEach((rx, rank) => {
+    const key = pathKey(rx.drive, rx.path);
+    if (key != null) regexpPaths.add(key);
+    const sem = key != null ? bestSemByPath.get(key) : undefined;
+    fused.push({
+      ref: rx.ref,
+      drive: rx.drive,
+      path: rx.path,
+      line: rx.line,
+      content: rx.content,
+      context_lines: rx.context_lines,
+      match_type: sem ? "both" : "regexp",
+      semantic_score: sem ? round(sem.score) : null,
+      // Kept unrounded until after sorting.
+      score: rrf(rank) + (sem ? rrf(sem.rank) : 0),
+    });
+  });
+
+  semanticHits.forEach((sem, rank) => {
+    const key = pathKey(sem.drive, sem.path);
+    // Skip only when a regexp row already represents this file. A hit with no
+    // key can never collide with a regexp row, so it is kept.
+    if (key != null && regexpPaths.has(key)) return;
+    fused.push({
+      ref: sem.ref,
+      drive: sem.drive ?? "",
+      path: sem.path ?? "",
+      line: null,
+      content: sem.chunk_content.slice(0, SNIPPET_MAX),
+      context_lines: [],
+      match_type: "semantic",
+      semantic_score: round(sem.score),
+      score: rrf(rank),
+    });
+  });
+
+  fused.sort((a, b) => b.score - a.score);
+  return fused
+    .slice(0, options.limit)
+    .map((match) => ({ ...match, score: round(match.score) }));
+}
+/**
+ * Build the per-file lookup key `drive:path`.
+ * Returns `null` when either part is null or an empty string.
+ */
+function pathKey(drive: string | null, path: string | null): string | null {
+  if (!drive || !path) return null;
+  return `${drive}:${path}`;
+}
+/** Round a number to 4 decimal places. */
+function round(n: number): number {
+  return Math.round(n * 10000) / 10000;
+}

package/src/tools/search/index.ts ADDED Viewed

@@ -0,0 +1,134 @@
+import { z } from "zod";
+import {
+  listContextItems,
+  listContextItemsByPrefix,
+} from "../../db/context.ts";
+import type { ToolDefinition } from "../tool.ts";
+import { fuseRRF } from "./fuse.ts";
+import { runRegexp } from "./regexp.ts";
+import { runSemantic } from "./semantic.ts";
+/**
+ * Schema for one fused result row in the tool output. The `search` tool
+ * fills `matches` with the rows returned by `fuseRRF`.
+ */
+const MatchSchema = z.object({
+  ref: z.string(),
+  drive: z.string(),
+  path: z.string(),
+  line: z.number().nullable(),
+  content: z.string(),
+  context_lines: z.array(z.string()),
+  match_type: z.enum(["regexp", "semantic", "both"]),
+  semantic_score: z.number().nullable(),
+  score: z.number(),
+});
+/**
+ * Input for the `search` tool. Every field is optional at the schema level;
+ * `execute` additionally requires at least one of `query` / `pattern`, and
+ * requires `drive` whenever `path` is given.
+ */
+const inputSchema = z.object({
+  query: z
+    .string()
+    .optional()
+    .describe(
+      "Natural-language query for semantic + keyword (BM25) hybrid search. Provide alongside `pattern` for the strongest signal — chunks matched by both methods are boosted via reciprocal rank fusion.",
+    ),
+  pattern: z
+    .string()
+    .optional()
+    .describe("Regex pattern for exact text search across context contents."),
+  drive: z
+    .string()
+    .optional()
+    .describe(
+      "Restrict to a single drive (applies to both `query` and `pattern`).",
+    ),
+  path: z
+    .string()
+    .optional()
+    .describe("Directory prefix within the drive. Requires `drive`."),
+  glob: z
+    .string()
+    .optional()
+    .describe("Filter results to files whose basename matches this glob."),
+  ignore_case: z
+    .boolean()
+    .optional()
+    .describe("Case-insensitive regex (only affects `pattern`)."),
+  context: z
+    .number()
+    .optional()
+    .describe(
+      "Lines of surrounding context to include for each regex hit (only affects `pattern`).",
+    ),
+  max_results: z
+    .number()
+    .optional()
+    .describe("Maximum number of fused results to return (default 20)."),
+});
+/**
+ * Output of the `search` tool. On failure `is_error` is true, `matches` is
+ * empty, and `error_type` / `message` describe the problem.
+ */
+const outputSchema = z.object({
+  matches: z.array(MatchSchema),
+  is_error: z.boolean(),
+  error_type: z.string().optional(),
+  message: z.string().optional(),
+});
+/**
+ * Unified `search` tool: runs the regexp side (when `pattern` is given) and
+ * the semantic side (when `query` is given), then fuses both hit lists with
+ * reciprocal rank fusion.
+ *
+ * Argument problems are reported through the structured result
+ * (`is_error: true`, `error_type: "invalid_arguments"`) rather than thrown:
+ * neither `query` nor `pattern` supplied, `path` without `drive`, or a
+ * `pattern` that is not a valid regular expression.
+ */
+export const searchTool = {
+  name: "search",
+  description:
+    "[[ bash equivalent command: grep -r ]] Hybrid search over indexed context. At least one of `query` (natural language → semantic + BM25) or `pattern` (regex over file contents) is required. Pass both for the strongest signal: results matched by both methods float to the top via reciprocal rank fusion. Scoping (`drive`, `path`, `glob`) applies to both sides.",
+  group: "search",
+  inputSchema,
+  outputSchema,
+  execute: async (input, ctx) => {
+    if (!input.query && !input.pattern) {
+      return {
+        matches: [],
+        is_error: true,
+        error_type: "invalid_arguments",
+        message:
+          "Provide at least one of `query` (natural language) or `pattern` (regex). Pass both to fuse semantic and exact-match signals.",
+      };
+    }
+    if (input.path && !input.drive) {
+      return {
+        matches: [],
+        is_error: true,
+        error_type: "invalid_arguments",
+        message:
+          "`path` requires `drive` — call context_list_drives to see which drives exist, then pass `drive` alongside `path`.",
+      };
+    }
+    // Validate the regex before touching the database: RegExp construction
+    // throws SyntaxError on a malformed pattern, which would otherwise escape
+    // as an exception instead of the `is_error` result callers check.
+    if (input.pattern) {
+      try {
+        new RegExp(input.pattern);
+      } catch (err: unknown) {
+        if (!(err instanceof SyntaxError)) throw err;
+        return {
+          matches: [],
+          is_error: true,
+          error_type: "invalid_arguments",
+          message: `Invalid regex \`pattern\`: ${err.message}`,
+        };
+      }
+    }
+    const limit = input.max_results ?? 20;
+    // Each side is capped at 100 candidates; `limit` applies to the fused list.
+    const regexpHits = input.pattern
+      ? runRegexp(
+          input.drive
+            ? await listContextItemsByPrefix(
+                ctx.conn,
+                input.drive,
+                input.path ?? "/",
+                { recursive: true },
+              )
+            : await listContextItems(ctx.conn),
+          {
+            pattern: input.pattern,
+            glob: input.glob,
+            ignore_case: input.ignore_case,
+            context: input.context,
+            max_results: 100,
+          },
+        )
+      : [];
+    const semanticHits = input.query
+      ? await runSemantic(ctx, {
+          query: input.query,
+          drive: input.drive,
+          path: input.path,
+          glob: input.glob,
+          limit: 100,
+        })
+      : [];
+    const matches = fuseRRF(regexpHits, semanticHits, { limit });
+    return { matches, is_error: false };
+  },
+} satisfies ToolDefinition<typeof inputSchema, typeof outputSchema>;

package/src/tools/search/regexp.ts ADDED Viewed

@@ -0,0 +1,70 @@
+import { formatDriveRef } from "../../context/drives.ts";
+import type { ContextItem } from "../../db/context.ts";
+/** One line of a context item that matched the regular expression. */
+export interface RegexpHit {
+  /** Reference produced by `formatDriveRef` for the matched item. */
+  ref: string;
+  /** Drive of the matched item. */
+  drive: string;
+  /** Path of the matched item. */
+  path: string;
+  /** 1-based line number of the match within the item's content. */
+  line: number;
+  /** Text of the matching line. */
+  content: string;
+  /** The matching line together with the requested lines before and after it. */
+  context_lines: string[];
+}
+/** Options accepted by `runRegexp`. */
+export interface RegexpOptions {
+  /** Regular-expression source to test against each line. */
+  pattern: string;
+  /** Only scan items whose basename matches this glob. */
+  glob?: string;
+  /** Match case-insensitively. */
+  ignore_case?: boolean;
+  /** Number of lines to include before and after each hit (default 0). */
+  context?: number;
+  /** Stop after this many hits (default 100). */
+  max_results?: number;
+}
+/**
+ * Scan context items line by line for a regular expression.
+ *
+ * Items without content are skipped, as are items whose basename does not
+ * match `options.glob` when a glob is given. Scanning stops as soon as
+ * `options.max_results` hits (default 100) have been collected.
+ *
+ * @param items Context items to scan, in the order they should be searched.
+ * @param options Pattern, optional basename glob, case flag, context size
+ *   and hit cap.
+ * @returns Hits in scan order, each with a 1-based line number and the
+ *   surrounding context lines.
+ * @throws SyntaxError when `options.pattern` is not a valid regular expression.
+ */
+export function runRegexp(
+  items: ContextItem[],
+  options: RegexpOptions,
+): RegexpHit[] {
+  // No `g` flag: `test` on a non-global regex is stateless, so there is no
+  // `lastIndex` to reset between lines.
+  const regex = new RegExp(options.pattern, options.ignore_case ? "i" : "");
+  const globRegex = options.glob ? globToRegex(options.glob) : null;
+  const contextLines = options.context ?? 0;
+  const maxResults = options.max_results ?? 100;
+  const hits: RegexpHit[] = [];
+  for (const item of items) {
+    if (item.content == null) continue;
+    if (globRegex) {
+      const filename = item.path.split("/").pop() ?? "";
+      if (!globRegex.test(filename)) continue;
+    }
+    // Split on LF or CRLF so a trailing "\r" neither leaks into the results
+    // nor defeats `$`-anchored patterns.
+    const lines = item.content.split(/\r?\n/);
+    for (const [i, line] of lines.entries()) {
+      if (!regex.test(line)) continue;
+      const start = Math.max(0, i - contextLines);
+      const end = Math.min(lines.length, i + contextLines + 1);
+      hits.push({
+        ref: formatDriveRef(item),
+        drive: item.drive,
+        path: item.path,
+        line: i + 1,
+        content: line,
+        context_lines: lines.slice(start, end),
+      });
+      if (hits.length >= maxResults) return hits;
+    }
+  }
+  return hits;
+}
+/**
+ * Convert a simple glob into an anchored, case-insensitive regular
+ * expression. Regex metacharacters are escaped first; then `*` becomes `.*`
+ * and `?` becomes `.`.
+ */
+export function globToRegex(glob: string): RegExp {
+  const escaped = glob
+    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
+    .replace(/\*/g, ".*")
+    .replace(/\?/g, ".");
+  return new RegExp(`^${escaped}$`, "i");
+}

package/src/tools/search/semantic.ts CHANGED Viewed

@@ -1,69 +1,81 @@
-import { z } from "zod";
 import { formatDriveRef } from "../../context/drives.ts";
 import { embedSingle } from "../../context/embedder.ts";
-import { hybridSearch } from "../../db/embeddings.ts";
-import type { ToolDefinition } from "../tool.ts";
+import { type HybridSearchResult, hybridSearch } from "../../db/embeddings.ts";
+import type { ToolContext } from "../tool.ts";
+import { globToRegex } from "./regexp.ts";
-const inputSchema = z.object({
-  query: z.string().describe("Natural language search query"),
-  top_k: z
-    .number()
-    .optional()
-    .default(10)
-    .describe("Maximum number of results to return (defaults to 10)"),
-  threshold: z
-    .number()
-    .optional()
-    .describe("Minimum similarity score (0-1) to include in results"),
-});
+/** One hybrid-search result, as returned by `runSemantic`. */
+export interface SemanticHit {
+  /** Formatted drive reference, or the context item id when drive/path are absent. */
+  ref: string;
+  /** Drive of the result, if it has one. */
+  drive: string | null;
+  /** Path of the result, if it has one. */
+  path: string | null;
+  /** Id of the context item the chunk belongs to. */
+  context_item_id: string;
+  /** Chunk index copied from the search result. */
+  chunk_index: number;
+  /** Title copied from the search result. */
+  title: string;
+  /** Chunk text; an empty string when the search result had none. */
+  chunk_content: string;
+  /** Score reported by the hybrid search. */
+  score: number;
+}
-const outputSchema = z.object({
-  results: z.array(
-    z.object({
-      ref: z.string(),
-      title: z.string(),
-      score: z.number(),
-      snippet: z.string(),
-    }),
-  ),
-  is_error: z.boolean(),
-});
+/** Options accepted by `runSemantic`. */
+export interface SemanticOptions {
+  /** Query text to embed and search for. */
+  query: string;
+  /** Keep only results from this drive. */
+  drive?: string;
+  /** Keep only results at or under this path. */
+  path?: string;
+  /** Keep only results whose basename matches this glob. */
+  glob?: string;
+  /** Number of results requested from the hybrid search (default 100). */
+  limit?: number;
+}
-export const searchSemanticTool = {
-  name: "search_semantic",
-  description:
-    "Semantic search over indexed context using vector embeddings. Finds conceptually related content, not just keyword matches.",
-  group: "search",
-  inputSchema,
-  outputSchema,
-  execute: async (input, ctx) => {
-    const queryVec = await embedSingle(input.query, ctx.config);
-    const results = await hybridSearch(
-      ctx.conn,
-      input.query,
-      queryVec,
-      input.top_k,
-    );
+/**
+ * Run the embedding + hybrid-search pipeline. Scoping (`drive` / `path` /
+ * `glob`) is applied as a *post-filter* on results so the caller gets
+ * consistent behavior whether they used the regex side, the semantic side,
+ * or both.
+ *
+ * Because filtering happens after `hybridSearch` has been asked for
+ * `options.limit` results (100 when omitted), a scoped call can return fewer
+ * hits than `limit`.
+ *
+ * @param ctx Supplies `config` for embedding the query and `conn` for the search.
+ * @param options Query text, optional scope filters, and result limit.
+ * @returns In-scope results converted to `SemanticHit`, in the order
+ *   `hybridSearch` returned them.
+ */
+export async function runSemantic(
+  ctx: ToolContext,
+  options: SemanticOptions,
+): Promise<SemanticHit[]> {
+  const queryVec = await embedSingle(options.query, ctx.config);
+  const results = await hybridSearch(
+    ctx.conn,
+    options.query,
+    queryVec,
+    options.limit ?? 100,
+  );
-    const threshold = input.threshold;
-    const filtered =
-      threshold !== undefined
-        ? results.filter((r) => r.score >= threshold)
-        : results;
+  return results.filter((r) => matchesScope(r, options)).map(toHit);
+}
-    return {
-      results: filtered
-        .map((r) => ({
-          ref:
-            r.drive && r.path
-              ? formatDriveRef({ drive: r.drive, path: r.path })
-              : r.context_item_id,
-          title: r.title,
-          score: Math.round(r.score * 1000) / 1000,
-          snippet: (r.chunk_content || "").slice(0, 300),
-        }))
-        .sort((a, b) => b.score - a.score),
-      is_error: false,
-    };
-  },
-} satisfies ToolDefinition<typeof inputSchema, typeof outputSchema>;
+/**
+ * Decide whether a hybrid-search result falls inside the requested scope.
+ *
+ * - `drive`: the result's drive must equal it exactly.
+ * - `path`: the result's path must equal it or sit underneath it.
+ * - `glob`: the result's basename must match it.
+ *
+ * A result with no path cannot satisfy a `path` or `glob` restriction and is
+ * rejected when either is set, so pathless rows do not leak into a scoped
+ * search.
+ *
+ * @returns `true` when the result passes every filter that was supplied.
+ */
+function matchesScope(
+  result: HybridSearchResult,
+  options: SemanticOptions,
+): boolean {
+  if (options.drive && result.drive !== options.drive) return false;
+  const resultPath = result.path;
+  if (options.path) {
+    if (!resultPath) return false;
+    // Compare against "<path>/" so "/docs" does not match "/docs-old/...".
+    const prefix = options.path.endsWith("/")
+      ? options.path
+      : `${options.path}/`;
+    if (resultPath !== options.path && !resultPath.startsWith(prefix)) {
+      return false;
+    }
+  }
+  if (options.glob) {
+    if (!resultPath) return false;
+    const filename = resultPath.split("/").pop() ?? "";
+    if (!globToRegex(options.glob).test(filename)) return false;
+  }
+  return true;
+}
+/**
+ * Convert a hybrid-search result into a `SemanticHit`.
+ * `ref` is the formatted drive reference when both drive and path are
+ * present, otherwise the context item id; missing chunk content becomes "".
+ */
+function toHit(r: HybridSearchResult): SemanticHit {
+  return {
+    ref:
+      r.drive && r.path
+        ? formatDriveRef({ drive: r.drive, path: r.path })
+        : r.context_item_id,
+    drive: r.drive,
+    path: r.path,
+    context_item_id: r.context_item_id,
+    chunk_index: r.chunk_index,
+    title: r.title,
+    chunk_content: r.chunk_content ?? "",
+    score: r.score,
+  };
+}

package/src/worker/prompt.ts CHANGED Viewed

@@ -145,14 +145,14 @@ When calling complete_task, write a summary that captures your key findings, dec
 Workflow for any "look up / find / read" intent:
-1. \`search_semantic\` (semantic) or \`context_search\` (keyword), then \`context_read\` / \`context_tree\` to drill in.
+1. \`search\` (hybrid regexp + semantic) or \`context_search\` (keyword), then \`context_read\` / \`context_tree\` to drill in.
 2. If freshness matters, call \`context_info\` and check \`indexed_at\`. To re-pull a single stale item, use \`context_refresh\` rather than going to MCP for the whole document.
 3. Only call \`mcp_exec\` for reads when the data is genuinely missing locally **or** must be real-time (e.g., "what's on my calendar right now").
 Writes always go through MCP — sending an email, creating an issue, posting to Slack. Don't search context first for those.
 Examples:
-- "What does doc X say?" → \`search_semantic\` first.
+- "What does doc X say?" → \`search\` first.
 - "Any new emails from Y?" → check the \`gmail\` drive first; only hit Gmail MCP if the freshest indexed item is too old for the question.
 - "Send an email to Y" → MCP write directly; no context lookup.

package/src/tools/search/grep.ts DELETED Viewed

@@ -1,128 +0,0 @@
-import { z } from "zod";
-import { formatDriveRef } from "../../context/drives.ts";
-import {
-  listContextItems,
-  listContextItemsByPrefix,
-} from "../../db/context.ts";
-import type { ToolDefinition } from "../tool.ts";
-const GrepMatchSchema = z.object({
-  ref: z.string(),
-  drive: z.string(),
-  path: z.string(),
-  line: z.number(),
-  content: z.string(),
-  context_lines: z.array(z.string()),
-});
-const inputSchema = z.object({
-  pattern: z.string().describe("Regex pattern to search for"),
-  drive: z
-    .string()
-    .optional()
-    .describe("Restrict search to a single drive (defaults to all drives)"),
-  path: z
-    .string()
-    .optional()
-    .describe(
-      "Directory to search under within the drive (defaults to /). Requires `drive`.",
-    ),
-  glob: z
-    .string()
-    .optional()
-    .describe("Only search files whose basename matches this glob pattern"),
-  ignore_case: z.boolean().optional().describe("Case-insensitive search"),
-  context: z
-    .number()
-    .optional()
-    .describe("Number of context lines before and after each match"),
-  max_results: z
-    .number()
-    .optional()
-    .describe("Maximum number of matches to return"),
-});
-const outputSchema = z.object({
-  matches: z.array(GrepMatchSchema),
-  is_error: z.boolean(),
-  error_type: z.string().optional(),
-  message: z.string().optional(),
-});
-export const searchGrepTool = {
-  name: "search_grep",
-  description: "Search file contents by regex pattern across context drives.",
-  group: "search",
-  inputSchema,
-  outputSchema,
-  execute: async (input, ctx) => {
-    // `path` scopes to a directory within a single drive; requiring `drive`
-    // alongside prevents a silent full-DB scan when only `path` is passed.
-    if (input.path && !input.drive) {
-      return {
-        matches: [],
-        is_error: true,
-        error_type: "invalid_arguments",
-        message:
-          "`path` requires `drive` — use context_list_drives to see which drives exist, then pass `drive` alongside `path`.",
-      };
-    }
-    const items = input.drive
-      ? await listContextItemsByPrefix(
-          ctx.conn,
-          input.drive,
-          input.path ?? "/",
-          {
-            recursive: true,
-          },
-        )
-      : await listContextItems(ctx.conn);
-    const flags = input.ignore_case ? "gi" : "g";
-    const regex = new RegExp(input.pattern, flags);
-    const globRegex = input.glob ? globToRegex(input.glob) : null;
-    const contextLines = input.context ?? 0;
-    const maxResults = input.max_results ?? 100;
-    const matches: z.infer<typeof GrepMatchSchema>[] = [];
-    for (const item of items) {
-      if (item.content == null) continue;
-      if (globRegex) {
-        const filename = item.path.split("/").pop() ?? "";
-        if (!globRegex.test(filename)) continue;
-      }
-      const lines = item.content.split("\n");
-      for (let i = 0; i < lines.length; i++) {
-        regex.lastIndex = 0;
-        const line = lines[i];
-        if (line !== undefined && regex.test(line)) {
-          const start = Math.max(0, i - contextLines);
-          const end = Math.min(lines.length, i + contextLines + 1);
-          matches.push({
-            ref: formatDriveRef(item),
-            drive: item.drive,
-            path: item.path,
-            line: i + 1,
-            content: line,
-            context_lines: lines.slice(start, end),
-          });
-          if (matches.length >= maxResults) return { matches, is_error: false };
-        }
-      }
-    }
-    return { matches, is_error: false };
-  },
-} satisfies ToolDefinition<typeof inputSchema, typeof outputSchema>;
-function globToRegex(glob: string): RegExp {
-  const escaped = glob
-    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
-    .replace(/\*/g, ".*")
-    .replace(/\?/g, ".");
-  return new RegExp(`^${escaped}$`, "i");
-}