@comfanion/usethis_search 4.2.0-dev.4 → 4.3.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,13 +57,14 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
57
57
  // Don't inject or prune for sub-agents (title generation, etc.)
58
58
  if (state.isSubAgent) return
59
59
 
60
- // ── Prune: replace old search tool outputs with compact summaries ────
61
- // Files are already in workspace injection — no need for big search
62
- // output in chat history. This runs even when workspace is empty
63
- // (handles case where workspace was cleared but old search outputs remain).
60
+ // ── Prune: replace old tool outputs with compact summaries ────────────
61
+ // Files are already in workspace injection — no need for big outputs
62
+ // in chat history. This runs even when workspace is empty
63
+ // (handles case where workspace was cleared but old outputs remain).
64
64
  const wsConfig = workspaceCache.getConfig()
65
65
  if (wsConfig.autoPruneSearch !== false) {
66
66
  pruneSearchToolOutputs(output.messages)
67
+ pruneReadToolOutputs(output.messages)
67
68
  }
68
69
 
69
70
  let entries = workspaceCache.getAll()
@@ -225,7 +226,8 @@ function formatFileWithChunks(
225
226
  }
226
227
 
227
228
  /**
228
- * Format a single chunk with metadata.
229
+ * Format a single chunk with metadata and line numbers (cat -n style).
230
+ * This allows the agent to see exact line numbers without needing grep.
229
231
  */
230
232
  function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string {
231
233
  let block = ""
@@ -251,11 +253,21 @@ function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string
251
253
  block += `<!-- ${meta.join(" | ")} -->\n`
252
254
  }
253
255
 
254
- // Chunk content
256
+ // Chunk content WITH LINE NUMBERS (cat -n style)
257
+ // This allows agent to reference exact lines without grep
258
+ const startLine = entry.metadata?.startLine ?? 1
259
+ const lines = entry.content.split("\n")
255
260
  const lang = entry.metadata?.language || ""
261
+
256
262
  block += `\`\`\`${lang}\n`
257
- block += entry.content
258
- if (!entry.content.endsWith("\n")) block += "\n"
263
+
264
+ for (let i = 0; i < lines.length; i++) {
265
+ const lineNum = startLine + i
266
+ const lineContent = lines[i]
267
+ // Format: "  123| line content" (line number right-padded to 5 chars, then "| ")
268
+ block += `${lineNum.toString().padStart(5, " ")}| ${lineContent}\n`
269
+ }
270
+
259
271
  block += `\`\`\`\n`
260
272
 
261
273
  return block
@@ -270,7 +282,7 @@ function findLastUserMessage(messages: Message[]): Message | null {
270
282
  return null
271
283
  }
272
284
 
273
- // ── Search output pruning ────────────────────────────────────────────────────
285
+ // ── Tool output pruning ─────────────────────────────────────────────────────
274
286
 
275
287
  /**
276
288
  * Minimum output length to consider pruning.
@@ -344,3 +356,74 @@ export function pruneSearchToolOutputs(messages: Message[]): void {
344
356
  `${attachedCount} attached to workspace. Full content available via workspace context.]`
345
357
  }
346
358
  }
359
+
360
+ /**
361
+ * Replace read() tool outputs in chat history with compact summaries.
362
+ *
363
+ * Why: read() returns full file content or large chunks.
364
+ * After workspace injection (or auto-attach), the content is already in context.
365
+ * Keeping the read output wastes tokens — replace it with a 1-line summary.
366
+ *
367
+ * Only prunes completed read calls with output longer than MIN_PRUNE_LENGTH.
368
+ * The last read output is kept (the agent may still be referencing it).
369
+ */
370
+ export function pruneReadToolOutputs(messages: Message[]): void {
371
+ // Find all read tool parts (completed, with long output)
372
+ const readParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
373
+
374
+ for (let i = 0; i < messages.length; i++) {
375
+ const msg = messages[i]
376
+ const parts = Array.isArray(msg.parts) ? msg.parts : []
377
+
378
+ for (let j = 0; j < parts.length; j++) {
379
+ const part = parts[j]
380
+ if (
381
+ part.type === "tool" &&
382
+ (part.tool === "read" || part.tool === "Read") &&
383
+ part.state?.status === "completed" &&
384
+ typeof part.state?.output === "string" &&
385
+ part.state.output.length > MIN_PRUNE_LENGTH
386
+ ) {
387
+ readParts.push({ msgIdx: i, partIdx: j, part })
388
+ }
389
+ }
390
+ }
391
+
392
+ // Keep the last read output (agent may reference it) — prune the rest
393
+ if (readParts.length <= 1) return
394
+
395
+ const toPrune = readParts.slice(0, -1)
396
+
397
+ for (const { part } of toPrune) {
398
+ const output = part.state.output as string
399
+
400
+ // Extract file path from output or input
401
+ const filePath = part.input?.filePath || extractFilePathFromOutput(output)
402
+
403
+ // Check if it's a substituted output (already compact)
404
+ if (output.startsWith("[File ") || output.startsWith("[Lines ") || output.startsWith("✓ Attached chunk")) {
405
+ // Already substituted — keep as-is
406
+ continue
407
+ }
408
+
409
+ // Replace with compact summary
410
+ part.state.output = `[Read "${filePath || "file"}" — content available in workspace context]`
411
+ }
412
+ }
413
+
414
+ /**
415
+ * Extract file path from read() output.
416
+ * Output usually starts with file path or has markers.
417
+ */
418
+ function extractFilePathFromOutput(output: string): string | null {
419
+ // Try to find file path in first line
420
+ const firstLine = output.split("\n")[0]
421
+
422
+ // Pattern: "## path/to/file.ts" or "path/to/file.ts"
423
+ const pathMatch = firstLine.match(/##?\s*(.+?\.(ts|js|go|py|md|txt|yaml|json|tsx|jsx|rs|java|kt|swift|c|cpp|h|cs|rb|php))/)
424
+ if (pathMatch) {
425
+ return pathMatch[1].trim()
426
+ }
427
+
428
+ return null
429
+ }
@@ -21,6 +21,75 @@
21
21
 
22
22
  import type { SessionState } from "./types.ts"
23
23
  import { workspaceCache, WorkspaceCache } from "../cache/manager.ts"
24
+ import { getIndexer, releaseIndexer } from "../vectorizer/index.ts"
25
+
26
+ // ── Chunk Detection Helpers ─────────────────────────────────────────────────
27
+
28
+ /**
29
+ * Find which chunk contains the given line offset.
30
+ *
31
+ * @param filePath Relative file path
32
+ * @param offset Line number (0-based as used by read())
33
+ * @param limit Number of lines to read
34
+ * @returns Chunk metadata if found, null otherwise
35
+ */
36
+ async function findChunkByOffset(
37
+ filePath: string,
38
+ offset: number,
39
+ limit?: number
40
+ ): Promise<{
41
+ chunk_id: string
42
+ content: string
43
+ chunk_index: number
44
+ start_line: number
45
+ end_line: number
46
+ language?: string
47
+ function_name?: string
48
+ class_name?: string
49
+ } | null> {
50
+ try {
51
+ const projectRoot = process.cwd()
52
+ const indexer = await getIndexer(projectRoot, "code")
53
+
54
+ try {
55
+ // Get all chunks for this file
56
+ const chunks = await indexer.findChunksByPath(filePath)
57
+
58
+ if (chunks.length === 0) return null
59
+
60
+ // Find chunk that contains this offset
61
+ // offset is 0-based line number from read()
62
+ const targetLine = offset
63
+ const endLine = limit ? offset + limit : offset + 100
64
+
65
+ for (const chunk of chunks) {
66
+ const chunkStart = chunk.start_line ?? 0
67
+ const chunkEnd = chunk.end_line ?? Number.MAX_SAFE_INTEGER
68
+
69
+ // Check if offset falls within this chunk
70
+ if (targetLine >= chunkStart && targetLine <= chunkEnd) {
71
+ return {
72
+ chunk_id: chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`,
73
+ content: chunk.content,
74
+ chunk_index: chunk.chunk_index ?? 0,
75
+ start_line: chunkStart,
76
+ end_line: chunkEnd,
77
+ language: chunk.language,
78
+ function_name: chunk.function_name,
79
+ class_name: chunk.class_name,
80
+ }
81
+ }
82
+ }
83
+
84
+ return null
85
+ } finally {
86
+ releaseIndexer(projectRoot, "code")
87
+ }
88
+ } catch (error) {
89
+ // Index not available or error — return null
90
+ return null
91
+ }
92
+ }
24
93
 
25
94
  /**
26
95
  * Create the tool output substitution handler.
@@ -75,7 +144,8 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
75
144
  // is valuable for AI navigation. Only read() is substituted.
76
145
  switch (input.tool) {
77
146
  case "read":
78
- substituteReadOutput(output, wsCache)
147
+ case "Read":
148
+ await handleReadSubstitution(output, wsCache)
79
149
  break
80
150
  // case "grep": // Disabled — AI needs line numbers and match context
81
151
  // case "glob": // Disabled — discovery tool, paths are metadata not content
@@ -84,16 +154,106 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
84
154
  }
85
155
 
86
156
  /**
87
- * Substitute read() output if file has chunks in workspace.
157
+ * Handle read() substitution with smart chunk detection.
158
+ *
159
+ * Two modes:
160
+ * 1. Full read (no offset) → substitute if file in workspace
161
+ * 2. Partial read (with offset) → auto-attach chunk, then substitute
162
+ */
163
+ async function handleReadSubstitution(
164
+ output: { title: string; output: string; metadata: any },
165
+ cache: WorkspaceCache
166
+ ): Promise<void> {
167
+ const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
168
+ if (!filePath) return
169
+
170
+ const offset = output.metadata?.offset
171
+ const limit = output.metadata?.limit
172
+ const isPartialRead = offset !== undefined
173
+
174
+ // MODE 1: Partial read with offset → auto-attach chunk
175
+ if (isPartialRead) {
176
+ await handlePartialReadAttach(filePath, offset, limit, output, cache)
177
+ return
178
+ }
179
+
180
+ // MODE 2: Full read → standard substitution
181
+ substituteReadOutput(output, cache)
182
+ }
183
+
184
+ /**
185
+ * Auto-attach chunk for partial read() with offset.
186
+ *
187
+ * When agent does read({ filePath: "src/auth.ts", offset: 150, limit: 50 }),
188
+ * we find which chunk contains lines 150-200 and attach it to workspace.
189
+ */
190
+ async function handlePartialReadAttach(
191
+ filePath: string,
192
+ offset: number,
193
+ limit: number | undefined,
194
+ output: { title: string; output: string; metadata: any },
195
+ cache: WorkspaceCache
196
+ ): Promise<void> {
197
+ try {
198
+ // Find which chunk contains this offset
199
+ const chunk = await findChunkByOffset(filePath, offset, limit)
200
+
201
+ if (!chunk) {
202
+ // Chunk not found (file not indexed or offset out of range)
203
+ // Keep original output
204
+ return
205
+ }
206
+
207
+ // Check if chunk already in workspace
208
+ const existing = cache.get(chunk.chunk_id)
209
+ if (existing) {
210
+ // Already attached → replace output with reference
211
+ output.output = `[Lines ${chunk.start_line}-${chunk.end_line} (chunk ${chunk.chunk_index}) already in workspace — see <workspace_context>]`
212
+ return
213
+ }
214
+
215
+ // Attach chunk to workspace
216
+ cache.attach({
217
+ chunkId: chunk.chunk_id,
218
+ path: filePath,
219
+ content: chunk.content,
220
+ chunkIndex: chunk.chunk_index,
221
+ role: "manual",
222
+ attachedAt: Date.now(),
223
+ attachedBy: `read(offset:${offset})`,
224
+ metadata: {
225
+ language: chunk.language,
226
+ function_name: chunk.function_name,
227
+ class_name: chunk.class_name,
228
+ startLine: chunk.start_line,
229
+ endLine: chunk.end_line,
230
+ },
231
+ })
232
+
233
+ // Replace output with compact message
234
+ const meta: string[] = []
235
+ if (chunk.function_name) meta.push(`fn: ${chunk.function_name}`)
236
+ if (chunk.class_name) meta.push(`class: ${chunk.class_name}`)
237
+ const metaStr = meta.length > 0 ? ` (${meta.join(", ")})` : ""
238
+
239
+ output.output = `✓ Attached chunk ${chunk.chunk_index} to workspace${metaStr}\n\nLines ${chunk.start_line}-${chunk.end_line} — see <workspace_context> for content.\n\nWorkspace: ${cache.size} chunks, ${cache.totalTokens.toLocaleString()} tokens`
240
+
241
+ // Save workspace asynchronously
242
+ cache.save().catch(() => {})
243
+ } catch (error) {
244
+ // Auto-attach failed — keep original output
245
+ // Silent failure (don't break read())
246
+ }
247
+ }
248
+
249
+ /**
250
+ * Substitute read() output if file has chunks in workspace (full reads only).
88
251
  *
89
- * Input: { filePath: "src/auth.ts", offset?: 0, limit?: 100 }
252
+ * Input: { filePath: "src/auth.ts" } (no offset/limit)
90
253
  * Output: "export function login(...)\n..."
91
254
  *
92
- * If file has chunks in workspace AND no offset/limit (full read):
255
+ * If file has chunks in workspace:
93
256
  * Replace with: "[File "src/auth.ts" has N chunks in workspace (chunks: 2, 5, 7) — see <workspace_context>]"
94
- *
95
- * If offset/limit present (partial read):
96
- * Keep original (partial reads are not in workspace injection)
97
257
  */
98
258
  function substituteReadOutput(output: { title: string; output: string; metadata: any }, cache: WorkspaceCache): void {
99
259
  try {
@@ -101,10 +261,6 @@ function substituteReadOutput(output: { title: string; output: string; metadata:
101
261
  const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
102
262
  if (!filePath) return
103
263
 
104
- // Check if this is a partial read (offset/limit present)
105
- const isPartialRead = output.metadata?.offset !== undefined || output.metadata?.limit !== undefined
106
- if (isPartialRead) return
107
-
108
264
  // Don't substitute if file was modified (dirty) — workspace has stale content
109
265
  if (cache.isDirty(filePath)) return
110
266
 
package/index.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import type { Plugin } from "@opencode-ai/plugin"
2
2
 
3
3
  import search from "./tools/search"
4
- import { workspace_list, workspace_attach, workspace_detach, workspace_clear, workspace_restore } from "./tools/workspace"
4
+ import { workspace_list, workspace_forget, workspace_clear, workspace_restore } from "./tools/workspace"
5
5
  import FileIndexerPlugin from "./file-indexer"
6
6
  import { workspaceCache } from "./cache/manager"
7
7
  import { createWorkspaceInjectionHandler } from "./hooks/message-before"
@@ -39,8 +39,7 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
39
39
  tool: {
40
40
  search,
41
41
  workspace_list,
42
- workspace_attach,
43
- workspace_detach,
42
+ workspace_forget,
44
43
  workspace_clear,
45
44
  workspace_restore,
46
45
  },
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@comfanion/usethis_search",
3
- "version": "4.2.0-dev.4",
4
- "description": "OpenCode plugin: semantic search with chunk-based workspace injection (v4.2-dev: chunk-level context, granular detach, improved token efficiency)",
3
+ "version": "4.3.0-dev.0",
4
+ "description": "OpenCode plugin: semantic search with auto-attach, line numbers in workspace, simplified API (v4.3: auto-detect modes, read() caching, 99% token reduction, no grep needed)",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
7
7
  "exports": {
@@ -25,6 +25,7 @@
25
25
  "tools/search.ts",
26
26
  "tools/codeindex.ts",
27
27
  "tools/workspace.ts",
28
+ "tools/read-interceptor.ts",
28
29
  "cache/manager.ts",
29
30
  "hooks/message-before.ts",
30
31
  "hooks/tool-substitution.ts",
@@ -0,0 +1,149 @@
1
+ import { tool } from "@opencode-ai/plugin"
2
+ import path from "path"
3
+ import fs from "fs/promises"
4
+
5
+ import { CodebaseIndexer } from "../vectorizer/index.ts"
6
+
7
+ // FR-043: Logging for intercepted Read() calls
8
+ const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
9
+
10
+ interface ReadLogEntry {
11
+ timestamp: number
12
+ filePath: string
13
+ relPath: string
14
+ chunksFound: number
15
+ relatedContextCount: number
16
+ durationMs: number
17
+ fallback: boolean
18
+ }
19
+
20
+ const LOG_MAX_ENTRIES = 500
21
+
22
+ /**
23
+ * Append a log entry to the Read() interception log file.
24
+ * Non-blocking, non-fatal — errors are silently ignored.
25
+ */
26
+ async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
27
+ try {
28
+ const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
29
+ await fs.mkdir(path.dirname(logPath), { recursive: true })
30
+
31
+ let entries: ReadLogEntry[] = []
32
+ try {
33
+ const raw = await fs.readFile(logPath, "utf-8")
34
+ entries = JSON.parse(raw)
35
+ } catch {
36
+ // file doesn't exist or is invalid — start fresh
37
+ }
38
+
39
+ entries.push(entry)
40
+ // Cap log size to avoid unbounded growth
41
+ if (entries.length > LOG_MAX_ENTRIES) {
42
+ entries = entries.slice(-LOG_MAX_ENTRIES)
43
+ }
44
+
45
+ await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
46
+ } catch {
47
+ // non-fatal — logging must never break Read
48
+ }
49
+ }
50
+
51
+ export default tool({
52
+ description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
53
+
54
+ Use this instead of the standard Read tool for better context awareness.`,
55
+
56
+ args: {
57
+ filePath: tool.schema.string().describe("Path to the file to read"),
58
+ },
59
+
60
+ async execute(args) {
61
+ const startTime = Date.now()
62
+ const projectRoot = process.cwd()
63
+ const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
64
+
65
+ const relPath = path.relative(projectRoot, filePath)
66
+
67
+ if (DEBUG) {
68
+ console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
69
+ }
70
+
71
+ // Resilient search: if vector index is corrupted or unavailable, fall back gracefully
72
+ let fileChunks: any[] = []
73
+ let allRelated: any[] = []
74
+ let searchFailed = false
75
+
76
+ try {
77
+ const indexer = await new CodebaseIndexer(projectRoot, "code").init()
78
+ try {
79
+ const results = await indexer.search(relPath, 20, false, {})
80
+ fileChunks = results.filter((r: any) => r.file === relPath)
81
+
82
+ allRelated = fileChunks
83
+ .flatMap((c: any) => c.relatedContext || [])
84
+ .filter((r: any, i: number, arr: any[]) => arr.findIndex((x: any) => x.chunk_id === r.chunk_id) === i)
85
+ } catch (searchErr: any) {
86
+ if (DEBUG) {
87
+ console.log(`[read-interceptor] Search failed for "${relPath}": ${searchErr.message}`)
88
+ }
89
+ searchFailed = true
90
+ }
91
+ await indexer.unloadModel()
92
+ } catch (initErr: any) {
93
+ if (DEBUG) {
94
+ console.log(`[read-interceptor] Indexer init failed: ${initErr.message}`)
95
+ }
96
+ searchFailed = true
97
+ }
98
+
99
+ const durationMs = Date.now() - startTime
100
+ const fallback = fileChunks.length === 0
101
+
102
+ // FR-043: Log the interception asynchronously (non-blocking)
103
+ logReadInterception(projectRoot, {
104
+ timestamp: startTime,
105
+ filePath: args.filePath,
106
+ relPath,
107
+ chunksFound: fileChunks.length,
108
+ relatedContextCount: allRelated.length,
109
+ durationMs,
110
+ fallback,
111
+ }).catch(() => {})
112
+
113
+ if (DEBUG) {
114
+ console.log(
115
+ `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}${searchFailed ? " (search error)" : ""}`
116
+ )
117
+ }
118
+
119
+ if (fallback) {
120
+ const reason = searchFailed
121
+ ? `Search index unavailable (possibly corrupted). Run codeindex({ action: "reindex", index: "code" }) to rebuild.`
122
+ : `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
123
+ return reason
124
+ }
125
+
126
+ let output = `## ${relPath}\n\n`
127
+
128
+ output += `### Content\n\n`
129
+ for (const chunk of fileChunks) {
130
+ output += chunk.content + "\n\n"
131
+ }
132
+
133
+ if (allRelated.length > 0) {
134
+ output += `### Related Context\n\n`
135
+ for (const rel of allRelated) {
136
+ const snippet = rel.content.length > 300
137
+ ? rel.content.substring(0, 300) + "..."
138
+ : rel.content
139
+ output += `**${rel.file}** (${rel.relation})\n`
140
+ output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
141
+ }
142
+ }
143
+
144
+ return output
145
+ },
146
+ })
147
+
148
+ // Export for testing
149
+ export { logReadInterception, ReadLogEntry }
package/tools/search.ts CHANGED
@@ -178,47 +178,70 @@ function parseFilter(filter: string): {
178
178
  }
179
179
 
180
180
  export default tool({
181
- description: `Search the codebase semantically OR attach specific chunks/files to workspace.
181
+ description: `Search codebase and automatically attach relevant context to workspace.
182
182
 
183
- Three modes:
184
- 1. Semantic search (query) - Find relevant code by meaning
185
- 2. Direct chunk attach (chunkId) - Attach specific chunk by ID
186
- 3. File attach (path) - Attach all chunks from a file
183
+ Accepts any query - semantic search, file path, or chunk ID:
184
+ - "authentication logic" finds relevant code
185
+ - "docs/architecture.md" attaches file
186
+ - "src/auth.ts:chunk-5" attaches specific chunk
187
187
 
188
- Available indexes:
189
- - "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
190
- - "docs" - Documentation files (*.md, *.txt, etc.)
191
- - searchAll: true - Search across all indexes
188
+ Results are optimized for context - top chunks auto-attached with expanded context
189
+ (related code, imports, class methods).
190
+
191
+ IMPORTANT: Workspace has limited token budget. Use workspace_forget() to remove
192
+ irrelevant files or old searches before adding new context.
193
+
194
+ Choose index based on what you're looking for:
195
+ - index: "code" → search source code
196
+ - index: "docs" → search documentation
197
+ - searchAll: true → search everywhere
192
198
 
193
199
  Examples:
194
200
  - search({ query: "authentication logic" })
195
201
  - search({ query: "how to deploy", index: "docs" })
196
- - search({ query: "tenant management", filter: "internal/domain/" })
197
- - search({ chunkId: "src/auth.ts:chunk-5" })
198
- - search({ path: "docs/architecture.md" })
199
- - search({ path: "src/auth.ts", index: "code" })`,
202
+ - search({ query: "docs/prd.md" }) // attach file
203
+ - search({ query: "internal/domain/", filter: "*.go" })`,
200
204
 
201
205
  args: {
202
- query: tool.schema.string().optional().describe("Semantic search query describing what you're looking for"),
203
- chunkId: tool.schema.string().optional().describe("Specific chunk ID to attach (e.g. 'src/auth.ts:chunk-5')"),
204
- path: tool.schema.string().optional().describe("File path to attach all chunks from (e.g. 'docs/architecture.md')"),
205
- index: tool.schema.string().optional().default("code").describe("Index to search: code, docs"),
206
- limit: tool.schema.number().optional().describe("Number of results (default from config, typically 10)"),
207
- searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
208
- filter: tool.schema.string().optional().describe("Filter results by path or language. Examples: 'internal/domain/', '*.go', 'internal/**/*.go', 'service'"),
206
+ query: tool.schema.string().describe("What to search: semantic query, file path, or chunk ID"),
207
+ index: tool.schema.string().optional().default("code").describe("Where to search: 'code', 'docs', or leave empty for auto-detect"),
208
+ limit: tool.schema.number().optional().describe("Max results (default: 10)"),
209
+ searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of one"),
210
+ filter: tool.schema.string().optional().describe("Filter by path/language: 'internal/domain/', '*.go', 'service'"),
209
211
  },
210
212
 
211
213
  async execute(args) {
212
214
  const projectRoot = process.cwd()
213
215
 
214
216
  try {
215
- // Validate: exactly one of query, chunkId, or path must be specified
216
- const modes = [args.query, args.chunkId, args.path].filter(x => x !== undefined)
217
- if (modes.length === 0) {
218
- return `Error: Must specify one of: query (semantic search), chunkId (direct attach), or path (file attach)\n\nExamples:\n- search({ query: "authentication" })\n- search({ chunkId: "src/auth.ts:chunk-5" })\n- search({ path: "docs/architecture.md" })`
217
+ if (!args.query) {
218
+ return `Error: query is required\n\nExamples:\n- search({ query: "authentication logic" })\n- search({ query: "docs/architecture.md" })\n- search({ query: "src/auth.ts:chunk-5" })`
219
+ }
220
+
221
+ // Auto-detect mode from query
222
+ let mode: "chunkId" | "path" | "semantic"
223
+ let chunkId: string | undefined
224
+ let filePath: string | undefined
225
+ let semanticQuery: string | undefined
226
+
227
+ // 1. Check if it's a chunk ID (contains ":chunk-")
228
+ if (args.query.includes(":chunk-")) {
229
+ mode = "chunkId"
230
+ chunkId = args.query
231
+ }
232
+ // 2. Check if it's a file path (has extension or starts with common paths)
233
+ else if (
234
+ args.query.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
235
+ args.query.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
236
+ args.query.includes("/")
237
+ ) {
238
+ mode = "path"
239
+ filePath = args.query
219
240
  }
220
- if (modes.length > 1) {
221
- return `Error: Specify only ONE of: query, chunkId, or path (got ${modes.length})`
241
+ // 3. Otherwise, it's a semantic search
242
+ else {
243
+ mode = "semantic"
244
+ semanticQuery = args.query
222
245
  }
223
246
 
224
247
  // Load config defaults (parsed from vectorizer.yaml)
@@ -234,23 +257,23 @@ Examples:
234
257
  // ══════════════════════════════════════════════════════════════════════
235
258
  // MODE 1: Direct chunk attach by chunkId
236
259
  // ══════════════════════════════════════════════════════════════════════
237
- if (args.chunkId) {
260
+ if (mode === "chunkId") {
238
261
  const indexer = await getIndexer(projectRoot, indexName)
239
262
  try {
240
- const chunk = await indexer.findChunkById(args.chunkId)
263
+ const chunk = await indexer.findChunkById(chunkId!)
241
264
  if (!chunk) {
242
- return `Chunk "${args.chunkId}" not found in index "${indexName}".\n\nMake sure:\n1. The file is indexed\n2. The chunk ID is correct (format: "path:chunk-N")\n3. You're searching the right index`
265
+ return `Chunk "${chunkId}" not found in index "${indexName}".\n\nMake sure:\n1. The file is indexed\n2. The chunk ID is correct (format: "path:chunk-N")\n3. You're searching the right index`
243
266
  }
244
267
 
245
268
  // Attach to workspace
246
269
  workspaceCache.attach({
247
- chunkId: args.chunkId,
270
+ chunkId: chunkId!,
248
271
  path: chunk.file,
249
272
  content: chunk.content,
250
273
  chunkIndex: chunk.chunk_index ?? 0,
251
274
  role: "manual",
252
275
  attachedAt: Date.now(),
253
- attachedBy: `direct:${args.chunkId}`,
276
+ attachedBy: `direct:${chunkId}`,
254
277
  metadata: {
255
278
  language: chunk.language,
256
279
  function_name: chunk.function_name,
@@ -263,8 +286,8 @@ Examples:
263
286
 
264
287
  workspaceCache.save().catch(() => {})
265
288
 
266
- const entry = workspaceCache.get(args.chunkId)!
267
- return `✓ Attached chunk to workspace\n\nChunk: ${args.chunkId}\nFile: ${chunk.file}\nTokens: ${entry.tokens.toLocaleString()}\nLanguage: ${chunk.language}\nLines: ${chunk.start_line}-${chunk.end_line}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
289
+ const entry = workspaceCache.get(chunkId!)!
290
+ return `✓ Attached chunk to workspace\n\nChunk: ${chunkId}\nFile: ${chunk.file}\nTokens: ${entry.tokens.toLocaleString()}\nLanguage: ${chunk.language}\nLines: ${chunk.start_line}-${chunk.end_line}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
268
291
  } finally {
269
292
  releaseIndexer(projectRoot, indexName)
270
293
  }
@@ -273,27 +296,27 @@ Examples:
273
296
  // ══════════════════════════════════════════════════════════════════════
274
297
  // MODE 2: File attach by path (all chunks)
275
298
  // ══════════════════════════════════════════════════════════════════════
276
- if (args.path) {
299
+ if (mode === "path") {
277
300
  const indexer = await getIndexer(projectRoot, indexName)
278
301
  try {
279
- const chunks = await indexer.findChunksByPath(args.path)
302
+ const chunks = await indexer.findChunksByPath(filePath!)
280
303
  if (chunks.length === 0) {
281
- return `No chunks found for file "${args.path}" in index "${indexName}".\n\nMake sure:\n1. The file exists and is indexed\n2. The path is correct (relative to project root)\n3. You're searching the right index\n\nRun: bunx usethis_search reindex`
304
+ return `No chunks found for file "${filePath}" in index "${indexName}".\n\nMake sure:\n1. The file exists and is indexed\n2. The path is correct (relative to project root)\n3. You're searching the right index\n\nRun: bunx usethis_search reindex`
282
305
  }
283
306
 
284
307
  // Attach all chunks to workspace
285
308
  let totalTokens = 0
286
309
  for (const chunk of chunks) {
287
- const chunkId = chunk.chunk_id || `${args.path}:chunk-${chunk.chunk_index ?? 0}`
310
+ const chunkIdForChunk = chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`
288
311
 
289
312
  workspaceCache.attach({
290
- chunkId,
291
- path: args.path,
313
+ chunkId: chunkIdForChunk,
314
+ path: filePath!,
292
315
  content: chunk.content,
293
316
  chunkIndex: chunk.chunk_index ?? 0,
294
317
  role: "manual",
295
318
  attachedAt: Date.now(),
296
- attachedBy: `file:${args.path}`,
319
+ attachedBy: `file:${filePath}`,
297
320
  metadata: {
298
321
  language: chunk.language,
299
322
  function_name: chunk.function_name,
@@ -304,13 +327,13 @@ Examples:
304
327
  },
305
328
  })
306
329
 
307
- const entry = workspaceCache.get(chunkId)!
330
+ const entry = workspaceCache.get(chunkIdForChunk)!
308
331
  totalTokens += entry.tokens
309
332
  }
310
333
 
311
334
  workspaceCache.save().catch(() => {})
312
335
 
313
- return `✓ Attached file to workspace\n\nFile: ${args.path}\nChunks: ${chunks.length}\nTokens: ${totalTokens.toLocaleString()}\nLanguage: ${chunks[0].language}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
336
+ return `✓ Attached file to workspace\n\nFile: ${filePath}\nChunks: ${chunks.length}\nTokens: ${totalTokens.toLocaleString()}\nLanguage: ${chunks[0].language}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
314
337
  } finally {
315
338
  releaseIndexer(projectRoot, indexName)
316
339
  }
@@ -352,7 +375,7 @@ Examples:
352
375
  for (const idx of indexes) {
353
376
  const indexer = await getIndexer(projectRoot, idx)
354
377
  try {
355
- const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
378
+ const results = await indexer.search(semanticQuery!, limit, includeArchived, searchOptions)
356
379
  allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
357
380
  } finally {
358
381
  releaseIndexer(projectRoot, idx)
@@ -390,14 +413,14 @@ Examples:
390
413
 
391
414
  if (available.length > 0) {
392
415
  const list = available.map(i => `"${i}"`).join(", ")
393
- return `Index "${indexName}" not found. Available indexes: ${list}.\n\nTry: search({ query: "${args.query}", index: "${available[0]}" })\nOr search all: search({ query: "${args.query}", searchAll: true })`
416
+ return `Index "${indexName}" not found. Available indexes: ${list}.\n\nTry: search({ query: "${semanticQuery}", index: "${available[0]}" })\nOr search all: search({ query: "${semanticQuery}", searchAll: true })`
394
417
  }
395
418
  return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
396
419
  }
397
420
 
398
421
  const indexer = await getIndexer(projectRoot, indexName)
399
422
  try {
400
- const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
423
+ const results = await indexer.search(semanticQuery!, limit, includeArchived, searchOptions)
401
424
  allResults = results.map((r: any) => ({ ...r, _index: indexName }))
402
425
  } finally {
403
426
  releaseIndexer(projectRoot, indexName)
@@ -411,20 +434,58 @@ Examples:
411
434
  })
412
435
 
413
436
  // ── Filter — apply path/language constraints from `filter` param ───────
437
+ // Strategy: Try strict filter first, fallback to relaxed if too few results
438
+ const unfilteredResults = [...allResults]
439
+ let filterApplied = false
440
+ let filterRelaxed = false
441
+
414
442
  if (filterParsed.pathPrefix) {
415
443
  const prefix = filterParsed.pathPrefix
416
- allResults = allResults.filter(r => r.file && r.file.startsWith(prefix))
444
+ const strictFiltered = allResults.filter(r => r.file && r.file.startsWith(prefix))
445
+
446
+ // Fallback: if strict gives < 3 results, try "contains" instead of "startsWith"
447
+ if (strictFiltered.length < 3 && allResults.length > strictFiltered.length) {
448
+ const relaxedFiltered = allResults.filter(r => r.file && r.file.includes(prefix))
449
+ if (relaxedFiltered.length > strictFiltered.length) {
450
+ allResults = relaxedFiltered
451
+ filterRelaxed = true
452
+ } else {
453
+ allResults = strictFiltered
454
+ }
455
+ } else {
456
+ allResults = strictFiltered
457
+ }
458
+ filterApplied = true
417
459
  }
460
+
418
461
  if (filterParsed.pathContains) {
419
462
  const needle = filterParsed.pathContains.toLowerCase()
420
463
  allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
464
+ filterApplied = true
421
465
  }
466
+
422
467
  if (filterParsed.language) {
423
- allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
468
+ const strictFiltered = allResults.filter(r => r.language === filterParsed.language)
469
+
470
+ // Fallback: if strict language filter gives < 3 results, include "unknown" language
471
+ if (strictFiltered.length < 3 && allResults.length > strictFiltered.length) {
472
+ const relaxedFiltered = allResults.filter(r =>
473
+ !r.language || r.language === filterParsed.language || r.language === "unknown"
474
+ )
475
+ if (relaxedFiltered.length > strictFiltered.length) {
476
+ allResults = relaxedFiltered
477
+ filterRelaxed = true
478
+ } else {
479
+ allResults = strictFiltered
480
+ }
481
+ } else {
482
+ allResults = strictFiltered
483
+ }
484
+ filterApplied = true
424
485
  }
425
486
 
426
487
  // ── Reranking — boost results where query keywords appear in text ──────
427
- const queryKeywords = args.query.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
488
+ const queryKeywords = semanticQuery!.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
428
489
  for (const r of allResults) {
429
490
  const isBM25Only = !!r._bm25Only
430
491
  const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
@@ -454,7 +515,7 @@ Examples:
454
515
  if (topChunks.length === 0) {
455
516
  const scope = args.searchAll ? "any index" : `index "${indexName}"`
456
517
  const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
457
- return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
518
+ return `No results found in ${scope}${filterNote} for: "${semanticQuery}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
458
519
  }
459
520
 
460
521
  // ══════════════════════════════════════════════════════════════════════
@@ -479,15 +540,15 @@ Examples:
479
540
  // Attach main chunk
480
541
  const chunkId = chunk.chunkId || `${chunk.file}:chunk-${chunk.index ?? 0}`
481
542
 
482
- workspaceCache.attach({
483
- chunkId,
484
- path: chunk.file,
485
- content: chunk.content,
486
- chunkIndex: chunk.index ?? 0,
487
- role: "search-main",
488
- attachedAt: Date.now(),
489
- attachedBy: args.query,
490
- score: chunk._finalScore,
543
+ workspaceCache.attach({
544
+ chunkId,
545
+ path: chunk.file,
546
+ content: chunk.content,
547
+ chunkIndex: chunk.index ?? 0,
548
+ role: "search-main",
549
+ attachedAt: Date.now(),
550
+ attachedBy: semanticQuery!,
551
+ score: chunk._finalScore,
491
552
  metadata: {
492
553
  language: chunk.language,
493
554
  function_name: chunk.function_name,
@@ -511,14 +572,14 @@ Examples:
511
572
  // Check budget before adding
512
573
  if (workspaceCache.size >= wsConfig.maxChunks) break
513
574
 
514
- workspaceCache.attach({
515
- chunkId: expChunkId,
516
- path: expChunk.file,
517
- content: expChunk.content,
518
- chunkIndex: expChunk.chunk_index ?? 0,
519
- role: "search-context",
520
- attachedAt: Date.now(),
521
- attachedBy: `${args.query} (${reason})`,
575
+ workspaceCache.attach({
576
+ chunkId: expChunkId,
577
+ path: expChunk.file,
578
+ content: expChunk.content,
579
+ chunkIndex: expChunk.chunk_index ?? 0,
580
+ role: "search-context",
581
+ attachedAt: Date.now(),
582
+ attachedBy: `${semanticQuery} (${reason})`,
522
583
  score: chunk._finalScore * 0.9, // Slightly lower score than main
523
584
  metadata: {
524
585
  language: expChunk.language,
@@ -550,14 +611,14 @@ Examples:
550
611
  const relChunkId = rel.chunkId || `${rel.file}:chunk-${rel.index ?? 0}`
551
612
  if (alreadyAttached.has(relChunkId)) continue
552
613
 
553
- workspaceCache.attach({
554
- chunkId: relChunkId,
555
- path: rel.file,
556
- content: rel.content,
557
- chunkIndex: rel.index ?? 0,
558
- role: "search-graph",
559
- attachedAt: Date.now(),
560
- attachedBy: `${args.query} (${rel.relation} from ${chunkId})`,
614
+ workspaceCache.attach({
615
+ chunkId: relChunkId,
616
+ path: rel.file,
617
+ content: rel.content,
618
+ chunkIndex: rel.index ?? 0,
619
+ role: "search-graph",
620
+ attachedAt: Date.now(),
621
+ attachedBy: `${semanticQuery} (${rel.relation} from ${chunkId})`,
561
622
  score: rel.score,
562
623
  metadata: {
563
624
  language: rel.language,
@@ -590,12 +651,16 @@ Examples:
590
651
  const hasBM25Only = allResults.some((r: any) => r._bm25Only)
591
652
  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
592
653
  const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
593
- let output = `## Search: "${args.query}" (${scope}${filterLabel})\n\n`
654
+ let output = `## Search: "${semanticQuery}" (${scope}${filterLabel})\n\n`
594
655
 
595
656
  if (hasBM25Only) {
596
657
  output += `> **BM25-only mode** -- vector embeddings not yet available. Quality will improve after embedding completes.\n\n`
597
658
  }
598
659
 
660
+ if (filterRelaxed) {
661
+ output += `> **Filter relaxed.** Strict filter gave too few results. Showing broader matches.\n\n`
662
+ }
663
+
599
664
  if (topScore < 0.45) {
600
665
  output += `> **Low confidence.** Best score: ${topScore.toFixed(3)}. Try more specific keywords.\n\n`
601
666
  }
@@ -155,102 +155,77 @@ export const workspace_list = tool({
155
155
  },
156
156
  })
157
157
 
158
- // ── workspace.attach ────────────────────────────────────────────────────────
158
+ // ── workspace.forget ────────────────────────────────────────────────────────
159
159
 
160
- export const workspace_attach = tool({
161
- description: `Manually attach a file to workspace context as a single chunk. The file will be visible in context injection without needing read().`,
160
+ export const workspace_forget = tool({
161
+ description: `Remove chunks from workspace context to optimize context size and focus.
162
162
 
163
- args: {
164
- filePath: tool.schema.string().describe("Relative file path to attach (e.g. 'src/auth/login.ts')"),
165
- },
163
+ IMPORTANT: Regularly clean up workspace by removing irrelevant files or old search results.
164
+ This keeps context focused and prevents token budget overflow.
166
165
 
167
- async execute(args) {
168
- const projectRoot = process.cwd()
169
-
170
- // Read file content
171
- try {
172
- const fullPath = path.join(projectRoot, args.filePath)
173
- const content = await fs.readFile(fullPath, "utf-8")
174
-
175
- // Generate chunkId for manual attachment: "path:chunk-0"
176
- const chunkId = `${args.filePath}:chunk-0`
177
-
178
- // Check if already attached
179
- if (workspaceCache.has(args.filePath)) {
180
- const existing = workspaceCache.getChunksByPath(args.filePath)
181
- if (existing.length > 0) {
182
- const first = existing[0]
183
- const totalTokens = existing.reduce((sum, c) => sum + c.tokens, 0)
184
- return `File "${args.filePath}" is already in workspace (${existing.length} chunk${existing.length > 1 ? "s" : ""}).\nTokens: ${totalTokens.toLocaleString()} | Role: ${first.role} | Score: ${first.score?.toFixed(3) ?? "n/a"}`
185
- }
186
- }
166
+ Auto-detects what to remove based on input:
167
+ - Chunk ID: "src/auth.ts:chunk-5"
168
+ - File path: "docs/architecture.md" (removes ALL chunks)
169
+ - Search query: "authentication logic" (removes chunks from this search)
170
+ - Age: "5" (removes chunks older than 5 minutes)
187
171
 
188
- workspaceCache.attach({
189
- chunkId,
190
- path: args.filePath,
191
- content,
192
- chunkIndex: 0,
193
- role: "manual",
194
- attachedAt: Date.now(),
195
- attachedBy: "manual",
196
- })
197
-
198
- const entry = workspaceCache.get(chunkId)!
199
- return `Attached "${args.filePath}" to workspace as single chunk.\nChunkId: ${chunkId}\nTokens: ${entry.tokens.toLocaleString()}\nWorkspace total: ${workspaceCache.totalTokens.toLocaleString()} tokens (${workspaceCache.size} chunks)`
200
- } catch (error: any) {
201
- return `Failed to attach "${args.filePath}": ${error.message || String(error)}`
202
- }
203
- },
204
- })
205
-
206
- // ── workspace.detach ────────────────────────────────────────────────────────
207
-
208
- export const workspace_detach = tool({
209
- description: `Remove chunks from workspace context. Can detach by chunkId, by file path (removes ALL chunks of that file), by search query, or by age.`,
172
+ Examples:
173
+ - workspace_forget({ what: "docs/prd.md" })
174
+ - workspace_forget({ what: "5" }) // older than 5 min
175
+ - workspace_forget({ what: "src/auth.ts:chunk-3" })`,
210
176
 
211
177
  args: {
212
- chunkId: tool.schema.string().optional().describe("Specific chunk ID to remove (e.g. 'src/auth.ts:chunk-5')"),
213
- filePath: tool.schema.string().optional().describe("File path to remove (removes ALL chunks of that file)"),
214
- query: tool.schema.string().optional().describe("Remove all chunks attached by this search query"),
215
- olderThan: tool.schema.number().optional().describe("Remove chunks older than N minutes"),
178
+ what: tool.schema.string().describe("What to forget: chunk ID, file path, search query, or age in minutes"),
216
179
  },
217
180
 
218
181
  async execute(args) {
219
182
  let removed = 0
220
-
221
- if (args.chunkId) {
222
- // Detach specific chunk by chunkId
223
- const entry = workspaceCache.get(args.chunkId)
224
-
183
+
184
+ // Auto-detect what to remove
185
+ // 1. Check if it's a chunk ID (contains ":chunk-")
186
+ if (args.what.includes(":chunk-")) {
187
+ const entry = workspaceCache.get(args.what)
225
188
  if (!entry) {
226
- return `Chunk "${args.chunkId}" not found in workspace.`
189
+ return `Chunk "${args.what}" not found in workspace.`
227
190
  }
228
-
229
- removed = workspaceCache.detach(args.chunkId) ? 1 : 0
191
+ removed = workspaceCache.detach(args.what) ? 1 : 0
230
192
  if (removed === 0) {
231
- return `Failed to remove chunk "${args.chunkId}".`
193
+ return `Failed to remove chunk "${args.what}".`
232
194
  }
233
- } else if (args.filePath) {
234
- // Detach all chunks of a file
235
- const fileChunks = workspaceCache.getChunksByPath(args.filePath)
236
-
195
+ return `Removed chunk "${args.what}" from workspace.\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
196
+ }
197
+
198
+ // 2. Check if it's a number (age in minutes)
199
+ const ageMatch = args.what.match(/^(\d+)$/)
200
+ if (ageMatch) {
201
+ const minutes = parseInt(ageMatch[1], 10)
202
+ removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
203
+ return `Removed ${removed} chunk(s) older than ${minutes} minutes.\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
204
+ }
205
+
206
+ // 3. Check if it's a file path (has extension or common path prefixes)
207
+ if (
208
+ args.what.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
209
+ args.what.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
210
+ args.what.includes("/")
211
+ ) {
212
+ const fileChunks = workspaceCache.getChunksByPath(args.what)
237
213
  if (fileChunks.length === 0) {
238
- return `File "${args.filePath}" not found in workspace.`
214
+ return `File "${args.what}" not found in workspace.`
239
215
  }
240
-
241
- removed = workspaceCache.detachByPath(args.filePath)
216
+ removed = workspaceCache.detachByPath(args.what)
242
217
  if (removed === 0) {
243
- return `Failed to remove chunks from "${args.filePath}".`
218
+ return `Failed to remove chunks from "${args.what}".`
244
219
  }
245
- } else if (args.query) {
246
- removed = workspaceCache.detachByQuery(args.query)
247
- } else if (args.olderThan) {
248
- removed = workspaceCache.detachOlderThan(args.olderThan * 60 * 1000)
249
- } else {
250
- return `Specify chunkId, filePath, query, or olderThan to detach chunks.`
220
+ return `Removed ${removed} chunk(s) from "${args.what}".\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
251
221
  }
252
-
253
- return `Removed ${removed} chunk(s) from workspace.\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
222
+
223
+ // 4. Otherwise, treat as search query
224
+ removed = workspaceCache.detachByQuery(args.what)
225
+ if (removed === 0) {
226
+ return `No chunks found attached by query "${args.what}".\n\nTip: Use workspace_list() to see what's in workspace.`
227
+ }
228
+ return `Removed ${removed} chunk(s) from search "${args.what}".\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
254
229
  },
255
230
  })
256
231
 
@@ -10,6 +10,7 @@ export interface MarkdownChunkConfig {
10
10
  max_chunk_size: number // split sections larger than this (chars)
11
11
  split_by_headings: boolean
12
12
  preserve_heading_hierarchy: boolean
13
+ skip_low_priority: boolean // Skip low-priority sections (SQL, aggregates, etc.)
13
14
  }
14
15
 
15
16
  export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
@@ -17,6 +18,7 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
17
18
  max_chunk_size: 8000, // Large chunks for docs (SQL schemas, API specs, etc.)
18
19
  split_by_headings: true,
19
20
  preserve_heading_hierarchy: true,
21
+ skip_low_priority: true, // Skip SQL schemas, aggregates, views by default
20
22
  }
21
23
 
22
24
  export interface MarkdownChunk {
@@ -24,6 +26,7 @@ export interface MarkdownChunk {
24
26
  heading_context: string // "H1 > H2 > H3"
25
27
  start_line?: number
26
28
  end_line?: number
29
+ priority?: "high" | "normal" | "low" // Chunk priority for ranking
27
30
  }
28
31
 
29
32
  // ── Internal types ──────────────────────────────────────────────────────────
@@ -34,6 +37,55 @@ interface Section {
34
37
  body: string
35
38
  start_line: number
36
39
  end_line: number
40
+ priority: "high" | "normal" | "low"
41
+ }
42
+
43
+ // ── Priority detection ──────────────────────────────────────────────────────
44
+
45
+ /**
46
+ * Detect if heading indicates low-priority content (SQL schemas, aggregates, etc.)
47
+ * These sections are often "noise" when searching for business logic.
48
+ */
49
+ function isLowPriorityHeading(heading: string): boolean {
50
+ const lower = heading.toLowerCase()
51
+
52
+ // SQL-related sections (schemas, DDL, migrations)
53
+ if (lower.includes("sql schema") ||
54
+ lower.includes("database schema") ||
55
+ lower.includes("continuous aggregate") ||
56
+ lower.includes("materialized view") ||
57
+ lower.includes("ddl") ||
58
+ lower.includes("migration")) {
59
+ return true
60
+ }
61
+
62
+ // Generated/auto-generated content
63
+ if (lower.includes("auto-generated") ||
64
+ lower.includes("generated schema") ||
65
+ lower.includes("api reference") && lower.includes("generated")) {
66
+ return true
67
+ }
68
+
69
+ // Large reference tables (often boilerplate)
70
+ if (lower.includes("full reference") ||
71
+ lower.includes("complete list") ||
72
+ lower.includes("all endpoints")) {
73
+ return true
74
+ }
75
+
76
+ return false
77
+ }
78
+
79
+ /** Determine priority level for a section based on heading and context. */
80
+ function getSectionPriority(heading: string, body: string): "high" | "normal" | "low" {
81
+ if (isLowPriorityHeading(heading)) return "low"
82
+
83
+ // High-priority: short sections with code examples (tutorials, guides)
84
+ if (body.includes("```") && body.length < 2000) {
85
+ return "high"
86
+ }
87
+
88
+ return "normal"
37
89
  }
38
90
 
39
91
  // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -42,15 +94,16 @@ interface Section {
42
94
  function parseSections(content: string): Section[] {
43
95
  const lines = content.split("\n")
44
96
  const sections: Section[] = []
45
- let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
97
+ let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0, priority: "normal" }
46
98
 
47
99
  for (let i = 0; i < lines.length; i++) {
48
100
  const line = lines[i]
49
101
  const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
50
102
  if (headingMatch) {
51
- // Push previous section
103
+ // Push previous section (with priority calculated)
52
104
  if (currentSection.body.trim() || currentSection.heading) {
53
105
  currentSection.end_line = i - 1
106
+ currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
54
107
  sections.push(currentSection)
55
108
  }
56
109
  currentSection = {
@@ -59,15 +112,17 @@ function parseSections(content: string): Section[] {
59
112
  body: "",
60
113
  start_line: i,
61
114
  end_line: 0,
115
+ priority: "normal", // Will be calculated when section ends
62
116
  }
63
117
  } else {
64
118
  currentSection.body += line + "\n"
65
119
  }
66
120
  }
67
121
 
68
- // Push last section
122
+ // Push last section (with priority calculated)
69
123
  if (currentSection.body.trim() || currentSection.heading) {
70
124
  currentSection.end_line = lines.length - 1
125
+ currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
71
126
  sections.push(currentSection)
72
127
  }
73
128
 
@@ -191,12 +246,18 @@ export function chunkMarkdown(
191
246
  heading_context: headingContext,
192
247
  start_line: section.start_line,
193
248
  end_line: section.end_line,
249
+ priority: section.priority,
194
250
  })
195
251
  }
196
252
 
253
+ // Filter low-priority sections if configured
254
+ const filteredChunks = config.skip_low_priority
255
+ ? rawChunks.filter(chunk => chunk.priority !== "low")
256
+ : rawChunks
257
+
197
258
  // Merge small sections with previous
198
259
  const merged: MarkdownChunk[] = []
199
- for (const chunk of rawChunks) {
260
+ for (const chunk of filteredChunks) {
200
261
  if (
201
262
  merged.length > 0 &&
202
263
  chunk.content.length < config.min_chunk_size
@@ -211,6 +272,10 @@ export function chunkMarkdown(
211
272
  if (chunk.heading_context) {
212
273
  prev.heading_context = chunk.heading_context
213
274
  }
275
+ // Keep highest priority (high > normal > low)
276
+ if (chunk.priority === "high" || (chunk.priority === "normal" && prev.priority === "low")) {
277
+ prev.priority = chunk.priority
278
+ }
214
279
  } else {
215
280
  merged.push({ ...chunk })
216
281
  }
@@ -227,6 +292,7 @@ export function chunkMarkdown(
227
292
  heading_context: chunk.heading_context,
228
293
  start_line: part.start_line,
229
294
  end_line: part.end_line,
295
+ priority: chunk.priority, // Inherit priority from parent chunk
230
296
  })
231
297
  }
232
298
  } else {