npm - @comfanion/usethis_search - Versions diffs - 3.0.1 → 4.1.0-dev.2 - Mend

@comfanion/usethis_search 3.0.1 → 4.1.0-dev.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/cache/manager.ts +751 -0
package/hooks/message-before.ts +261 -0
package/hooks/tool-substitution.ts +261 -0
package/hooks/types.ts +23 -0
package/index.ts +63 -1
package/package.json +7 -2
package/tools/search.ts +154 -63
package/tools/workspace.ts +210 -0
package/vectorizer/index.ts +47 -1
package/vectorizer.yaml +11 -0

package/tools/search.ts CHANGED Viewed

@@ -1,8 +1,11 @@
 /**
- * Semantic Code Search Tool (v3)
+ * Semantic Code Search Tool (v4 — workspace injection)
  *
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
- * v3: simplified agent API — 5 params, config-driven defaults, smart filter.
+ * v4: Top results + graph relations attached to workspace with full content.
+ *     Rest returned as summary only.
+ *     AI sees full files via message.before injection — no read() needed.
+ *
  * Index data is stored in `.opencode/vectors/<index>/`.
  */
@@ -11,6 +14,7 @@ import path from "path"
 import fs from "fs/promises"
 import { CodebaseIndexer, getSearchConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
+import { workspaceCache } from "../cache/manager.ts"
 // ── Extension → language mapping (for filter parsing) ─────────────────────
 const EXT_TO_LANG: Record<string, string> = {
@@ -88,7 +92,7 @@ function parseFilter(filter: string): {
 }
 export default tool({
-  description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
+  description: `Search the codebase semantically. Top results are attached to workspace with full content (visible via context injection). Rest returned as summary.
 Available indexes:
 - "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
@@ -123,6 +127,9 @@ Examples:
       const minScore = cfg.min_score ?? 0.35
       const includeArchived = cfg.include_archived ?? false
+      // Workspace config
+      const wsConfig = workspaceCache.getConfig()
       // Parse filter into path/language constraints
       const filterParsed = args.filter ? parseFilter(args.filter) : {}
@@ -209,8 +216,6 @@ Examples:
         const needle = filterParsed.pathContains.toLowerCase()
         allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
       }
-      // Language filter is already passed to searchOptions above, but double-check
-      // in case vectorizer didn't filter (e.g. docs index has no language field)
       if (filterParsed.language) {
         allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
       }
@@ -265,84 +270,170 @@ Examples:
         return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
       }
-      // ── Confidence signal ──────────────────────────────────────────────────
+      // ══════════════════════════════════════════════════════════════════════
+      // WORKSPACE ATTACH: Top N main files + graph relations (FULL CONTENT)
+      // ══════════════════════════════════════════════════════════════════════
+      const topGroups = sortedGroups.slice(0, wsConfig.attachTopN)
+      const restGroups = sortedGroups.slice(wsConfig.attachTopN)
+      const attachedMain: string[] = []
+      const attachedGraph: string[] = []
+      const alreadyAttached = new Set<string>()
+      for (const { best: r } of topGroups) {
+        // Skip if score too low
+        if ((r._finalScore ?? 0) < wsConfig.minScoreMain) continue
+        // Read full file and attach
+        try {
+          const fullPath = path.join(projectRoot, r.file)
+          const content = await fs.readFile(fullPath, "utf-8")
+          workspaceCache.attach({
+            path: r.file,
+            content,
+            role: "search-main",
+            attachedAt: Date.now(),
+            attachedBy: args.query,
+            score: r._finalScore,
+            metadata: {
+              language: r.language,
+              function_name: r.function_name,
+              class_name: r.class_name,
+              heading_context: r.heading_context,
+            },
+          })
+          attachedMain.push(r.file)
+          alreadyAttached.add(r.file)
+        } catch {
+          // File read failed — skip
+          continue
+        }
+        // Attach graph relations (imports, extends, used_by)
+        if (r.relatedContext && r.relatedContext.length > 0) {
+          const topRelated = r.relatedContext
+            .filter((rel: any) => rel.score >= wsConfig.minScoreRelated)
+            .sort((a: any, b: any) => b.score - a.score)
+            .slice(0, wsConfig.attachRelatedPerFile)
+          for (const rel of topRelated) {
+            if (alreadyAttached.has(rel.file)) continue
+            try {
+              const relFullPath = path.join(projectRoot, rel.file)
+              const relContent = await fs.readFile(relFullPath, "utf-8")
+              workspaceCache.attach({
+                path: rel.file,
+                content: relContent,
+                role: "search-graph",
+                attachedAt: Date.now(),
+                attachedBy: `${args.query} (${rel.relation} from ${r.file})`,
+                score: rel.score,
+                metadata: {
+                  language: rel.language,
+                  relation: rel.relation,
+                  mainFile: r.file,
+                },
+              })
+              attachedGraph.push(rel.file)
+              alreadyAttached.add(rel.file)
+            } catch {
+              // Related file read failed — skip
+            }
+          }
+        }
+      }
+      // ── Flush workspace to disk immediately (don't rely on debounce) ─────
+      if (attachedMain.length > 0 || attachedGraph.length > 0) {
+        workspaceCache.save().catch(() => {})
+      }
+      // ══════════════════════════════════════════════════════════════════════
+      // BUILD OUTPUT: Attached (summary) + Rest (summary only)
+      // ══════════════════════════════════════════════════════════════════════
       const topScore = sortedGroups[0].best._finalScore ?? 0
       const hasBM25Only = allResults.some((r: any) => r._bm25Only)
       const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
       const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
-      let output = `## Search Results for: "${args.query}" (${scope}${filterLabel})\n\n`
+      let output = `## Search: "${args.query}" (${scope}${filterLabel})\n\n`
       if (hasBM25Only) {
-        output += `> **BM25-only mode** — vector embeddings not yet available. Results are keyword-based. Quality will improve after embedding completes.\n\n`
+        output += `> **BM25-only mode** -- vector embeddings not yet available. Quality will improve after embedding completes.\n\n`
       }
       if (topScore < 0.45) {
-        output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords or different phrasing.\n\n`
+        output += `> **Low confidence.** Best score: ${topScore.toFixed(3)}. Try more specific keywords.\n\n`
       }
-      for (let i = 0; i < sortedGroups.length; i++) {
-        const { best: r, chunks } = sortedGroups[i]
-        const score = (r._finalScore ?? 0).toFixed(3)
-        const indexLabel = args.searchAll ? ` [${r._index}]` : ""
-        const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""
-        // Rich metadata
-        const metaParts: string[] = []
-        if (r.language && r.language !== "unknown") metaParts.push(r.language)
-        if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
-        if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
-        if (r.class_name) metaParts.push(`class: ${r.class_name}`)
-        const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
-        // Score breakdown
-        const breakdownParts: string[] = r._bm25Only
-          ? [`bm25: ${(r._bm25Component ?? 0).toFixed(2)}`]
-          : [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
-        if (!r._bm25Only && r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
-        if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
-        const breakdown = breakdownParts.join(", ")
-        // Matched keywords
-        const kwDisplay = r._matchedKeywords && r._matchedKeywords.length > 0
-          ? ` | matched: "${r._matchedKeywords.join('", "')}"`
-          : ""
-        output += `### ${i + 1}. ${r.file}${indexLabel}${chunkNote}\n`
-        output += `**Score:** ${score} (${breakdown}${kwDisplay})${metaLine}\n\n`
-        output += "```\n"
-        const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
-        output += content
-        output += "\n```\n"
-        // Second-best chunk hint
-        if (chunks.length > 1) {
-          const second = chunks.find((c: any) => c !== r)
-          if (second) {
-            const secMeta: string[] = []
-            if (second.function_name) secMeta.push(`fn: ${second.function_name}`)
-            if (second.heading_context) secMeta.push(`"${second.heading_context}"`)
-            const secLabel = secMeta.length > 0 ? ` ${secMeta.join(", ")}` : ""
-            output += `\n*Also:${secLabel}*\n`
-          }
+      // ── Attached files (summary — full content in workspace injection) ─────
+      if (attachedMain.length > 0) {
+        const totalAttached = attachedMain.length + attachedGraph.length
+        output += `### Attached to workspace (${totalAttached} files)\n\n`
+        for (let i = 0; i < attachedMain.length; i++) {
+          const group = topGroups.find(g => g.best.file === attachedMain[i])
+          if (!group) continue
+          const r = group.best
+          const score = (r._finalScore ?? 0).toFixed(3)
+          const chunkNote = group.chunks.length > 1 ? ` (${group.chunks.length} sections)` : ""
+          const metaParts: string[] = []
+          if (r.language && r.language !== "unknown") metaParts.push(r.language)
+          if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
+          if (r.class_name) metaParts.push(`class: ${r.class_name}`)
+          const metaLine = metaParts.length > 0 ? ` — ${metaParts.join(", ")}` : ""
+          output += `${i + 1}. **${r.file}** score: ${score}${chunkNote}${metaLine}\n`
         }
-        if (r.relatedContext && r.relatedContext.length > 0) {
-          output += "\n**Related Context:**\n"
-          for (const rel of r.relatedContext) {
-            const snippet = rel.content.length > 200
-              ? rel.content.substring(0, 200) + "..."
-              : rel.content
-            output += `- **${rel.file}** (${rel.relation}, via ${rel.via}, score: ${rel.score.toFixed(2)})\n`
-            output += `  \`\`\`\n  ${snippet}\n  \`\`\`\n`
+        if (attachedGraph.length > 0) {
+          output += `\n**Graph relations:**\n`
+          for (const graphFile of attachedGraph) {
+            const entry = workspaceCache.get(graphFile)
+            const relation = entry?.metadata?.relation || "related"
+            const mainFile = entry?.metadata?.mainFile
+            const mainBasename = mainFile ? path.basename(mainFile) : "?"
+            output += `- ${graphFile} (${relation} from ${mainBasename})\n`
           }
         }
+        output += `\n`
+      }
-        output += "\n"
+      // ── Rest files (summary only — not attached) ──────────────────────────
+      if (restGroups.length > 0) {
+        output += `### Additional results (summary only)\n\n`
+        for (let i = 0; i < restGroups.length; i++) {
+          const { best: r, chunks } = restGroups[i]
+          const score = (r._finalScore ?? 0).toFixed(3)
+          const chunkNote = chunks.length > 1 ? ` (${chunks.length} sections)` : ""
+          const indexLabel = args.searchAll ? ` [${r._index}]` : ""
+          const metaParts: string[] = []
+          if (r.language && r.language !== "unknown") metaParts.push(r.language)
+          if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
+          if (r.class_name) metaParts.push(`class: ${r.class_name}`)
+          const metaLine = metaParts.length > 0 ? ` — ${metaParts.join(", ")}` : ""
+          output += `${attachedMain.length + i + 1}. ${r.file}${indexLabel} score: ${score}${chunkNote}${metaLine}\n`
+        }
+        output += `\nUse \`workspace.attach("path")\` to attach additional files.\n`
       }
+      // ── Footer ────────────────────────────────────────────────────────────
       const totalChunks = allResults.length
       const uniqueFiles = sortedGroups.length
-      output += `---\n*${uniqueFiles} files (${totalChunks} chunks). Use Read tool to see full files.*`
+      output += `\n---\n`
+      output += `*${uniqueFiles} files (${totalChunks} chunks) | `
+      output += `Workspace: ${workspaceCache.size} files, ${workspaceCache.totalTokens.toLocaleString()} tokens*\n`
+      output += `*Attached files are in workspace context — reference them directly without read().*`
       return output
     } catch (error: any) {
       return `Search failed: ${error.message || String(error)}`

package/tools/workspace.ts ADDED Viewed

@@ -0,0 +1,210 @@
+/**
+ * Workspace Management Tools
+ *
+ * Manual control over the workspace cache:
+ *   workspace_list    — show all attached files + stats
+ *   workspace_attach  — manually attach a file by path
+ *   workspace_detach  — remove file(s) from workspace
+ *   workspace_clear   — remove all files
+ *   workspace_restore — restore a saved session snapshot
+ */
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+import { workspaceCache } from "../cache/manager.ts"
+// ── workspace.list ──────────────────────────────────────────────────────────
+/**
+ * Tool: report everything currently held in the workspace cache.
+ *
+ * Builds a markdown summary containing the session id (when one is set),
+ * file and token totals, one section per attachment role ("search-main",
+ * "search-graph", "manual") and a footer comparing usage against the
+ * `maxTokens` / `maxFiles` limits from the workspace config.
+ *
+ * Takes no arguments. Returns a short usage hint when the cache is empty.
+ */
+export const workspace_list = tool({
+  description: `List all files currently in workspace context. Shows file paths, roles, scores, and token counts.`,
+  args: {},
+  async execute() {
+    const entries = workspaceCache.getAll()
+    if (entries.length === 0) {
+      return `Workspace is empty.\n\nUse search() to find and attach files, or workspace.attach("path") to add manually.`
+    }
+    // Age in whole minutes, rounded down (anything under a minute shows "0m ago").
+    const ageMinutes = (attachedAt: number): number => Math.floor((Date.now() - attachedAt) / 1000 / 60)
+    const sessionId = workspaceCache.getSessionId()
+    let output = `## Workspace Status\n\n`
+    if (sessionId) {
+      output += `Session: ${sessionId}\n`
+    }
+    output += `Files: ${workspaceCache.size}\n`
+    output += `Total tokens: ${workspaceCache.totalTokens.toLocaleString()}\n\n`
+    // NOTE(review): only these three roles are listed; an entry with any other
+    // role would be counted in the totals above but never shown — confirm the role set.
+    const mainFiles = entries.filter(e => e.role === "search-main")
+    const graphFiles = entries.filter(e => e.role === "search-graph")
+    const manualFiles = entries.filter(e => e.role === "manual")
+    if (mainFiles.length > 0) {
+      output += `### Search results (${mainFiles.length})\n`
+      for (const e of mainFiles) {
+        // Type check instead of truthiness so a score of exactly 0 is still printed.
+        const score = typeof e.score === "number" ? ` score: ${e.score.toFixed(3)}` : ""
+        const meta = e.metadata?.function_name || e.metadata?.class_name || ""
+        output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok${score}${meta ? ` (${meta})` : ""} — ${ageMinutes(e.attachedAt)}m ago\n`
+        if (e.attachedBy && e.attachedBy !== "manual") {
+          output += `  query: "${e.attachedBy}"\n`
+        }
+      }
+      output += `\n`
+    }
+    if (graphFiles.length > 0) {
+      output += `### Graph relations (${graphFiles.length})\n`
+      for (const e of graphFiles) {
+        const relation = e.metadata?.relation || "related"
+        // Only the basename of the originating file is shown to keep lines short.
+        const mainFile = e.metadata?.mainFile ? path.basename(e.metadata.mainFile) : "?"
+        output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok — ${relation} from ${mainFile} — ${ageMinutes(e.attachedAt)}m ago\n`
+      }
+      output += `\n`
+    }
+    if (manualFiles.length > 0) {
+      output += `### Manually attached (${manualFiles.length})\n`
+      for (const e of manualFiles) {
+        output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok — ${ageMinutes(e.attachedAt)}m ago\n`
+      }
+      output += `\n`
+    }
+    // Budget info
+    const config = workspaceCache.getConfig()
+    // Guard the division: a configured limit of 0 would otherwise print "NaN%" or "Infinity%".
+    const pct = config.maxTokens > 0 ? Math.round((workspaceCache.totalTokens / config.maxTokens) * 100) : 0
+    output += `---\n`
+    output += `*Budget: ${workspaceCache.totalTokens.toLocaleString()} / ${config.maxTokens.toLocaleString()} tokens (${pct}%) | `
+    output += `${workspaceCache.size} / ${config.maxFiles} files*`
+    return output
+  },
+})
+// ── workspace.attach ────────────────────────────────────────────────────────
+/**
+ * Tool: read one file from disk and add it to the workspace cache with role "manual".
+ *
+ * `filePath` is joined onto `process.cwd()` to locate the file, and is also the
+ * key under which the entry is stored and looked up.
+ *
+ * Returns a status string in every case: "already in workspace" details when the
+ * path is already cached, the new token totals on success, or a
+ * `Failed to attach …` message when reading or caching the file fails.
+ */
+export const workspace_attach = tool({
+  description: `Manually attach a file to workspace context. The file will be visible in context injection without needing read().`,
+  args: {
+    filePath: tool.schema.string().describe("Relative file path to attach (e.g. 'src/auth/login.ts')"),
+  },
+  async execute(args) {
+    const projectRoot = process.cwd()
+    // Check if already attached (one guarded lookup instead of has() + get()!)
+    const existing = workspaceCache.get(args.filePath)
+    if (existing) {
+      return `File "${args.filePath}" is already in workspace.\nRole: ${existing.role} | Tokens: ${existing.tokens.toLocaleString()} | Score: ${existing.score?.toFixed(3) ?? "n/a"}`
+    }
+    // Read file content
+    try {
+      // NOTE(review): path.join does not stop "../" segments from leaving
+      // projectRoot — confirm whether attaching files outside the project is intended.
+      const fullPath = path.join(projectRoot, args.filePath)
+      const content = await fs.readFile(fullPath, "utf-8")
+      workspaceCache.attach({
+        path: args.filePath,
+        content,
+        role: "manual",
+        attachedAt: Date.now(),
+        attachedBy: "manual",
+      })
+      // Re-read the entry to report its token count. Guard the lookup so a missing
+      // entry yields a clear message rather than a TypeError on `.tokens`.
+      const attached = workspaceCache.get(args.filePath)
+      if (!attached) {
+        return `Failed to attach "${args.filePath}": file was not retained in workspace.`
+      }
+      return `Attached "${args.filePath}" to workspace.\nTokens: ${attached.tokens.toLocaleString()}\nWorkspace total: ${workspaceCache.totalTokens.toLocaleString()} tokens (${workspaceCache.size} files)`
+    } catch (error: unknown) {
+      // Prefer the Error message; fall back to the stringified value for anything else.
+      const reason = (error instanceof Error && error.message) || String(error)
+      return `Failed to attach "${args.filePath}": ${reason}`
+    }
+  },
+})
+// ── workspace.detach ────────────────────────────────────────────────────────
+/**
+ * Tool: remove entries from the workspace cache.
+ *
+ * Exactly one selector is applied, checked in this order: `filePath`, then
+ * `query`, then `olderThan` (minutes, converted to milliseconds before the
+ * cache call). When no selector is supplied a usage hint is returned instead.
+ *
+ * Returns the number of removed entries plus the remaining file/token totals,
+ * or a "not found" message when `filePath` matches nothing.
+ */
+export const workspace_detach = tool({
+  description: `Remove file(s) from workspace context. Can detach by path, by search query, or by age.`,
+  args: {
+    filePath: tool.schema.string().optional().describe("Specific file path to remove"),
+    query: tool.schema.string().optional().describe("Remove all files attached by this search query"),
+    olderThan: tool.schema.number().optional().describe("Remove files older than N minutes"),
+  },
+  async execute(args) {
+    let removed = 0
+    if (args.filePath) {
+      removed = workspaceCache.detach(args.filePath) ? 1 : 0
+      if (removed === 0) {
+        return `File "${args.filePath}" not found in workspace.`
+      }
+    } else if (args.query) {
+      removed = workspaceCache.detachByQuery(args.query)
+    } else if (args.olderThan != null) {
+      // Explicit null/undefined check: `olderThan: 0` is a real request and must
+      // not be mistaken for "no selector given" by a truthiness test.
+      removed = workspaceCache.detachOlderThan(args.olderThan * 60 * 1000)
+    } else {
+      return `Specify filePath, query, or olderThan to detach files.`
+    }
+    return `Removed ${removed} file(s) from workspace.\nWorkspace: ${workspaceCache.size} files, ${workspaceCache.totalTokens.toLocaleString()} tokens`
+  },
+})
+// ── workspace.clear ─────────────────────────────────────────────────────────
+/**
+ * Tool: empty the workspace cache.
+ *
+ * Takes no arguments. Returns a message stating how many files were removed
+ * and how many tokens they accounted for.
+ */
+export const workspace_clear = tool({
+  description: `Remove ALL files from workspace context. Use when switching tasks or starting fresh.`,
+  args: {},
+  async execute() {
+    // Read both totals before calling clear() so the message reports the pre-clear state.
+    const { size: removedFiles, totalTokens: freedTokens } = workspaceCache
+    workspaceCache.clear()
+    return `Cleared workspace: ${removedFiles} files removed (${freedTokens.toLocaleString()} tokens freed).\nWorkspace is now empty.`
+  },
+})
+// ── workspace.restore ───────────────────────────────────────────────────────
+/**
+ * Tool: restore the workspace from a saved snapshot, or list the snapshots.
+ *
+ * With `sessionId`: asks the cache to restore that snapshot and reports the
+ * resulting file/token totals, or a "not found or empty" message when the
+ * cache reports failure.
+ * Without `sessionId`: returns a markdown list of available snapshots (id,
+ * file count, token count, save time), or a notice when there are none.
+ */
+export const workspace_restore = tool({
+  description: `Restore workspace from a saved session snapshot. Use after compaction or to switch context.`,
+  args: {
+    sessionId: tool.schema.string().optional().describe("Session ID to restore. If not provided, lists available snapshots."),
+  },
+  async execute(args) {
+    if (args.sessionId) {
+      // Restore path: a falsy result from the cache means nothing was restored.
+      const ok = await workspaceCache.restore(args.sessionId)
+      return ok
+        ? `Restored workspace from "${args.sessionId}".\nFiles: ${workspaceCache.size}\nTokens: ${workspaceCache.totalTokens.toLocaleString()}`
+        : `Snapshot "${args.sessionId}" not found or empty.`
+    }
+    // Listing path: no session id given, so show what could be restored.
+    const available = await workspaceCache.listSnapshots()
+    if (available.length === 0) {
+      return `No saved workspace snapshots found.`
+    }
+    const rows = available
+      .map(snap => {
+        const date = new Date(snap.savedAt).toLocaleString()
+        return `- **${snap.id}** — ${snap.fileCount} files, ${snap.totalTokens.toLocaleString()} tokens — ${date}\n`
+      })
+      .join("")
+    return `## Saved Workspace Snapshots\n\n` + rows + `\nUse \`workspace.restore("session-id")\` to restore.`
+  },
+})

package/vectorizer/index.ts CHANGED Viewed

@@ -95,6 +95,20 @@ const DEFAULT_SEARCH_CONFIG = {
 };
 let SEARCH_CONFIG = { ...DEFAULT_SEARCH_CONFIG };
+// ── Workspace injection config (v4) ─────────────────────────────────────────
+const DEFAULT_WORKSPACE_CONFIG = { // Built-in defaults; loadConfig reads the snake_case counterpart of each key from the "workspace:" section
+  maxTokens: 50_000,           // Max total tokens across all cached files
+  maxFiles: 30,                // Max number of files in workspace
+  attachTopN: 5,               // Top N search results to attach with full content
+  attachRelatedPerFile: 3,     // Max graph relations per main file
+  minScoreMain: 0.65,          // Min score for main files
+  minScoreRelated: 0.5,        // Min score for graph relations
+  persistContent: false,       // Save full content in snapshots (debug mode)
+  autoPruneSearch: true,       // Replace old search outputs with compact summaries
+  substituteToolOutputs: true, // Replace tool outputs when files in workspace
+};
+let WORKSPACE_CONFIG = { ...DEFAULT_WORKSPACE_CONFIG }; // Live settings: starts as a shallow copy of the defaults, then overwritten field by field in loadConfig
 // ── Graph config (v3) ───────────────────────────────────────────────────────
 const DEFAULT_GRAPH_CONFIG = {
   enabled: true,
@@ -162,6 +176,18 @@ function defaultVectorizerYaml() {
     `      timeout_ms: 5000\n` +
     `    read_intercept: true\n` +
     `\n` +
+    `  # Workspace injection (v4)\n` +
+    `  workspace:\n` +
+    `    max_tokens: 50000            # Max total tokens across all cached files\n` +
+    `    max_files: 30                # Max number of files in workspace\n` +
+    `    attach_top_n: 5              # Top N search results to attach with full content\n` +
+    `    attach_related_per_file: 3   # Max graph relations per main file\n` +
+    `    min_score_main: 0.65         # Min score for main files\n` +
+    `    min_score_related: 0.5       # Min score for graph relations\n` +
+    `    persist_content: false        # Save full content in snapshots (debug mode)\n` +
+    `    auto_prune_search: true      # Replace old search outputs with compact summaries\n` +
+    `    substitute_tool_outputs: true # Replace tool outputs when files in workspace\n` +
+    `\n` +
     `  # Quality monitoring\n` +
     `  quality:\n` +
     `    enable_metrics: false\n` +
@@ -320,6 +346,22 @@ async function loadConfig(projectRoot) {
       SEARCH_CONFIG.default_limit = parseNumber(ss, "default_limit", DEFAULT_SEARCH_CONFIG.default_limit);
     }
+    // ── Parse workspace injection config (v4) ──────────────────────────────
+    const workspaceMatch = section.match(/^\s{2}workspace:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
+    if (workspaceMatch) {
+      const ws = workspaceMatch[1];
+      WORKSPACE_CONFIG.maxTokens = parseNumber(ws, "max_tokens", DEFAULT_WORKSPACE_CONFIG.maxTokens);
+      WORKSPACE_CONFIG.maxFiles = parseNumber(ws, "max_files", DEFAULT_WORKSPACE_CONFIG.maxFiles);
+      WORKSPACE_CONFIG.attachTopN = parseNumber(ws, "attach_top_n", DEFAULT_WORKSPACE_CONFIG.attachTopN);
+      WORKSPACE_CONFIG.attachRelatedPerFile = parseNumber(ws, "attach_related_per_file", DEFAULT_WORKSPACE_CONFIG.attachRelatedPerFile);
+      WORKSPACE_CONFIG.minScoreMain = parseNumber(ws, "min_score_main", DEFAULT_WORKSPACE_CONFIG.minScoreMain);
+      WORKSPACE_CONFIG.minScoreRelated = parseNumber(ws, "min_score_related", DEFAULT_WORKSPACE_CONFIG.minScoreRelated);
+      WORKSPACE_CONFIG.persistContent = parseBool(ws, "persist_content", DEFAULT_WORKSPACE_CONFIG.persistContent);
+      WORKSPACE_CONFIG.autoPruneSearch = parseBool(ws, "auto_prune_search", DEFAULT_WORKSPACE_CONFIG.autoPruneSearch);
+      WORKSPACE_CONFIG.substituteToolOutputs = parseBool(ws, "substitute_tool_outputs", DEFAULT_WORKSPACE_CONFIG.substituteToolOutputs);
+      if (DEBUG) console.log("[vectorizer] Workspace config:", WORKSPACE_CONFIG);
+    }
     // ── Parse quality config ────────────────────────────────────────────────
     const qualityMatch = section.match(/^\s{2}quality:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
     if (qualityMatch) {
@@ -1683,6 +1725,10 @@ function getSearchConfig() {
   return SEARCH_CONFIG;
 }
+function getWorkspaceConfig() { // Accessor for the current workspace settings; returns the module-level object itself, not a copy
+  return WORKSPACE_CONFIG;
+}
 // ── Singleton indexer pool ──────────────────────────────────────────────────
 // Prevents LevelDB lock conflicts when parallel searches hit the same index.
 // Each unique (projectRoot, indexName) gets one shared CodebaseIndexer.
@@ -1744,4 +1790,4 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
   }
 }
-export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getIndexer, releaseIndexer, destroyIndexer };
+export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getWorkspaceConfig, getIndexer, releaseIndexer, destroyIndexer };

package/vectorizer.yaml CHANGED Viewed

@@ -55,6 +55,17 @@ vectorizer:
     # Read() intercept
     read_intercept: true
+  # Workspace injection (v4) — search results attached to AI context
+  workspace:
+    max_tokens: 50000          # Max total tokens across all cached files
+    max_files: 30              # Max number of files in workspace
+    attach_top_n: 5            # Top N search results to attach with full content
+    attach_related_per_file: 3 # Max graph relations per main file
+    min_score_main: 0.65       # Min score for main files
+    min_score_related: 0.5     # Min score for graph relations
+    persist_content: false     # Save full content in snapshots (debug mode)
+    auto_prune_search: true    # Replace old search outputs with compact summaries
   # Quality monitoring (v2)
   quality:
     enable_metrics: false   # Track search quality metrics