@comfanion/usethis_search 3.0.1 → 4.1.0-dev.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tools/search.ts CHANGED
@@ -1,8 +1,11 @@
1
1
  /**
2
- * Semantic Code Search Tool (v3)
2
+ * Semantic Code Search Tool (v4 — workspace injection)
3
3
  *
4
4
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
5
- * v3: simplified agent API 5 params, config-driven defaults, smart filter.
5
+ * v4: Top results + graph relations attached to workspace with full content.
6
+ * Rest returned as summary only.
7
+ * AI sees full files via message.before injection — no read() needed.
8
+ *
6
9
  * Index data is stored in `.opencode/vectors/<index>/`.
7
10
  */
8
11
 
@@ -11,6 +14,7 @@ import path from "path"
11
14
  import fs from "fs/promises"
12
15
 
13
16
  import { CodebaseIndexer, getSearchConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
17
+ import { workspaceCache } from "../cache/manager.ts"
14
18
 
15
19
  // ── Extension → language mapping (for filter parsing) ─────────────────────
16
20
  const EXT_TO_LANG: Record<string, string> = {
@@ -88,7 +92,7 @@ function parseFilter(filter: string): {
88
92
  }
89
93
 
90
94
  export default tool({
91
- description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
95
+ description: `Search the codebase semantically. Top results are attached to workspace with full content (visible via context injection). Rest returned as summary.
92
96
 
93
97
  Available indexes:
94
98
  - "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
@@ -123,6 +127,9 @@ Examples:
123
127
  const minScore = cfg.min_score ?? 0.35
124
128
  const includeArchived = cfg.include_archived ?? false
125
129
 
130
+ // Workspace config
131
+ const wsConfig = workspaceCache.getConfig()
132
+
126
133
  // Parse filter into path/language constraints
127
134
  const filterParsed = args.filter ? parseFilter(args.filter) : {}
128
135
 
@@ -209,8 +216,6 @@ Examples:
209
216
  const needle = filterParsed.pathContains.toLowerCase()
210
217
  allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
211
218
  }
212
- // Language filter is already passed to searchOptions above, but double-check
213
- // in case vectorizer didn't filter (e.g. docs index has no language field)
214
219
  if (filterParsed.language) {
215
220
  allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
216
221
  }
@@ -265,84 +270,170 @@ Examples:
265
270
  return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
266
271
  }
267
272
 
268
- // ── Confidence signal ──────────────────────────────────────────────────
273
+ // ══════════════════════════════════════════════════════════════════════
274
+ // WORKSPACE ATTACH: Top N main files + graph relations (FULL CONTENT)
275
+ // ══════════════════════════════════════════════════════════════════════
276
+
277
+ const topGroups = sortedGroups.slice(0, wsConfig.attachTopN)
278
+ const restGroups = sortedGroups.slice(wsConfig.attachTopN)
279
+
280
+ const attachedMain: string[] = []
281
+ const attachedGraph: string[] = []
282
+ const alreadyAttached = new Set<string>()
283
+
284
+ for (const { best: r } of topGroups) {
285
+ // Skip if score too low
286
+ if ((r._finalScore ?? 0) < wsConfig.minScoreMain) continue
287
+
288
+ // Read full file and attach
289
+ try {
290
+ const fullPath = path.join(projectRoot, r.file)
291
+ const content = await fs.readFile(fullPath, "utf-8")
292
+
293
+ workspaceCache.attach({
294
+ path: r.file,
295
+ content,
296
+ role: "search-main",
297
+ attachedAt: Date.now(),
298
+ attachedBy: args.query,
299
+ score: r._finalScore,
300
+ metadata: {
301
+ language: r.language,
302
+ function_name: r.function_name,
303
+ class_name: r.class_name,
304
+ heading_context: r.heading_context,
305
+ },
306
+ })
307
+
308
+ attachedMain.push(r.file)
309
+ alreadyAttached.add(r.file)
310
+ } catch {
311
+ // File read failed — skip
312
+ continue
313
+ }
314
+
315
+ // Attach graph relations (imports, extends, used_by)
316
+ if (r.relatedContext && r.relatedContext.length > 0) {
317
+ const topRelated = r.relatedContext
318
+ .filter((rel: any) => rel.score >= wsConfig.minScoreRelated)
319
+ .sort((a: any, b: any) => b.score - a.score)
320
+ .slice(0, wsConfig.attachRelatedPerFile)
321
+
322
+ for (const rel of topRelated) {
323
+ if (alreadyAttached.has(rel.file)) continue
324
+
325
+ try {
326
+ const relFullPath = path.join(projectRoot, rel.file)
327
+ const relContent = await fs.readFile(relFullPath, "utf-8")
328
+
329
+ workspaceCache.attach({
330
+ path: rel.file,
331
+ content: relContent,
332
+ role: "search-graph",
333
+ attachedAt: Date.now(),
334
+ attachedBy: `${args.query} (${rel.relation} from ${r.file})`,
335
+ score: rel.score,
336
+ metadata: {
337
+ language: rel.language,
338
+ relation: rel.relation,
339
+ mainFile: r.file,
340
+ },
341
+ })
342
+
343
+ attachedGraph.push(rel.file)
344
+ alreadyAttached.add(rel.file)
345
+ } catch {
346
+ // Related file read failed — skip
347
+ }
348
+ }
349
+ }
350
+ }
351
+
352
+ // ── Flush workspace to disk immediately (don't rely on debounce) ─────
353
+ if (attachedMain.length > 0 || attachedGraph.length > 0) {
354
+ workspaceCache.save().catch(() => {})
355
+ }
356
+
357
+ // ══════════════════════════════════════════════════════════════════════
358
+ // BUILD OUTPUT: Attached (summary) + Rest (summary only)
359
+ // ══════════════════════════════════════════════════════════════════════
360
+
269
361
  const topScore = sortedGroups[0].best._finalScore ?? 0
270
362
  const hasBM25Only = allResults.some((r: any) => r._bm25Only)
271
363
  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
272
364
  const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
273
- let output = `## Search Results for: "${args.query}" (${scope}${filterLabel})\n\n`
365
+ let output = `## Search: "${args.query}" (${scope}${filterLabel})\n\n`
274
366
 
275
367
  if (hasBM25Only) {
276
- output += `> **BM25-only mode** vector embeddings not yet available. Results are keyword-based. Quality will improve after embedding completes.\n\n`
368
+ output += `> **BM25-only mode** -- vector embeddings not yet available. Quality will improve after embedding completes.\n\n`
277
369
  }
278
370
 
279
371
  if (topScore < 0.45) {
280
- output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords or different phrasing.\n\n`
372
+ output += `> **Low confidence.** Best score: ${topScore.toFixed(3)}. Try more specific keywords.\n\n`
281
373
  }
282
374
 
283
- for (let i = 0; i < sortedGroups.length; i++) {
284
- const { best: r, chunks } = sortedGroups[i]
285
- const score = (r._finalScore ?? 0).toFixed(3)
286
- const indexLabel = args.searchAll ? ` [${r._index}]` : ""
287
- const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""
288
-
289
- // Rich metadata
290
- const metaParts: string[] = []
291
- if (r.language && r.language !== "unknown") metaParts.push(r.language)
292
- if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
293
- if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
294
- if (r.class_name) metaParts.push(`class: ${r.class_name}`)
295
- const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
296
-
297
- // Score breakdown
298
- const breakdownParts: string[] = r._bm25Only
299
- ? [`bm25: ${(r._bm25Component ?? 0).toFixed(2)}`]
300
- : [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
301
- if (!r._bm25Only && r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
302
- if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
303
- const breakdown = breakdownParts.join(", ")
304
-
305
- // Matched keywords
306
- const kwDisplay = r._matchedKeywords && r._matchedKeywords.length > 0
307
- ? ` | matched: "${r._matchedKeywords.join('", "')}"`
308
- : ""
309
-
310
- output += `### ${i + 1}. ${r.file}${indexLabel}${chunkNote}\n`
311
- output += `**Score:** ${score} (${breakdown}${kwDisplay})${metaLine}\n\n`
312
- output += "```\n"
313
- const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
314
- output += content
315
- output += "\n```\n"
316
-
317
- // Second-best chunk hint
318
- if (chunks.length > 1) {
319
- const second = chunks.find((c: any) => c !== r)
320
- if (second) {
321
- const secMeta: string[] = []
322
- if (second.function_name) secMeta.push(`fn: ${second.function_name}`)
323
- if (second.heading_context) secMeta.push(`"${second.heading_context}"`)
324
- const secLabel = secMeta.length > 0 ? ` ${secMeta.join(", ")}` : ""
325
- output += `\n*Also:${secLabel}*\n`
326
- }
375
+ // ── Attached files (summary full content in workspace injection) ─────
376
+ if (attachedMain.length > 0) {
377
+ const totalAttached = attachedMain.length + attachedGraph.length
378
+ output += `### Attached to workspace (${totalAttached} files)\n\n`
379
+
380
+ for (let i = 0; i < attachedMain.length; i++) {
381
+ const group = topGroups.find(g => g.best.file === attachedMain[i])
382
+ if (!group) continue
383
+ const r = group.best
384
+ const score = (r._finalScore ?? 0).toFixed(3)
385
+ const chunkNote = group.chunks.length > 1 ? ` (${group.chunks.length} sections)` : ""
386
+
387
+ const metaParts: string[] = []
388
+ if (r.language && r.language !== "unknown") metaParts.push(r.language)
389
+ if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
390
+ if (r.class_name) metaParts.push(`class: ${r.class_name}`)
391
+ const metaLine = metaParts.length > 0 ? ` ${metaParts.join(", ")}` : ""
392
+
393
+ output += `${i + 1}. **${r.file}** score: ${score}${chunkNote}${metaLine}\n`
327
394
  }
328
395
 
329
- if (r.relatedContext && r.relatedContext.length > 0) {
330
- output += "\n**Related Context:**\n"
331
- for (const rel of r.relatedContext) {
332
- const snippet = rel.content.length > 200
333
- ? rel.content.substring(0, 200) + "..."
334
- : rel.content
335
- output += `- **${rel.file}** (${rel.relation}, via ${rel.via}, score: ${rel.score.toFixed(2)})\n`
336
- output += ` \`\`\`\n ${snippet}\n \`\`\`\n`
396
+ if (attachedGraph.length > 0) {
397
+ output += `\n**Graph relations:**\n`
398
+ for (const graphFile of attachedGraph) {
399
+ const entry = workspaceCache.get(graphFile)
400
+ const relation = entry?.metadata?.relation || "related"
401
+ const mainFile = entry?.metadata?.mainFile
402
+ const mainBasename = mainFile ? path.basename(mainFile) : "?"
403
+ output += `- ${graphFile} (${relation} from ${mainBasename})\n`
337
404
  }
338
405
  }
406
+ output += `\n`
407
+ }
339
408
 
340
- output += "\n"
409
+ // ── Rest files (summary only — not attached) ──────────────────────────
410
+ if (restGroups.length > 0) {
411
+ output += `### Additional results (summary only)\n\n`
412
+ for (let i = 0; i < restGroups.length; i++) {
413
+ const { best: r, chunks } = restGroups[i]
414
+ const score = (r._finalScore ?? 0).toFixed(3)
415
+ const chunkNote = chunks.length > 1 ? ` (${chunks.length} sections)` : ""
416
+ const indexLabel = args.searchAll ? ` [${r._index}]` : ""
417
+
418
+ const metaParts: string[] = []
419
+ if (r.language && r.language !== "unknown") metaParts.push(r.language)
420
+ if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
421
+ if (r.class_name) metaParts.push(`class: ${r.class_name}`)
422
+ const metaLine = metaParts.length > 0 ? ` — ${metaParts.join(", ")}` : ""
423
+
424
+ output += `${attachedMain.length + i + 1}. ${r.file}${indexLabel} score: ${score}${chunkNote}${metaLine}\n`
425
+ }
426
+ output += `\nUse \`workspace.attach("path")\` to attach additional files.\n`
341
427
  }
342
428
 
429
+ // ── Footer ────────────────────────────────────────────────────────────
343
430
  const totalChunks = allResults.length
344
431
  const uniqueFiles = sortedGroups.length
345
- output += `---\n*${uniqueFiles} files (${totalChunks} chunks). Use Read tool to see full files.*`
432
+ output += `\n---\n`
433
+ output += `*${uniqueFiles} files (${totalChunks} chunks) | `
434
+ output += `Workspace: ${workspaceCache.size} files, ${workspaceCache.totalTokens.toLocaleString()} tokens*\n`
435
+ output += `*Attached files are in workspace context — reference them directly without read().*`
436
+
346
437
  return output
347
438
  } catch (error: any) {
348
439
  return `Search failed: ${error.message || String(error)}`
@@ -0,0 +1,210 @@
1
+ /**
2
+ * Workspace Management Tools
3
+ *
4
+ * Manual control over the workspace cache:
5
+ * workspace_list — show all attached files + stats
6
+ * workspace_attach — manually attach a file by path
7
+ * workspace_detach — remove file(s) from workspace
8
+ * workspace_clear — remove all files
9
+ * workspace_restore — restore a saved session snapshot
10
+ */
11
+
12
+ import { tool } from "@opencode-ai/plugin"
13
+ import path from "path"
14
+ import fs from "fs/promises"
15
+
16
+ import { workspaceCache } from "../cache/manager.ts"
17
+
18
+ // ── workspace.list ──────────────────────────────────────────────────────────
19
+
20
+ export const workspace_list = tool({
21
+ description: `List all files currently in workspace context. Shows file paths, roles, scores, and token counts.`,
22
+
23
+ args: {},
24
+
25
+ async execute() {
26
+ const entries = workspaceCache.getAll()
27
+
28
+ if (entries.length === 0) {
29
+ return `Workspace is empty.\n\nUse search() to find and attach files, or workspace.attach("path") to add manually.`
30
+ }
31
+
32
+ const sessionId = workspaceCache.getSessionId()
33
+ let output = `## Workspace Status\n\n`
34
+ if (sessionId) {
35
+ output += `Session: ${sessionId}\n`
36
+ }
37
+ output += `Files: ${workspaceCache.size}\n`
38
+ output += `Total tokens: ${workspaceCache.totalTokens.toLocaleString()}\n\n`
39
+
40
+ const mainFiles = entries.filter(e => e.role === "search-main")
41
+ const graphFiles = entries.filter(e => e.role === "search-graph")
42
+ const manualFiles = entries.filter(e => e.role === "manual")
43
+
44
+ if (mainFiles.length > 0) {
45
+ output += `### Search results (${mainFiles.length})\n`
46
+ for (const e of mainFiles) {
47
+ const age = Math.floor((Date.now() - e.attachedAt) / 1000 / 60)
48
+ const score = e.score ? ` score: ${e.score.toFixed(3)}` : ""
49
+ const meta = e.metadata?.function_name || e.metadata?.class_name || ""
50
+ output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok${score}${meta ? ` (${meta})` : ""} — ${age}m ago\n`
51
+ if (e.attachedBy && e.attachedBy !== "manual") {
52
+ output += ` query: "${e.attachedBy}"\n`
53
+ }
54
+ }
55
+ output += `\n`
56
+ }
57
+
58
+ if (graphFiles.length > 0) {
59
+ output += `### Graph relations (${graphFiles.length})\n`
60
+ for (const e of graphFiles) {
61
+ const age = Math.floor((Date.now() - e.attachedAt) / 1000 / 60)
62
+ const relation = e.metadata?.relation || "related"
63
+ const mainFile = e.metadata?.mainFile ? path.basename(e.metadata.mainFile) : "?"
64
+ output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok — ${relation} from ${mainFile} — ${age}m ago\n`
65
+ }
66
+ output += `\n`
67
+ }
68
+
69
+ if (manualFiles.length > 0) {
70
+ output += `### Manually attached (${manualFiles.length})\n`
71
+ for (const e of manualFiles) {
72
+ const age = Math.floor((Date.now() - e.attachedAt) / 1000 / 60)
73
+ output += `- **${e.path}** — ${e.tokens.toLocaleString()} tok — ${age}m ago\n`
74
+ }
75
+ output += `\n`
76
+ }
77
+
78
+ // Budget info
79
+ const config = workspaceCache.getConfig()
80
+ const pct = Math.round((workspaceCache.totalTokens / config.maxTokens) * 100)
81
+ output += `---\n`
82
+ output += `*Budget: ${workspaceCache.totalTokens.toLocaleString()} / ${config.maxTokens.toLocaleString()} tokens (${pct}%) | `
83
+ output += `${workspaceCache.size} / ${config.maxFiles} files*`
84
+
85
+ return output
86
+ },
87
+ })
88
+
89
+ // ── workspace.attach ────────────────────────────────────────────────────────
90
+
91
+ export const workspace_attach = tool({
92
+ description: `Manually attach a file to workspace context. The file will be visible in context injection without needing read().`,
93
+
94
+ args: {
95
+ filePath: tool.schema.string().describe("Relative file path to attach (e.g. 'src/auth/login.ts')"),
96
+ },
97
+
98
+ async execute(args) {
99
+ const projectRoot = process.cwd()
100
+
101
+ // Check if already attached
102
+ if (workspaceCache.has(args.filePath)) {
103
+ const entry = workspaceCache.get(args.filePath)!
104
+ return `File "${args.filePath}" is already in workspace.\nRole: ${entry.role} | Tokens: ${entry.tokens.toLocaleString()} | Score: ${entry.score?.toFixed(3) ?? "n/a"}`
105
+ }
106
+
107
+ // Read file content
108
+ try {
109
+ const fullPath = path.join(projectRoot, args.filePath)
110
+ const content = await fs.readFile(fullPath, "utf-8")
111
+
112
+ workspaceCache.attach({
113
+ path: args.filePath,
114
+ content,
115
+ role: "manual",
116
+ attachedAt: Date.now(),
117
+ attachedBy: "manual",
118
+ })
119
+
120
+ return `Attached "${args.filePath}" to workspace.\nTokens: ${workspaceCache.get(args.filePath)!.tokens.toLocaleString()}\nWorkspace total: ${workspaceCache.totalTokens.toLocaleString()} tokens (${workspaceCache.size} files)`
121
+ } catch (error: any) {
122
+ return `Failed to attach "${args.filePath}": ${error.message || String(error)}`
123
+ }
124
+ },
125
+ })
126
+
127
+ // ── workspace.detach ────────────────────────────────────────────────────────
128
+
129
+ export const workspace_detach = tool({
130
+ description: `Remove file(s) from workspace context. Can detach by path, by search query, or by age.`,
131
+
132
+ args: {
133
+ filePath: tool.schema.string().optional().describe("Specific file path to remove"),
134
+ query: tool.schema.string().optional().describe("Remove all files attached by this search query"),
135
+ olderThan: tool.schema.number().optional().describe("Remove files older than N minutes"),
136
+ },
137
+
138
+ async execute(args) {
139
+ let removed = 0
140
+
141
+ if (args.filePath) {
142
+ removed = workspaceCache.detach(args.filePath) ? 1 : 0
143
+ if (removed === 0) {
144
+ return `File "${args.filePath}" not found in workspace.`
145
+ }
146
+ } else if (args.query) {
147
+ removed = workspaceCache.detachByQuery(args.query)
148
+ } else if (args.olderThan) {
149
+ removed = workspaceCache.detachOlderThan(args.olderThan * 60 * 1000)
150
+ } else {
151
+ return `Specify filePath, query, or olderThan to detach files.`
152
+ }
153
+
154
+ return `Removed ${removed} file(s) from workspace.\nWorkspace: ${workspaceCache.size} files, ${workspaceCache.totalTokens.toLocaleString()} tokens`
155
+ },
156
+ })
157
+
158
+ // ── workspace.clear ─────────────────────────────────────────────────────────
159
+
160
+ export const workspace_clear = tool({
161
+ description: `Remove ALL files from workspace context. Use when switching tasks or starting fresh.`,
162
+
163
+ args: {},
164
+
165
+ async execute() {
166
+ const count = workspaceCache.size
167
+ const tokens = workspaceCache.totalTokens
168
+ workspaceCache.clear()
169
+
170
+ return `Cleared workspace: ${count} files removed (${tokens.toLocaleString()} tokens freed).\nWorkspace is now empty.`
171
+ },
172
+ })
173
+
174
+ // ── workspace.restore ───────────────────────────────────────────────────────
175
+
176
+ export const workspace_restore = tool({
177
+ description: `Restore workspace from a saved session snapshot. Use after compaction or to switch context.`,
178
+
179
+ args: {
180
+ sessionId: tool.schema.string().optional().describe("Session ID to restore. If not provided, lists available snapshots."),
181
+ },
182
+
183
+ async execute(args) {
184
+ if (!args.sessionId) {
185
+ // List available snapshots
186
+ const snapshots = await workspaceCache.listSnapshots()
187
+
188
+ if (snapshots.length === 0) {
189
+ return `No saved workspace snapshots found.`
190
+ }
191
+
192
+ let output = `## Saved Workspace Snapshots\n\n`
193
+ for (const snap of snapshots) {
194
+ const date = new Date(snap.savedAt).toLocaleString()
195
+ output += `- **${snap.id}** — ${snap.fileCount} files, ${snap.totalTokens.toLocaleString()} tokens — ${date}\n`
196
+ }
197
+ output += `\nUse \`workspace.restore("session-id")\` to restore.`
198
+ return output
199
+ }
200
+
201
+ // Restore specific snapshot
202
+ const restored = await workspaceCache.restore(args.sessionId)
203
+
204
+ if (!restored) {
205
+ return `Snapshot "${args.sessionId}" not found or empty.`
206
+ }
207
+
208
+ return `Restored workspace from "${args.sessionId}".\nFiles: ${workspaceCache.size}\nTokens: ${workspaceCache.totalTokens.toLocaleString()}`
209
+ },
210
+ })
@@ -95,6 +95,20 @@ const DEFAULT_SEARCH_CONFIG = {
95
95
  };
96
96
  let SEARCH_CONFIG = { ...DEFAULT_SEARCH_CONFIG };
97
97
 
98
+ // ── Workspace injection config (v4) ─────────────────────────────────────────
99
+ const DEFAULT_WORKSPACE_CONFIG = {
100
+ maxTokens: 50_000, // Max total tokens across all cached files
101
+ maxFiles: 30, // Max number of files in workspace
102
+ attachTopN: 5, // Top N search results to attach with full content
103
+ attachRelatedPerFile: 3, // Max graph relations per main file
104
+ minScoreMain: 0.65, // Min score for main files
105
+ minScoreRelated: 0.5, // Min score for graph relations
106
+ persistContent: false, // Save full content in snapshots (debug mode)
107
+ autoPruneSearch: true, // Replace old search outputs with compact summaries
108
+ substituteToolOutputs: true, // Replace tool outputs when files in workspace
109
+ };
110
+ let WORKSPACE_CONFIG = { ...DEFAULT_WORKSPACE_CONFIG };
111
+
98
112
  // ── Graph config (v3) ───────────────────────────────────────────────────────
99
113
  const DEFAULT_GRAPH_CONFIG = {
100
114
  enabled: true,
@@ -162,6 +176,18 @@ function defaultVectorizerYaml() {
162
176
  ` timeout_ms: 5000\n` +
163
177
  ` read_intercept: true\n` +
164
178
  `\n` +
179
+ ` # Workspace injection (v4)\n` +
180
+ ` workspace:\n` +
181
+ ` max_tokens: 50000 # Max total tokens across all cached files\n` +
182
+ ` max_files: 30 # Max number of files in workspace\n` +
183
+ ` attach_top_n: 5 # Top N search results to attach with full content\n` +
184
+ ` attach_related_per_file: 3 # Max graph relations per main file\n` +
185
+ ` min_score_main: 0.65 # Min score for main files\n` +
186
+ ` min_score_related: 0.5 # Min score for graph relations\n` +
187
+ ` persist_content: false # Save full content in snapshots (debug mode)\n` +
188
+ ` auto_prune_search: true # Replace old search outputs with compact summaries\n` +
189
+ ` substitute_tool_outputs: true # Replace tool outputs when files in workspace\n` +
190
+ `\n` +
165
191
  ` # Quality monitoring\n` +
166
192
  ` quality:\n` +
167
193
  ` enable_metrics: false\n` +
@@ -320,6 +346,22 @@ async function loadConfig(projectRoot) {
320
346
  SEARCH_CONFIG.default_limit = parseNumber(ss, "default_limit", DEFAULT_SEARCH_CONFIG.default_limit);
321
347
  }
322
348
 
349
+ // ── Parse workspace injection config (v4) ──────────────────────────────
350
+ const workspaceMatch = section.match(/^\s{2}workspace:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
351
+ if (workspaceMatch) {
352
+ const ws = workspaceMatch[1];
353
+ WORKSPACE_CONFIG.maxTokens = parseNumber(ws, "max_tokens", DEFAULT_WORKSPACE_CONFIG.maxTokens);
354
+ WORKSPACE_CONFIG.maxFiles = parseNumber(ws, "max_files", DEFAULT_WORKSPACE_CONFIG.maxFiles);
355
+ WORKSPACE_CONFIG.attachTopN = parseNumber(ws, "attach_top_n", DEFAULT_WORKSPACE_CONFIG.attachTopN);
356
+ WORKSPACE_CONFIG.attachRelatedPerFile = parseNumber(ws, "attach_related_per_file", DEFAULT_WORKSPACE_CONFIG.attachRelatedPerFile);
357
+ WORKSPACE_CONFIG.minScoreMain = parseNumber(ws, "min_score_main", DEFAULT_WORKSPACE_CONFIG.minScoreMain);
358
+ WORKSPACE_CONFIG.minScoreRelated = parseNumber(ws, "min_score_related", DEFAULT_WORKSPACE_CONFIG.minScoreRelated);
359
+ WORKSPACE_CONFIG.persistContent = parseBool(ws, "persist_content", DEFAULT_WORKSPACE_CONFIG.persistContent);
360
+ WORKSPACE_CONFIG.autoPruneSearch = parseBool(ws, "auto_prune_search", DEFAULT_WORKSPACE_CONFIG.autoPruneSearch);
361
+ WORKSPACE_CONFIG.substituteToolOutputs = parseBool(ws, "substitute_tool_outputs", DEFAULT_WORKSPACE_CONFIG.substituteToolOutputs);
362
+ if (DEBUG) console.log("[vectorizer] Workspace config:", WORKSPACE_CONFIG);
363
+ }
364
+
323
365
  // ── Parse quality config ────────────────────────────────────────────────
324
366
  const qualityMatch = section.match(/^\s{2}quality:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
325
367
  if (qualityMatch) {
@@ -1683,6 +1725,10 @@ function getSearchConfig() {
1683
1725
  return SEARCH_CONFIG;
1684
1726
  }
1685
1727
 
1728
+ function getWorkspaceConfig() {
1729
+ return WORKSPACE_CONFIG;
1730
+ }
1731
+
1686
1732
  // ── Singleton indexer pool ──────────────────────────────────────────────────
1687
1733
  // Prevents LevelDB lock conflicts when parallel searches hit the same index.
1688
1734
  // Each unique (projectRoot, indexName) gets one shared CodebaseIndexer.
@@ -1744,4 +1790,4 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
1744
1790
  }
1745
1791
  }
1746
1792
 
1747
- export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getIndexer, releaseIndexer, destroyIndexer };
1793
+ export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getWorkspaceConfig, getIndexer, releaseIndexer, destroyIndexer };
package/vectorizer.yaml CHANGED
@@ -55,6 +55,17 @@ vectorizer:
55
55
  # Read() intercept
56
56
  read_intercept: true
57
57
 
58
+ # Workspace injection (v4) — search results attached to AI context
59
+ workspace:
60
+ max_tokens: 50000 # Max total tokens across all cached files
61
+ max_files: 30 # Max number of files in workspace
62
+ attach_top_n: 5 # Top N search results to attach with full content
63
+ attach_related_per_file: 3 # Max graph relations per main file
64
+ min_score_main: 0.65 # Min score for main files
65
+ min_score_related: 0.5 # Min score for graph relations
66
+ persist_content: false # Save full content in snapshots (debug mode)
67
+ auto_prune_search: true # Replace old search outputs with compact summaries
68
+
58
69
  # Quality monitoring (v2)
59
70
  quality:
60
71
  enable_metrics: false # Track search quality metrics