@comfanion/usethis_search 3.0.0-dev.23 → 3.0.0-dev.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@comfanion/usethis_search",
3
- "version": "3.0.0-dev.23",
3
+ "version": "3.0.0-dev.25",
4
4
  "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
package/tools/search.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  /**
2
- * Semantic Code Search Tool (v2)
2
+ * Semantic Code Search Tool (v3)
3
3
  *
4
4
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
5
- * v2: hybrid search, metadata filtering, rich result metadata.
5
+ * v3: simplified agent API — 5 params, config-driven defaults, smart filter.
6
6
  * Index data is stored in `.opencode/vectors/<index>/`.
7
7
  */
8
8
 
@@ -10,7 +10,82 @@ import { tool } from "@opencode-ai/plugin"
10
10
  import path from "path"
11
11
  import fs from "fs/promises"
12
12
 
13
- import { CodebaseIndexer } from "../vectorizer/index.ts"
13
+ import { CodebaseIndexer, getSearchConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
14
+
15
+ // ── Extension → language mapping (for filter parsing) ─────────────────────
16
/** Lowercase file extension (without the dot) → language name passed to the search options. */
const EXT_TO_LANG: Record<string, string> = {
  go: "go", py: "python", ts: "typescript", tsx: "typescript",
  js: "javascript", jsx: "javascript", mjs: "javascript", cjs: "javascript",
  rs: "rust", java: "java", kt: "kotlin", swift: "swift",
  c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
  rb: "ruby", php: "php", scala: "scala", clj: "clojure",
  md: "markdown", mdx: "markdown", txt: "text",
  yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
}

/** Every language name that appears as a value in EXT_TO_LANG. */
const LANG_NAMES = new Set(Object.values(EXT_TO_LANG))

/** Shape returned by parseFilter; absent keys mean "no constraint of that kind". */
type ParsedFilter = {
  pathPrefix?: string
  language?: string
  pathContains?: string
}

/**
 * Resolve an extension to a language name, case-insensitively.
 *
 * Uses an own-property check so inherited Object.prototype members
 * ("constructor", "__proto__", ...) are never mistaken for a mapping.
 */
function langFromExt(ext: string): string | undefined {
  const key = ext.toLowerCase()
  return Object.prototype.hasOwnProperty.call(EXT_TO_LANG, key) ? EXT_TO_LANG[key] : undefined
}

/**
 * Parse the `filter` param into path prefix, language and/or path-substring constraints.
 *
 * Supported formats:
 *   "internal/domain/"   → pathPrefix = "internal/domain"
 *   "internal/domain"    → pathPrefix = "internal/domain"
 *   "*.go" / ".go"       → language = "go"
 *   "go" / "python"      → language = "go" / "python"
 *   "internal/*.go"      → pathPrefix = "internal", language = "go"
 *   "internal/**\/*.go"  → pathPrefix = "internal", language = "go"
 *   "*.sql"              → pathContains = ".sql"   (extension with no known language)
 *   "service"            → pathContains = "service"
 *
 * @param filter Raw filter string supplied by the caller; surrounding whitespace is ignored.
 * @returns An object containing only the constraints that apply. Empty or
 *          whitespace-only input yields `{}`.
 */
function parseFilter(filter: string): {
  pathPrefix?: string
  language?: string
  pathContains?: string
} {
  if (!filter) return {}

  const f = filter.trim()
  if (!f) return {}

  // Adds the extension constraint to `target`: a language when the extension is
  // known, otherwise a ".ext" path substring so the constraint is not silently lost.
  const applyExtension = (target: ParsedFilter, ext: string): ParsedFilter => {
    const language = langFromExt(ext)
    if (language) {
      target.language = language
    } else {
      target.pathContains = `.${ext}`
    }
    return target
  }

  // "internal/**/*.go" or "internal/*.go" → path + extension
  const globMatch = f.match(/^([^*]+?)(?:\/\*\*)?\/?\*\.(\w+)$/)
  if (globMatch) {
    const parsed: ParsedFilter = {}
    const prefix = globMatch[1].replace(/\/+$/, "")
    if (prefix) parsed.pathPrefix = prefix
    return applyExtension(parsed, globMatch[2])
  }

  // "*.go" or ".go" → extension only
  const extMatch = f.match(/^\*?\.(\w+)$/)
  if (extMatch) {
    return applyExtension({}, extMatch[1])
  }

  // "go", "python", "typescript" → language name; "ts", "py" → extension used as a bare word
  const lower = f.toLowerCase()
  if (LANG_NAMES.has(lower)) {
    return { language: lower }
  }
  const bareExtLanguage = langFromExt(lower)
  if (bareExtLanguage) {
    return { language: bareExtLanguage }
  }

  // Anything containing "/" (with or without a trailing slash) → path prefix
  if (f.includes("/")) {
    const prefix = f.replace(/\/+$/, "")
    return prefix ? { pathPrefix: prefix } : {}
  }

  // Anything else → substring match on file path
  return { pathContains: f }
}
14
89
 
15
90
  export default tool({
16
91
  description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
@@ -22,106 +97,72 @@ Available indexes:
22
97
  - searchAll: true - Search across all indexes
23
98
 
24
99
  Examples:
25
- - "authentication logic" → finds auth-related code
26
- - "database connection handling" finds DB setup code
27
- - "how to deploy" with index: "docs" → finds deployment docs
28
- - "API keys" with index: "config" → finds config with API settings
29
- - search({ query: "tenant", path: "internal/domain/" }) → searches only in internal/domain/`,
100
+ - search({ query: "authentication logic" })
101
+ - search({ query: "how to deploy", index: "docs" })
102
+ - search({ query: "tenant management", filter: "internal/domain/" })
103
+ - search({ query: "event handling", filter: "*.go" })
104
+ - search({ query: "API routes", filter: "internal/**/*.go" })
105
+ - search({ query: "metrics", searchAll: true })`,
30
106
 
31
107
  args: {
32
108
  query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
33
- index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config, or custom name"),
34
- limit: tool.schema.number().optional().default(10).describe("Number of results to return (default: 10)"),
109
+ index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config"),
110
+ limit: tool.schema.number().optional().describe("Number of results (default from config, typically 10)"),
35
111
  searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
36
- freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
37
- includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
38
- // v2 params
39
- hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
40
- fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
41
- language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
42
- modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
43
- modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
44
- tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
45
- minScore: tool.schema.number().optional().default(0.35).describe("Minimum relevance score (0-1). Results below this threshold are dropped. Default: 0.35"),
46
- path: tool.schema.string().optional().describe("Filter by file path prefix (e.g. 'internal/domain/', 'src/components'). Only returns files under this path."),
112
+ filter: tool.schema.string().optional().describe("Filter results by path or language. Examples: 'internal/domain/', '*.go', 'internal/**/*.go', 'service'"),
47
113
  },
48
114
 
49
115
  async execute(args) {
50
116
  const projectRoot = process.cwd()
51
117
 
52
118
  try {
53
- let allResults: any[] = []
54
- const limit = args.limit || 10
119
+ // Load config defaults (parsed from vectorizer.yaml)
120
+ const cfg = getSearchConfig()
121
+ const limit = args.limit || cfg.default_limit || 10
55
122
  const indexName = args.index || "code"
123
+ const minScore = cfg.min_score ?? 0.35
124
+ const includeArchived = cfg.include_archived ?? false
125
+
126
+ // Parse filter into path/language constraints
127
+ const filterParsed = args.filter ? parseFilter(args.filter) : {}
56
128
 
57
- // Build search options from v2 params
129
+ // Build search options — hybrid is always from per-index config
58
130
  const searchOptions: Record<string, any> = {}
59
- if (args.hybrid != null) searchOptions.hybrid = args.hybrid
60
-
61
- // Normalize fileType: support extensions (*.go, .go) and language names (go, python)
62
- // fileType field stores "code" | "docs" | "config", so map user-friendly values
63
- if (args.fileType) {
64
- const ft = args.fileType.replace(/^\*?\.?/, "").toLowerCase()
65
- const extToLanguage: Record<string, string> = {
66
- go: "go", py: "python", ts: "typescript", tsx: "typescript",
67
- js: "javascript", jsx: "javascript", mjs: "javascript", cjs: "javascript",
68
- rs: "rust", java: "java", kt: "kotlin", swift: "swift",
69
- c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
70
- rb: "ruby", php: "php", scala: "scala", clj: "clojure",
71
- md: "markdown", mdx: "markdown", txt: "text",
72
- yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
73
- }
74
- // Also accept full language names
75
- const langNames = new Set([
76
- "go", "python", "typescript", "javascript", "rust", "java", "kotlin",
77
- "swift", "c", "cpp", "csharp", "ruby", "php", "scala", "clojure",
78
- "markdown", "text", "yaml", "json", "toml",
79
- ])
80
-
81
- if (ft === "code" || ft === "docs" || ft === "config") {
82
- searchOptions.fileType = ft
83
- } else if (extToLanguage[ft]) {
84
- searchOptions.language = extToLanguage[ft]
85
- } else if (langNames.has(ft)) {
86
- searchOptions.language = ft
87
- } else {
88
- searchOptions.fileType = ft // pass through as-is
131
+ if (filterParsed.language) searchOptions.language = filterParsed.language
132
+
133
+ // Freshen from config (default: false — auto_index handles it)
134
+ if (cfg.freshen) {
135
+ try {
136
+ const indexer = await getIndexer(projectRoot, indexName)
137
+ await indexer.freshen()
138
+ releaseIndexer(projectRoot, indexName)
139
+ } catch {
140
+ // non-fatal — search can proceed without freshen
89
141
  }
90
142
  }
91
143
 
92
- if (args.language) searchOptions.language = args.language
93
- if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
94
- if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
95
- if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
96
-
97
- // Auto-freshen stale files before searching
98
- if (args.freshen !== false) {
99
- const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
100
- await tempIndexer.freshen()
101
- await tempIndexer.unloadModel()
102
- }
144
+ let allResults: any[] = []
103
145
 
104
146
  if (args.searchAll) {
105
- const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
147
+ const tempIndexer = await getIndexer(projectRoot, "code")
106
148
  const indexes = await tempIndexer.listIndexes()
107
- await tempIndexer.unloadModel()
149
+ releaseIndexer(projectRoot, "code")
108
150
 
109
151
  if (indexes.length === 0) {
110
- return `No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
152
+ return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
111
153
  }
112
154
 
113
155
  for (const idx of indexes) {
114
- const indexer = await new CodebaseIndexer(projectRoot, idx).init()
115
- if (args.freshen !== false) {
116
- await indexer.freshen()
156
+ const indexer = await getIndexer(projectRoot, idx)
157
+ try {
158
+ const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
159
+ allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
160
+ } finally {
161
+ releaseIndexer(projectRoot, idx)
117
162
  }
118
- const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
119
- allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
120
- await indexer.unloadModel()
121
163
  }
122
164
 
123
165
  allResults.sort((a, b) => {
124
- // Prefer combinedScore (hybrid), fall back to L2→similarity conversion
125
166
  const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
126
167
  const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
127
168
  return scoreB - scoreA
@@ -133,9 +174,9 @@ Examples:
133
174
  await fs.access(hashesFile)
134
175
  } catch {
135
176
  // Index doesn't exist — check what indexes ARE available
136
- const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
177
+ const tempIndexer = await getIndexer(projectRoot, "code")
137
178
  const available = await tempIndexer.listIndexes()
138
- await tempIndexer.unloadModel()
179
+ releaseIndexer(projectRoot, "code")
139
180
 
140
181
  if (available.length > 0) {
141
182
  const list = available.map(i => `"${i}"`).join(", ")
@@ -144,41 +185,44 @@ Examples:
144
185
  return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
145
186
  }
146
187
 
147
- const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
148
- const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
149
- allResults = results.map((r: any) => ({ ...r, _index: indexName }))
150
- await indexer.unloadModel()
188
+ const indexer = await getIndexer(projectRoot, indexName)
189
+ try {
190
+ const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
191
+ allResults = results.map((r: any) => ({ ...r, _index: indexName }))
192
+ } finally {
193
+ releaseIndexer(projectRoot, indexName)
194
+ }
151
195
  }
152
196
 
153
197
  // ── Score cutoff — drop low-relevance results ──────────────────────────
154
- const minScore = args.minScore ?? 0.35
155
198
  allResults = allResults.filter(r => {
156
199
  const score = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
157
200
  return score >= minScore
158
201
  })
159
202
 
160
- // ── Path filter restrict to subtree ──────────────────────────────────
161
- if (args.path) {
162
- const prefix = args.path.replace(/\/+$/, "") // normalize trailing slash
203
+ // ── Filter — apply path/language constraints from `filter` param ───────
204
+ if (filterParsed.pathPrefix) {
205
+ const prefix = filterParsed.pathPrefix
163
206
  allResults = allResults.filter(r => r.file && r.file.startsWith(prefix))
164
207
  }
208
+ if (filterParsed.pathContains) {
209
+ const needle = filterParsed.pathContains.toLowerCase()
210
+ allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
211
+ }
212
+ // Language filter is already passed to searchOptions above, but double-check
213
+ // in case vectorizer didn't filter (e.g. docs index has no language field)
214
+ if (filterParsed.language) {
215
+ allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
216
+ }
165
217
 
166
218
  // ── Reranking — boost results where query keywords appear in text ──────
167
- // Also store score components for breakdown display
168
219
  const queryKeywords = args.query.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
169
220
  for (const r of allResults) {
170
- // Vector score (L2 → similarity)
171
221
  const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
172
222
  r._vectorScore = vectorScore
173
-
174
- // BM25 component (present only in hybrid mode — embedded in _combinedScore)
175
- // If _combinedScore exists and differs from vectorScore, the difference is BM25 contribution
176
223
  r._bm25Component = r._combinedScore != null ? Math.max(0, r._combinedScore - vectorScore) : 0
177
-
178
- // Base score before keyword boost
179
224
  const baseScore = r._combinedScore ?? vectorScore
180
225
 
181
- // Keyword matching
182
226
  const text = (r.content || "").toLowerCase()
183
227
  const matchedKeywords: string[] = []
184
228
  if (queryKeywords.length > 0) {
@@ -208,25 +252,24 @@ Examples:
208
252
  }
209
253
  }
210
254
 
211
- // Sort groups by best chunk score, take top N unique files
212
255
  const sortedGroups = [...fileGroups.values()]
213
256
  .sort((a, b) => (b.best._finalScore ?? 0) - (a.best._finalScore ?? 0))
214
257
  .slice(0, limit)
215
258
 
216
259
  if (sortedGroups.length === 0) {
217
260
  const scope = args.searchAll ? "any index" : `index "${indexName}"`
218
- return `No results found in ${scope} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords\n- Lower minScore threshold: search({ query: "...", minScore: 0.2 })\n- Enable hybrid search: search({ query: "...", hybrid: true })`
261
+ const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
262
+ return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
219
263
  }
220
264
 
221
265
  // ── Confidence signal ──────────────────────────────────────────────────
222
266
  const topScore = sortedGroups[0].best._finalScore ?? 0
223
267
  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
224
- const hybridLabel = args.hybrid ? " [hybrid]" : ""
225
- const pathLabel = args.path ? ` path:"${args.path}"` : ""
226
- let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel}${pathLabel})\n\n`
268
+ const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
269
+ let output = `## Search Results for: "${args.query}" (${scope}${filterLabel})\n\n`
227
270
 
228
271
  if (topScore < 0.45) {
229
- output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords, different phrasing, or hybrid: true.\n\n`
272
+ output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords or different phrasing.\n\n`
230
273
  }
231
274
 
232
275
  for (let i = 0; i < sortedGroups.length; i++) {
@@ -235,7 +278,7 @@ Examples:
235
278
  const indexLabel = args.searchAll ? ` [${r._index}]` : ""
236
279
  const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""
237
280
 
238
- // v2: show rich metadata when available
281
+ // Rich metadata
239
282
  const metaParts: string[] = []
240
283
  if (r.language && r.language !== "unknown") metaParts.push(r.language)
241
284
  if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
@@ -243,7 +286,7 @@ Examples:
243
286
  if (r.class_name) metaParts.push(`class: ${r.class_name}`)
244
287
  const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
245
288
 
246
- // Score breakdown: vector + bm25 + keywords
289
+ // Score breakdown
247
290
  const breakdownParts: string[] = [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
248
291
  if (r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
249
292
  if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
@@ -261,7 +304,7 @@ Examples:
261
304
  output += content
262
305
  output += "\n```\n"
263
306
 
264
- // Show second-best chunk from same file if available (brief)
307
+ // Second-best chunk hint
265
308
  if (chunks.length > 1) {
266
309
  const second = chunks.find((c: any) => c !== r)
267
310
  if (second) {
@@ -85,6 +85,15 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
85
85
  let METRICS_ENABLED = false;
86
86
  let CACHE_ENABLED = true;
87
87
 
88
+ // ── Search defaults (exposed to tool layer) ──────────────────────────────────
89
/**
 * Built-in search defaults. Each value is the fallback used when the
 * corresponding key is not supplied by the parsed configuration.
 */
const DEFAULT_SEARCH_CONFIG = {
  // Don't freshen on every search — auto_index handles it
  freshen: false,
  // Minimum relevance score cutoff
  min_score: 0.35,
  // Exclude archived files by default
  include_archived: false,
  // Default result count
  default_limit: 10,
};

/** Live search settings; starts as a shallow copy of the defaults. */
let SEARCH_CONFIG = { ...DEFAULT_SEARCH_CONFIG };
96
+
88
97
  // ── Graph config (v3) ───────────────────────────────────────────────────────
89
98
  const DEFAULT_GRAPH_CONFIG = {
90
99
  enabled: true,
@@ -135,6 +144,10 @@ function defaultVectorizerYaml() {
135
144
  ` search:\n` +
136
145
  ` hybrid: true\n` +
137
146
  ` bm25_weight: 0.3\n` +
147
+ ` freshen: false # Don't re-index on every search (auto_index handles it)\n` +
148
+ ` min_score: 0.35 # Minimum relevance score cutoff\n` +
149
+ ` include_archived: false # Exclude archived files\n` +
150
+ ` default_limit: 10 # Default number of results\n` +
138
151
  `\n` +
139
152
  ` # Graph-based context (v3)\n` +
140
153
  ` graph:\n` +
@@ -299,6 +312,11 @@ async function loadConfig(projectRoot) {
299
312
  enabled: parseBool(ss, "hybrid", false),
300
313
  bm25_weight: parseNumber(ss, "bm25_weight", 0.3),
301
314
  };
315
+ // Extended search defaults
316
+ SEARCH_CONFIG.freshen = parseBool(ss, "freshen", DEFAULT_SEARCH_CONFIG.freshen);
317
+ SEARCH_CONFIG.min_score = parseNumber(ss, "min_score", DEFAULT_SEARCH_CONFIG.min_score);
318
+ SEARCH_CONFIG.include_archived = parseBool(ss, "include_archived", DEFAULT_SEARCH_CONFIG.include_archived);
319
+ SEARCH_CONFIG.default_limit = parseNumber(ss, "default_limit", DEFAULT_SEARCH_CONFIG.default_limit);
302
320
  }
303
321
 
304
322
  // ── Parse quality config ────────────────────────────────────────────────
@@ -442,17 +460,24 @@ class CodebaseIndexer {
442
460
  await this.loadHashes();
443
461
 
444
462
  // Graph DB — only if graph is enabled in config
463
+ // Non-fatal: if LevelDB lock fails (parallel access), search works without graph
445
464
  if (GRAPH_CONFIG.enabled) {
446
- const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
447
- const graphPath = path.join(this.root, ".opencode", "graph", graphType);
448
- await fs.mkdir(path.dirname(graphPath), { recursive: true });
449
- this.graphDB = await new GraphDB(graphPath).init();
450
- this.graphBuilder = new GraphBuilder(
451
- this.graphDB,
452
- this.root,
453
- GRAPH_CONFIG.lsp.enabled,
454
- GRAPH_CONFIG.lsp.timeout_ms,
455
- );
465
+ try {
466
+ const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
467
+ const graphPath = path.join(this.root, ".opencode", "graph", graphType);
468
+ await fs.mkdir(path.dirname(graphPath), { recursive: true });
469
+ this.graphDB = await new GraphDB(graphPath).init();
470
+ this.graphBuilder = new GraphBuilder(
471
+ this.graphDB,
472
+ this.root,
473
+ GRAPH_CONFIG.lsp.enabled,
474
+ GRAPH_CONFIG.lsp.timeout_ms,
475
+ );
476
+ } catch (e) {
477
+ if (DEBUG) console.log(`[vectorizer] GraphDB init failed (lock?): ${e.message || e}`);
478
+ this.graphDB = null;
479
+ this.graphBuilder = null;
480
+ }
456
481
  }
457
482
 
458
483
  // Usage tracker — provenance & usage stats
@@ -1305,4 +1330,69 @@ function getEmbeddingModel() {
1305
1330
  return EMBEDDING_MODEL;
1306
1331
  }
1307
1332
 
1308
- export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel };
1333
/**
 * Accessor for the module-level search settings.
 *
 * @returns The `SEARCH_CONFIG` object itself (not a copy), so callers
 *          observe whatever values it holds at the time they read it.
 */
function getSearchConfig(): typeof SEARCH_CONFIG {
  return SEARCH_CONFIG;
}
1336
+
1337
+ // ── Singleton indexer pool ──────────────────────────────────────────────────
1338
+ // Prevents LevelDB lock conflicts when parallel searches hit the same index.
1339
+ // Each unique (projectRoot, indexName) gets one shared CodebaseIndexer.
1340
+ const _indexerPool = new Map<string, { indexer: CodebaseIndexer; refCount: number; initPromise: Promise<CodebaseIndexer> }>();
1341
+
1342
/**
 * Get or create a shared CodebaseIndexer for the given project + index.
 *
 * Callers asking for the same (projectRoot, indexName) pair receive the same
 * instance, which is intended to avoid LevelDB lock conflicts between
 * parallel searches. Each successful call increments the entry's refCount;
 * pair it with releaseIndexer().
 *
 * If initialisation fails, the pool entry is evicted so that a later call
 * retries with a fresh indexer instead of receiving the cached rejection.
 *
 * Usage:
 *   const indexer = await getIndexer(projectRoot, "code");
 *   try {
 *     const results = await indexer.search(...);
 *   } finally {
 *     releaseIndexer(projectRoot, "code");
 *   }
 *
 * @param projectRoot Project root used to construct the indexer and as part of the pool key.
 * @param indexName   Index name (default "code"); second part of the pool key.
 * @returns The initialised, shared indexer.
 * @throws Whatever `CodebaseIndexer.init()` rejects with.
 */
async function getIndexer(projectRoot: string, indexName: string = "code"): Promise<CodebaseIndexer> {
  const key = `${projectRoot}::${indexName}`;
  const existing = _indexerPool.get(key);
  if (existing) {
    existing.refCount++;
    return existing.initPromise;
  }

  const indexer = new CodebaseIndexer(projectRoot, indexName);
  const initPromise = indexer.init();
  const entry = { indexer, refCount: 1, initPromise };
  // Register before awaiting so concurrent callers share this initialisation.
  _indexerPool.set(key, entry);

  try {
    return await initPromise;
  } catch (err) {
    // Evict only if the slot still holds this failed entry; it may already
    // have been removed or replaced (e.g. by destroyIndexer + a new getIndexer).
    if (_indexerPool.get(key) === entry) {
      _indexerPool.delete(key);
    }
    throw err;
  }
}
1367
+
1368
/**
 * Drop one reference to a pooled indexer.
 *
 * Only the entry's refCount is decremented (never below 0). The indexer
 * stays in the pool and nothing is unloaded here, so the next search can
 * reuse the same instance. Use destroyIndexer() to remove it from the pool.
 *
 * @param projectRoot Project root part of the pool key.
 * @param indexName   Index name part of the pool key (default "code").
 */
function releaseIndexer(projectRoot: string, indexName: string = "code") {
  const pooled = _indexerPool.get(`${projectRoot}::${indexName}`);
  if (pooled === undefined) return;

  // Clamp at zero so an unmatched release cannot drive the count negative.
  pooled.refCount = pooled.refCount > 0 ? pooled.refCount - 1 : 0;
}
1380
+
1381
/**
 * Remove an indexer from the pool and unload its model.
 *
 * The pool entry is deleted first, so a subsequent getIndexer() call builds a
 * fresh instance. Unloading is best effort: a failed initialisation or a
 * failing unloadModel() is ignored.
 * NOTE(review): only unloadModel() is invoked here — confirm whether other
 * resources held by the indexer also need closing.
 *
 * @param projectRoot Project root part of the pool key.
 * @param indexName   Index name part of the pool key (default "code").
 */
async function destroyIndexer(projectRoot: string, indexName: string = "code") {
  const poolKey = `${projectRoot}::${indexName}`;
  const pooled = _indexerPool.get(poolKey);
  if (pooled === undefined) return;

  _indexerPool.delete(poolKey);
  try {
    const ready = await pooled.initPromise;
    await ready.unloadModel();
  } catch {
    // best effort
  }
}
1397
+
1398
+ export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getIndexer, releaseIndexer, destroyIndexer };