@comfanion/usethis_search 3.0.0-dev.23 → 3.0.0-dev.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/tools/search.ts +145 -102
- package/vectorizer/index.ts +101 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "3.0.0-dev.
|
|
3
|
+
"version": "3.0.0-dev.25",
|
|
4
4
|
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
package/tools/search.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Semantic Code Search Tool (
|
|
2
|
+
* Semantic Code Search Tool (v3)
|
|
3
3
|
*
|
|
4
4
|
* Uses local embeddings + LanceDB vector store via bundled vectorizer.
|
|
5
|
-
*
|
|
5
|
+
* v3: simplified agent API — 5 params, config-driven defaults, smart filter.
|
|
6
6
|
* Index data is stored in `.opencode/vectors/<index>/`.
|
|
7
7
|
*/
|
|
8
8
|
|
|
@@ -10,7 +10,82 @@ import { tool } from "@opencode-ai/plugin"
|
|
|
10
10
|
import path from "path"
|
|
11
11
|
import fs from "fs/promises"
|
|
12
12
|
|
|
13
|
-
import { CodebaseIndexer } from "../vectorizer/index.ts"
|
|
13
|
+
import { CodebaseIndexer, getSearchConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
|
|
14
|
+
|
|
15
|
+
// ── Extension → language mapping (for filter parsing) ─────────────────────
const EXT_TO_LANG: Record<string, string> = {
  go: "go", py: "python", ts: "typescript", tsx: "typescript",
  js: "javascript", jsx: "javascript", mjs: "javascript", cjs: "javascript",
  rs: "rust", java: "java", kt: "kotlin", swift: "swift",
  c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
  rb: "ruby", php: "php", scala: "scala", clj: "clojure",
  md: "markdown", mdx: "markdown", txt: "text",
  yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
}
const LANG_NAMES = new Set(Object.values(EXT_TO_LANG))

/**
 * Resolve a file extension (without the dot, any case) to a language name.
 *
 * Uses an own-property check so keys inherited from Object.prototype
 * ("constructor", "__proto__", …) are never mistaken for extensions.
 */
function languageForExtension(ext: string): string | undefined {
  const key = ext.toLowerCase()
  return Object.prototype.hasOwnProperty.call(EXT_TO_LANG, key) ? EXT_TO_LANG[key] : undefined
}

/**
 * Parse the `filter` param into path prefix and/or language filter.
 *
 * Supported formats:
 *   "internal/domain/"     → pathPrefix = "internal/domain"
 *   "*.go"                 → language = "go"
 *   ".go"                  → language = "go"
 *   "**\/*.go"             → language = "go"
 *   "go"                   → language = "go"
 *   "internal/*.go"        → pathPrefix = "internal", language = "go"
 *   "internal/**\/*.go"    → pathPrefix = "internal", language = "go"
 *   "service"              → pathContains = "service"
 *
 * Extensions are matched case-insensitively. An extension that is not in
 * EXT_TO_LANG (e.g. "*.vue", ".github") becomes a `pathContains` match on
 * ".<ext>" instead of being dropped, so the filter is never silently ignored.
 *
 * @param filter Raw filter string supplied by the caller; may be empty.
 * @returns An object with only the constraints that apply; `{}` when the
 *          filter is empty or whitespace-only.
 */
function parseFilter(filter: string): {
  pathPrefix?: string
  language?: string
  pathContains?: string
} {
  if (!filter) return {}

  const f = filter.trim()
  if (!f) return {}

  // "internal/**/*.go" or "internal/*.go" → path + extension
  const globMatch = f.match(/^([^*]+?)(?:\/\*\*)?\/?\*\.(\w+)$/)
  if (globMatch) {
    const prefix = globMatch[1].replace(/\/+$/, "")
    const ext = globMatch[2]
    const language = languageForExtension(ext)
    // Unknown extension: keep the constraint as a substring match on ".ext"
    return language
      ? { pathPrefix: prefix, language }
      : { pathPrefix: prefix, pathContains: `.${ext}` }
  }

  // "*.go", ".go" or "**/*.go" → extension only
  const extMatch = f.match(/^(?:\*\*\/)?\*?\.(\w+)$/)
  if (extMatch) {
    const ext = extMatch[1]
    const language = languageForExtension(ext)
    return language ? { language } : { pathContains: `.${ext}` }
  }

  // "go", "python", "typescript" → language name
  const lower = f.toLowerCase()
  if (LANG_NAMES.has(lower)) {
    return { language: lower }
  }
  // Bare extension such as "ts" or "py"
  const fromExt = languageForExtension(lower)
  if (fromExt) {
    return { language: fromExt }
  }

  // Ends with "/" or contains "/" → path prefix (e.g. "internal/domain")
  if (f.includes("/")) {
    return { pathPrefix: f.replace(/\/+$/, "") }
  }

  // Anything else → substring match on file path
  return { pathContains: f }
}
|
|
14
89
|
|
|
15
90
|
export default tool({
|
|
16
91
|
description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
|
|
@@ -22,106 +97,72 @@ Available indexes:
|
|
|
22
97
|
- searchAll: true - Search across all indexes
|
|
23
98
|
|
|
24
99
|
Examples:
|
|
25
|
-
- "authentication logic"
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
- "
|
|
29
|
-
- search({ query: "
|
|
100
|
+
- search({ query: "authentication logic" })
|
|
101
|
+
- search({ query: "how to deploy", index: "docs" })
|
|
102
|
+
- search({ query: "tenant management", filter: "internal/domain/" })
|
|
103
|
+
- search({ query: "event handling", filter: "*.go" })
|
|
104
|
+
- search({ query: "API routes", filter: "internal/**/*.go" })
|
|
105
|
+
- search({ query: "metrics", searchAll: true })`,
|
|
30
106
|
|
|
31
107
|
args: {
|
|
32
108
|
query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
|
|
33
|
-
index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config
|
|
34
|
-
limit: tool.schema.number().optional().
|
|
109
|
+
index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config"),
|
|
110
|
+
limit: tool.schema.number().optional().describe("Number of results (default from config, typically 10)"),
|
|
35
111
|
searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
|
|
36
|
-
|
|
37
|
-
includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
|
|
38
|
-
// v2 params
|
|
39
|
-
hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
|
|
40
|
-
fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
|
|
41
|
-
language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
|
|
42
|
-
modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
|
|
43
|
-
modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
|
|
44
|
-
tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
|
|
45
|
-
minScore: tool.schema.number().optional().default(0.35).describe("Minimum relevance score (0-1). Results below this threshold are dropped. Default: 0.35"),
|
|
46
|
-
path: tool.schema.string().optional().describe("Filter by file path prefix (e.g. 'internal/domain/', 'src/components'). Only returns files under this path."),
|
|
112
|
+
filter: tool.schema.string().optional().describe("Filter results by path or language. Examples: 'internal/domain/', '*.go', 'internal/**/*.go', 'service'"),
|
|
47
113
|
},
|
|
48
114
|
|
|
49
115
|
async execute(args) {
|
|
50
116
|
const projectRoot = process.cwd()
|
|
51
117
|
|
|
52
118
|
try {
|
|
53
|
-
|
|
54
|
-
const
|
|
119
|
+
// Load config defaults (parsed from vectorizer.yaml)
|
|
120
|
+
const cfg = getSearchConfig()
|
|
121
|
+
const limit = args.limit || cfg.default_limit || 10
|
|
55
122
|
const indexName = args.index || "code"
|
|
123
|
+
const minScore = cfg.min_score ?? 0.35
|
|
124
|
+
const includeArchived = cfg.include_archived ?? false
|
|
125
|
+
|
|
126
|
+
// Parse filter into path/language constraints
|
|
127
|
+
const filterParsed = args.filter ? parseFilter(args.filter) : {}
|
|
56
128
|
|
|
57
|
-
// Build search options from
|
|
129
|
+
// Build search options — hybrid is always from per-index config
|
|
58
130
|
const searchOptions: Record<string, any> = {}
|
|
59
|
-
if (
|
|
60
|
-
|
|
61
|
-
//
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
|
|
70
|
-
rb: "ruby", php: "php", scala: "scala", clj: "clojure",
|
|
71
|
-
md: "markdown", mdx: "markdown", txt: "text",
|
|
72
|
-
yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
|
|
73
|
-
}
|
|
74
|
-
// Also accept full language names
|
|
75
|
-
const langNames = new Set([
|
|
76
|
-
"go", "python", "typescript", "javascript", "rust", "java", "kotlin",
|
|
77
|
-
"swift", "c", "cpp", "csharp", "ruby", "php", "scala", "clojure",
|
|
78
|
-
"markdown", "text", "yaml", "json", "toml",
|
|
79
|
-
])
|
|
80
|
-
|
|
81
|
-
if (ft === "code" || ft === "docs" || ft === "config") {
|
|
82
|
-
searchOptions.fileType = ft
|
|
83
|
-
} else if (extToLanguage[ft]) {
|
|
84
|
-
searchOptions.language = extToLanguage[ft]
|
|
85
|
-
} else if (langNames.has(ft)) {
|
|
86
|
-
searchOptions.language = ft
|
|
87
|
-
} else {
|
|
88
|
-
searchOptions.fileType = ft // pass through as-is
|
|
131
|
+
if (filterParsed.language) searchOptions.language = filterParsed.language
|
|
132
|
+
|
|
133
|
+
// Freshen from config (default: false — auto_index handles it)
|
|
134
|
+
if (cfg.freshen) {
|
|
135
|
+
try {
|
|
136
|
+
const indexer = await getIndexer(projectRoot, indexName)
|
|
137
|
+
await indexer.freshen()
|
|
138
|
+
releaseIndexer(projectRoot, indexName)
|
|
139
|
+
} catch {
|
|
140
|
+
// non-fatal — search can proceed without freshen
|
|
89
141
|
}
|
|
90
142
|
}
|
|
91
143
|
|
|
92
|
-
|
|
93
|
-
if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
|
|
94
|
-
if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
|
|
95
|
-
if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
|
|
96
|
-
|
|
97
|
-
// Auto-freshen stale files before searching
|
|
98
|
-
if (args.freshen !== false) {
|
|
99
|
-
const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
|
|
100
|
-
await tempIndexer.freshen()
|
|
101
|
-
await tempIndexer.unloadModel()
|
|
102
|
-
}
|
|
144
|
+
let allResults: any[] = []
|
|
103
145
|
|
|
104
146
|
if (args.searchAll) {
|
|
105
|
-
const tempIndexer = await
|
|
147
|
+
const tempIndexer = await getIndexer(projectRoot, "code")
|
|
106
148
|
const indexes = await tempIndexer.listIndexes()
|
|
107
|
-
|
|
149
|
+
releaseIndexer(projectRoot, "code")
|
|
108
150
|
|
|
109
151
|
if (indexes.length === 0) {
|
|
110
|
-
return `No indexes found.
|
|
152
|
+
return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
|
|
111
153
|
}
|
|
112
154
|
|
|
113
155
|
for (const idx of indexes) {
|
|
114
|
-
const indexer = await
|
|
115
|
-
|
|
116
|
-
await indexer.
|
|
156
|
+
const indexer = await getIndexer(projectRoot, idx)
|
|
157
|
+
try {
|
|
158
|
+
const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
|
|
159
|
+
allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
|
|
160
|
+
} finally {
|
|
161
|
+
releaseIndexer(projectRoot, idx)
|
|
117
162
|
}
|
|
118
|
-
const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
|
|
119
|
-
allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
|
|
120
|
-
await indexer.unloadModel()
|
|
121
163
|
}
|
|
122
164
|
|
|
123
165
|
allResults.sort((a, b) => {
|
|
124
|
-
// Prefer combinedScore (hybrid), fall back to L2→similarity conversion
|
|
125
166
|
const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
|
|
126
167
|
const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
|
|
127
168
|
return scoreB - scoreA
|
|
@@ -133,9 +174,9 @@ Examples:
|
|
|
133
174
|
await fs.access(hashesFile)
|
|
134
175
|
} catch {
|
|
135
176
|
// Index doesn't exist — check what indexes ARE available
|
|
136
|
-
const tempIndexer = await
|
|
177
|
+
const tempIndexer = await getIndexer(projectRoot, "code")
|
|
137
178
|
const available = await tempIndexer.listIndexes()
|
|
138
|
-
|
|
179
|
+
releaseIndexer(projectRoot, "code")
|
|
139
180
|
|
|
140
181
|
if (available.length > 0) {
|
|
141
182
|
const list = available.map(i => `"${i}"`).join(", ")
|
|
@@ -144,41 +185,44 @@ Examples:
|
|
|
144
185
|
return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
|
|
145
186
|
}
|
|
146
187
|
|
|
147
|
-
const indexer = await
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
188
|
+
const indexer = await getIndexer(projectRoot, indexName)
|
|
189
|
+
try {
|
|
190
|
+
const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
|
|
191
|
+
allResults = results.map((r: any) => ({ ...r, _index: indexName }))
|
|
192
|
+
} finally {
|
|
193
|
+
releaseIndexer(projectRoot, indexName)
|
|
194
|
+
}
|
|
151
195
|
}
|
|
152
196
|
|
|
153
197
|
// ── Score cutoff — drop low-relevance results ──────────────────────────
|
|
154
|
-
const minScore = args.minScore ?? 0.35
|
|
155
198
|
allResults = allResults.filter(r => {
|
|
156
199
|
const score = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
|
|
157
200
|
return score >= minScore
|
|
158
201
|
})
|
|
159
202
|
|
|
160
|
-
// ──
|
|
161
|
-
if (
|
|
162
|
-
const prefix =
|
|
203
|
+
// ── Filter — apply path/language constraints from `filter` param ───────
|
|
204
|
+
if (filterParsed.pathPrefix) {
|
|
205
|
+
const prefix = filterParsed.pathPrefix
|
|
163
206
|
allResults = allResults.filter(r => r.file && r.file.startsWith(prefix))
|
|
164
207
|
}
|
|
208
|
+
if (filterParsed.pathContains) {
|
|
209
|
+
const needle = filterParsed.pathContains.toLowerCase()
|
|
210
|
+
allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
|
|
211
|
+
}
|
|
212
|
+
// Language filter is already passed to searchOptions above, but double-check
|
|
213
|
+
// in case vectorizer didn't filter (e.g. docs index has no language field)
|
|
214
|
+
if (filterParsed.language) {
|
|
215
|
+
allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
|
|
216
|
+
}
|
|
165
217
|
|
|
166
218
|
// ── Reranking — boost results where query keywords appear in text ──────
|
|
167
|
-
// Also store score components for breakdown display
|
|
168
219
|
const queryKeywords = args.query.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
|
|
169
220
|
for (const r of allResults) {
|
|
170
|
-
// Vector score (L2 → similarity)
|
|
171
221
|
const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
|
|
172
222
|
r._vectorScore = vectorScore
|
|
173
|
-
|
|
174
|
-
// BM25 component (present only in hybrid mode — embedded in _combinedScore)
|
|
175
|
-
// If _combinedScore exists and differs from vectorScore, the difference is BM25 contribution
|
|
176
223
|
r._bm25Component = r._combinedScore != null ? Math.max(0, r._combinedScore - vectorScore) : 0
|
|
177
|
-
|
|
178
|
-
// Base score before keyword boost
|
|
179
224
|
const baseScore = r._combinedScore ?? vectorScore
|
|
180
225
|
|
|
181
|
-
// Keyword matching
|
|
182
226
|
const text = (r.content || "").toLowerCase()
|
|
183
227
|
const matchedKeywords: string[] = []
|
|
184
228
|
if (queryKeywords.length > 0) {
|
|
@@ -208,25 +252,24 @@ Examples:
|
|
|
208
252
|
}
|
|
209
253
|
}
|
|
210
254
|
|
|
211
|
-
// Sort groups by best chunk score, take top N unique files
|
|
212
255
|
const sortedGroups = [...fileGroups.values()]
|
|
213
256
|
.sort((a, b) => (b.best._finalScore ?? 0) - (a.best._finalScore ?? 0))
|
|
214
257
|
.slice(0, limit)
|
|
215
258
|
|
|
216
259
|
if (sortedGroups.length === 0) {
|
|
217
260
|
const scope = args.searchAll ? "any index" : `index "${indexName}"`
|
|
218
|
-
|
|
261
|
+
const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
|
|
262
|
+
return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
|
|
219
263
|
}
|
|
220
264
|
|
|
221
265
|
// ── Confidence signal ──────────────────────────────────────────────────
|
|
222
266
|
const topScore = sortedGroups[0].best._finalScore ?? 0
|
|
223
267
|
const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
|
|
224
|
-
const
|
|
225
|
-
|
|
226
|
-
let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel}${pathLabel})\n\n`
|
|
268
|
+
const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
|
|
269
|
+
let output = `## Search Results for: "${args.query}" (${scope}${filterLabel})\n\n`
|
|
227
270
|
|
|
228
271
|
if (topScore < 0.45) {
|
|
229
|
-
output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords
|
|
272
|
+
output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords or different phrasing.\n\n`
|
|
230
273
|
}
|
|
231
274
|
|
|
232
275
|
for (let i = 0; i < sortedGroups.length; i++) {
|
|
@@ -235,7 +278,7 @@ Examples:
|
|
|
235
278
|
const indexLabel = args.searchAll ? ` [${r._index}]` : ""
|
|
236
279
|
const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""
|
|
237
280
|
|
|
238
|
-
//
|
|
281
|
+
// Rich metadata
|
|
239
282
|
const metaParts: string[] = []
|
|
240
283
|
if (r.language && r.language !== "unknown") metaParts.push(r.language)
|
|
241
284
|
if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
|
|
@@ -243,7 +286,7 @@ Examples:
|
|
|
243
286
|
if (r.class_name) metaParts.push(`class: ${r.class_name}`)
|
|
244
287
|
const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
|
|
245
288
|
|
|
246
|
-
// Score breakdown
|
|
289
|
+
// Score breakdown
|
|
247
290
|
const breakdownParts: string[] = [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
|
|
248
291
|
if (r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
|
|
249
292
|
if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
|
|
@@ -261,7 +304,7 @@ Examples:
|
|
|
261
304
|
output += content
|
|
262
305
|
output += "\n```\n"
|
|
263
306
|
|
|
264
|
-
//
|
|
307
|
+
// Second-best chunk hint
|
|
265
308
|
if (chunks.length > 1) {
|
|
266
309
|
const second = chunks.find((c: any) => c !== r)
|
|
267
310
|
if (second) {
|
package/vectorizer/index.ts
CHANGED
|
@@ -85,6 +85,15 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
|
|
|
85
85
|
let METRICS_ENABLED = false;
|
|
86
86
|
let CACHE_ENABLED = true;
|
|
87
87
|
|
|
88
|
+
// ── Search defaults (exposed to tool layer) ──────────────────────────────────
// Baseline values; SEARCH_CONFIG below starts as a shallow copy of these.
const DEFAULT_SEARCH_CONFIG = {
  // Don't freshen on every search — auto_index handles it
  freshen: false,
  // Minimum relevance score cutoff
  min_score: 0.35,
  // Exclude archived files by default
  include_archived: false,
  // Default result count
  default_limit: 10,
};
// Mutable working copy, kept separate so the defaults object itself is never modified.
let SEARCH_CONFIG = Object.assign({}, DEFAULT_SEARCH_CONFIG);
|
|
96
|
+
|
|
88
97
|
// ── Graph config (v3) ───────────────────────────────────────────────────────
|
|
89
98
|
const DEFAULT_GRAPH_CONFIG = {
|
|
90
99
|
enabled: true,
|
|
@@ -135,6 +144,10 @@ function defaultVectorizerYaml() {
|
|
|
135
144
|
` search:\n` +
|
|
136
145
|
` hybrid: true\n` +
|
|
137
146
|
` bm25_weight: 0.3\n` +
|
|
147
|
+
` freshen: false # Don't re-index on every search (auto_index handles it)\n` +
|
|
148
|
+
` min_score: 0.35 # Minimum relevance score cutoff\n` +
|
|
149
|
+
` include_archived: false # Exclude archived files\n` +
|
|
150
|
+
` default_limit: 10 # Default number of results\n` +
|
|
138
151
|
`\n` +
|
|
139
152
|
` # Graph-based context (v3)\n` +
|
|
140
153
|
` graph:\n` +
|
|
@@ -299,6 +312,11 @@ async function loadConfig(projectRoot) {
|
|
|
299
312
|
enabled: parseBool(ss, "hybrid", false),
|
|
300
313
|
bm25_weight: parseNumber(ss, "bm25_weight", 0.3),
|
|
301
314
|
};
|
|
315
|
+
// Extended search defaults
|
|
316
|
+
SEARCH_CONFIG.freshen = parseBool(ss, "freshen", DEFAULT_SEARCH_CONFIG.freshen);
|
|
317
|
+
SEARCH_CONFIG.min_score = parseNumber(ss, "min_score", DEFAULT_SEARCH_CONFIG.min_score);
|
|
318
|
+
SEARCH_CONFIG.include_archived = parseBool(ss, "include_archived", DEFAULT_SEARCH_CONFIG.include_archived);
|
|
319
|
+
SEARCH_CONFIG.default_limit = parseNumber(ss, "default_limit", DEFAULT_SEARCH_CONFIG.default_limit);
|
|
302
320
|
}
|
|
303
321
|
|
|
304
322
|
// ── Parse quality config ────────────────────────────────────────────────
|
|
@@ -442,17 +460,24 @@ class CodebaseIndexer {
|
|
|
442
460
|
await this.loadHashes();
|
|
443
461
|
|
|
444
462
|
// Graph DB — only if graph is enabled in config
|
|
463
|
+
// Non-fatal: if LevelDB lock fails (parallel access), search works without graph
|
|
445
464
|
if (GRAPH_CONFIG.enabled) {
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
this.
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
465
|
+
try {
|
|
466
|
+
const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
|
|
467
|
+
const graphPath = path.join(this.root, ".opencode", "graph", graphType);
|
|
468
|
+
await fs.mkdir(path.dirname(graphPath), { recursive: true });
|
|
469
|
+
this.graphDB = await new GraphDB(graphPath).init();
|
|
470
|
+
this.graphBuilder = new GraphBuilder(
|
|
471
|
+
this.graphDB,
|
|
472
|
+
this.root,
|
|
473
|
+
GRAPH_CONFIG.lsp.enabled,
|
|
474
|
+
GRAPH_CONFIG.lsp.timeout_ms,
|
|
475
|
+
);
|
|
476
|
+
} catch (e) {
|
|
477
|
+
if (DEBUG) console.log(`[vectorizer] GraphDB init failed (lock?): ${e.message || e}`);
|
|
478
|
+
this.graphDB = null;
|
|
479
|
+
this.graphBuilder = null;
|
|
480
|
+
}
|
|
456
481
|
}
|
|
457
482
|
|
|
458
483
|
// Usage tracker — provenance & usage stats
|
|
@@ -1305,4 +1330,69 @@ function getEmbeddingModel() {
|
|
|
1305
1330
|
return EMBEDDING_MODEL;
|
|
1306
1331
|
}
|
|
1307
1332
|
|
|
1308
|
-
|
|
1333
|
+
/**
 * Accessor for the module-level search settings.
 *
 * Hands back the SEARCH_CONFIG object itself (not a copy), so callers see
 * the same object that the rest of this module reads and writes.
 */
function getSearchConfig() {
  const current = SEARCH_CONFIG;
  return current;
}
|
|
1336
|
+
|
|
1337
|
+
// ── Singleton indexer pool ──────────────────────────────────────────────────
// Prevents LevelDB lock conflicts when parallel searches hit the same index.
// Each unique (projectRoot, indexName) gets one shared CodebaseIndexer.
const _indexerPool = new Map<string, { indexer: CodebaseIndexer; refCount: number; initPromise: Promise<CodebaseIndexer> }>();

/**
 * Get or create a shared CodebaseIndexer for the given project + index.
 * Multiple callers get the same instance — no LevelDB lock conflicts.
 *
 * Concurrent callers share one `init()` promise. If that initialisation
 * rejects, the entry is evicted from the pool so the next call retries with
 * a fresh indexer instead of receiving the same cached rejection forever.
 *
 * Usage:
 *   const indexer = await getIndexer(projectRoot, "code");
 *   try {
 *     const results = await indexer.search(...);
 *   } finally {
 *     releaseIndexer(projectRoot, "code");
 *   }
 *
 * @param projectRoot Project root used as part of the pool key.
 * @param indexName Index name used as part of the pool key (default "code").
 * @returns The promise returned by `indexer.init()` for the pooled instance;
 *          rejects if initialisation fails.
 */
async function getIndexer(projectRoot: string, indexName: string = "code"): Promise<CodebaseIndexer> {
  const key = `${projectRoot}::${indexName}`;
  const existing = _indexerPool.get(key);
  if (existing) {
    existing.refCount++;
    return existing.initPromise;
  }

  const indexer = new CodebaseIndexer(projectRoot, indexName);
  const initPromise = indexer.init();
  const entry = { indexer, refCount: 1, initPromise };
  // Register before init settles so parallel callers share this instance.
  _indexerPool.set(key, entry);
  // Evict on failure, but only if the slot still holds this entry (it may have
  // been destroyed/replaced meanwhile). Callers still observe the rejection
  // through the promise returned below.
  initPromise.catch(() => {
    if (_indexerPool.get(key) === entry) _indexerPool.delete(key);
  });
  return initPromise;
}
|
|
1367
|
+
|
|
1368
|
+
/**
 * Release one reference to a pooled indexer obtained via getIndexer().
 *
 * Only the reference count is decremented (clamped at 0). The entry stays in
 * the pool and nothing is unloaded here, so the next getIndexer() call for
 * the same (projectRoot, indexName) reuses the same instance.
 * Call destroyIndexer() to unload the model and remove the entry from the pool.
 *
 * @param projectRoot Project root used as part of the pool key.
 * @param indexName Index name used as part of the pool key (default "code").
 */
function releaseIndexer(projectRoot: string, indexName: string = "code") {
  const key = `${projectRoot}::${indexName}`;
  const entry = _indexerPool.get(key);
  // Unknown key (never acquired, or already destroyed) — nothing to release.
  if (!entry) return;
  entry.refCount = Math.max(0, entry.refCount - 1);
  // Keep in pool — don't unload. Next search reuses the same instance.
}
|
|
1380
|
+
|
|
1381
|
+
/**
 * Remove a pooled indexer and unload its model.
 * Intended for CLI clear/reindex operations that need a fresh state.
 *
 * The pool entry is dropped first, so a later getIndexer() call for the same
 * key builds a new instance. Unloading is best effort: a failed init or a
 * failed unload is ignored.
 *
 * NOTE(review): only unloadModel() is invoked here; whether that also
 * releases other resources held by the indexer is not visible from this
 * function — confirm.
 *
 * @param projectRoot Project root used as part of the pool key.
 * @param indexName Index name used as part of the pool key (default "code").
 */
async function destroyIndexer(projectRoot: string, indexName: string = "code") {
  const poolKey = `${projectRoot}::${indexName}`;
  const pooled = _indexerPool.get(poolKey);
  if (pooled) {
    _indexerPool.delete(poolKey);
    try {
      const ready = await pooled.initPromise;
      await ready.unloadModel();
    } catch {
      // best effort
    }
  }
}
|
|
1397
|
+
|
|
1398
|
+
export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getIndexer, releaseIndexer, destroyIndexer };
|