@comfanion/usethis_search 3.0.0-dev.22 → 3.0.0-dev.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/tools/search.ts +156 -98
- package/vectorizer/index.ts +23 -1
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.22",
+  "version": "3.0.0-dev.24",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",
package/tools/search.ts
CHANGED
@@ -1,8 +1,8 @@
 /**
- * Semantic Code Search Tool (
+ * Semantic Code Search Tool (v3)
  *
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
- *
+ * v3: simplified agent API — 5 params, config-driven defaults, smart filter.
  * Index data is stored in `.opencode/vectors/<index>/`.
  */

@@ -10,7 +10,82 @@ import { tool } from "@opencode-ai/plugin"
 import path from "path"
 import fs from "fs/promises"

-import { CodebaseIndexer } from "../vectorizer/index.ts"
+import { CodebaseIndexer, getSearchConfig } from "../vectorizer/index.ts"
+
+// ── Extension → language mapping (for filter parsing) ─────────────────────
+const EXT_TO_LANG: Record<string, string> = {
+  go: "go", py: "python", ts: "typescript", tsx: "typescript",
+  js: "javascript", jsx: "javascript", mjs: "javascript", cjs: "javascript",
+  rs: "rust", java: "java", kt: "kotlin", swift: "swift",
+  c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
+  rb: "ruby", php: "php", scala: "scala", clj: "clojure",
+  md: "markdown", mdx: "markdown", txt: "text",
+  yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
+}
+const LANG_NAMES = new Set(Object.values(EXT_TO_LANG))
+
+/**
+ * Parse the `filter` param into path prefix and/or language filter.
+ *
+ * Supported formats:
+ *   "internal/domain/"  → pathPrefix = "internal/domain"
+ *   "*.go"              → language = "go"
+ *   ".go"               → language = "go"
+ *   "go"                → language = "go"
+ *   "internal/*.go"     → pathPrefix = "internal", language = "go"
+ *   "internal/**\/*.go" → pathPrefix = "internal", language = "go"
+ *   "service"           → pathContains = "service"
+ */
+function parseFilter(filter: string): {
+  pathPrefix?: string
+  language?: string
+  pathContains?: string
+} {
+  if (!filter) return {}
+
+  const f = filter.trim()
+
+  // "internal/**/*.go" or "internal/*.go" → path + extension
+  const globMatch = f.match(/^([^*]+?)(?:\/\*\*)?\/?\*\.(\w+)$/)
+  if (globMatch) {
+    const prefix = globMatch[1].replace(/\/+$/, "")
+    const ext = globMatch[2]
+    return {
+      pathPrefix: prefix,
+      language: EXT_TO_LANG[ext] || undefined,
+    }
+  }
+
+  // "*.go" or ".go" → extension only
+  const extMatch = f.match(/^\*?\.(\w+)$/)
+  if (extMatch) {
+    const ext = extMatch[1]
+    return { language: EXT_TO_LANG[ext] || undefined }
+  }
+
+  // "go", "python", "typescript" → language name
+  const lower = f.toLowerCase()
+  if (LANG_NAMES.has(lower)) {
+    return { language: lower }
+  }
+  // "go" could also be ext
+  if (EXT_TO_LANG[lower]) {
+    return { language: EXT_TO_LANG[lower] }
+  }
+
+  // Ends with "/" → path prefix
+  if (f.endsWith("/")) {
+    return { pathPrefix: f.replace(/\/+$/, "") }
+  }
+
+  // Contains "/" → path prefix (e.g. "internal/domain")
+  if (f.includes("/")) {
+    return { pathPrefix: f.replace(/\/+$/, "") }
+  }
+
+  // Anything else → substring match on file path
+  return { pathContains: f }
+}

 export default tool({
   description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
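
Note (illustrative, not part of the published diff): the new parseFilter helper is intended to map filter strings as sketched below; the calls are hypothetical and the expected returns are taken from the doc comment in the hunk above.

    parseFilter("internal/domain/")  // → { pathPrefix: "internal/domain" }
    parseFilter("*.go")              // → { language: "go" }
    parseFilter("internal/**/*.go")  // → { pathPrefix: "internal", language: "go" }
    parseFilter("go")                // → { language: "go" }
    parseFilter("service")           // → { pathContains: "service" }
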
@@ -22,84 +97,51 @@ Available indexes:
 - searchAll: true - Search across all indexes

 Examples:
-- "authentication logic"
--
--
-- "
-- search({ query: "
+- search({ query: "authentication logic" })
+- search({ query: "how to deploy", index: "docs" })
+- search({ query: "tenant management", filter: "internal/domain/" })
+- search({ query: "event handling", filter: "*.go" })
+- search({ query: "API routes", filter: "internal/**/*.go" })
+- search({ query: "metrics", searchAll: true })`,

   args: {
     query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
-    index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config
-    limit: tool.schema.number().optional().
+    index: tool.schema.string().optional().default("code").describe("Index to search: code, docs, config"),
+    limit: tool.schema.number().optional().describe("Number of results (default from config, typically 10)"),
     searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
-
-    includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
-    // v2 params
-    hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
-    fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
-    language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
-    modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
-    modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
-    tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
-    minScore: tool.schema.number().optional().default(0.35).describe("Minimum relevance score (0-1). Results below this threshold are dropped. Default: 0.35"),
-    path: tool.schema.string().optional().describe("Filter by file path prefix (e.g. 'internal/domain/', 'src/components'). Only returns files under this path."),
+    filter: tool.schema.string().optional().describe("Filter results by path or language. Examples: 'internal/domain/', '*.go', 'internal/**/*.go', 'service'"),
   },

   async execute(args) {
     const projectRoot = process.cwd()

     try {
-
-      const
+      // Load config defaults (parsed from vectorizer.yaml)
+      const cfg = getSearchConfig()
+      const limit = args.limit || cfg.default_limit || 10
       const indexName = args.index || "code"
+      const minScore = cfg.min_score ?? 0.35
+      const includeArchived = cfg.include_archived ?? false

-      //
+      // Parse filter into path/language constraints
+      const filterParsed = args.filter ? parseFilter(args.filter) : {}
+
+      // Build search options — hybrid is always from per-index config
       const searchOptions: Record<string, any> = {}
-      if (
-
-      //
-
-
-
-
-
-
-        c: "c", cpp: "cpp", h: "c", hpp: "cpp", cs: "csharp",
-        rb: "ruby", php: "php", scala: "scala", clj: "clojure",
-        md: "markdown", mdx: "markdown", txt: "text",
-        yaml: "yaml", yml: "yaml", json: "json", toml: "toml",
-      }
-      // Also accept full language names
-      const langNames = new Set([
-        "go", "python", "typescript", "javascript", "rust", "java", "kotlin",
-        "swift", "c", "cpp", "csharp", "ruby", "php", "scala", "clojure",
-        "markdown", "text", "yaml", "json", "toml",
-      ])
-
-      if (ft === "code" || ft === "docs" || ft === "config") {
-        searchOptions.fileType = ft
-      } else if (extToLanguage[ft]) {
-        searchOptions.language = extToLanguage[ft]
-      } else if (langNames.has(ft)) {
-        searchOptions.language = ft
-      } else {
-        searchOptions.fileType = ft // pass through as-is
+      if (filterParsed.language) searchOptions.language = filterParsed.language
+
+      // Freshen from config (default: false — auto_index handles it)
+      if (cfg.freshen) {
+        try {
+          const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
+          await tempIndexer.freshen()
+          await tempIndexer.unloadModel()
+        } catch {
+          // non-fatal — search can proceed without freshen
        }
      }

-
-      if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
-      if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
-      if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
-
-      // Auto-freshen stale files before searching
-      if (args.freshen !== false) {
-        const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
-        await tempIndexer.freshen()
-        await tempIndexer.unloadModel()
-      }
+      let allResults: any[] = []

      if (args.searchAll) {
        const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
@@ -107,21 +149,17 @@ Examples:
        await tempIndexer.unloadModel()

        if (indexes.length === 0) {
-          return `No indexes found.
+          return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
        }

        for (const idx of indexes) {
          const indexer = await new CodebaseIndexer(projectRoot, idx).init()
-
-          await indexer.freshen()
-          }
-          const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
+          const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
          allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
          await indexer.unloadModel()
        }

        allResults.sort((a, b) => {
-          // Prefer combinedScore (hybrid), fall back to L2→similarity conversion
          const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
          const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
          return scoreB - scoreA
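
Note (not part of the diff): the sort key falls back from _combinedScore to a distance-based similarity. A minimal sketch of that conversion, assuming L2 distances over normalized embeddings so distances fall in the 0 to 2 range:

    const similarity = (distance: number) => Math.max(0, 1 - distance / 2)
    similarity(0.0) // 1.0 (identical vectors)
    similarity(1.0) // 0.5
    similarity(2.0) // 0.0 (maximally distant)
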
@@ -145,43 +183,53 @@ Examples:
        }

        const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
-        const results = await indexer.search(args.query, limit,
+        const results = await indexer.search(args.query, limit, includeArchived, searchOptions)
        allResults = results.map((r: any) => ({ ...r, _index: indexName }))
        await indexer.unloadModel()
      }

      // ── Score cutoff — drop low-relevance results ──────────────────────────
-      const minScore = args.minScore ?? 0.35
      allResults = allResults.filter(r => {
        const score = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
        return score >= minScore
      })

-      // ──
-      if (
-      const prefix =
+      // ── Filter — apply path/language constraints from `filter` param ───────
+      if (filterParsed.pathPrefix) {
+        const prefix = filterParsed.pathPrefix
        allResults = allResults.filter(r => r.file && r.file.startsWith(prefix))
      }
+      if (filterParsed.pathContains) {
+        const needle = filterParsed.pathContains.toLowerCase()
+        allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
+      }
+      // Language filter is already passed to searchOptions above, but double-check
+      // in case vectorizer didn't filter (e.g. docs index has no language field)
+      if (filterParsed.language) {
+        allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
+      }

      // ── Reranking — boost results where query keywords appear in text ──────
      const queryKeywords = args.query.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
-
-
-
-
+      for (const r of allResults) {
+        const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
+        r._vectorScore = vectorScore
+        r._bm25Component = r._combinedScore != null ? Math.max(0, r._combinedScore - vectorScore) : 0
+        const baseScore = r._combinedScore ?? vectorScore
+
+        const text = (r.content || "").toLowerCase()
+        const matchedKeywords: string[] = []
+        if (queryKeywords.length > 0) {
          for (const kw of queryKeywords) {
-            if (text.includes(kw))
+            if (text.includes(kw)) matchedKeywords.push(kw)
          }
-          const keywordBonus = queryKeywords.length > 0 ? (keywordHits / queryKeywords.length) * 0.15 : 0
-          const baseScore = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
-          r._finalScore = baseScore + keywordBonus
-        }
-        allResults.sort((a: any, b: any) => (b._finalScore ?? 0) - (a._finalScore ?? 0))
-      } else {
-        for (const r of allResults) {
-          r._finalScore = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
        }
+        r._matchedKeywords = matchedKeywords
+        const keywordBonus = queryKeywords.length > 0 ? (matchedKeywords.length / queryKeywords.length) * 0.15 : 0
+        r._keywordBonus = keywordBonus
+        r._finalScore = baseScore + keywordBonus
      }
+      allResults.sort((a: any, b: any) => (b._finalScore ?? 0) - (a._finalScore ?? 0))

      // ── Group by file — best chunk per file, with chunk count ─────────────
      const fileGroups = new Map<string, { best: any, chunks: any[] }>()
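
Note (illustrative, with invented numbers): the reranking hunk above composes the final score from the hybrid score plus a keyword bonus capped at 0.15. For a hypothetical result with vector score 0.62, combined hybrid score 0.66, and 2 of 3 query keywords present in the chunk text:

    const vectorScore = 0.62
    const combinedScore = 0.66
    const bm25Component = Math.max(0, combinedScore - vectorScore) // 0.04
    const keywordBonus = (2 / 3) * 0.15                            // 0.10
    const finalScore = combinedScore + keywordBonus                // 0.76
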
@@ -198,25 +246,24 @@ Examples:
        }
      }

-      // Sort groups by best chunk score, take top N unique files
      const sortedGroups = [...fileGroups.values()]
        .sort((a, b) => (b.best._finalScore ?? 0) - (a.best._finalScore ?? 0))
        .slice(0, limit)

      if (sortedGroups.length === 0) {
        const scope = args.searchAll ? "any index" : `index "${indexName}"`
-
+        const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
+        return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
      }

      // ── Confidence signal ──────────────────────────────────────────────────
      const topScore = sortedGroups[0].best._finalScore ?? 0
      const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
-      const
-
-      let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel}${pathLabel})\n\n`
+      const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
+      let output = `## Search Results for: "${args.query}" (${scope}${filterLabel})\n\n`

      if (topScore < 0.45) {
-        output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords
+        output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords or different phrasing.\n\n`
      }

      for (let i = 0; i < sortedGroups.length; i++) {
@@ -225,7 +272,7 @@ Examples:
        const indexLabel = args.searchAll ? ` [${r._index}]` : ""
        const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""

-        //
+        // Rich metadata
        const metaParts: string[] = []
        if (r.language && r.language !== "unknown") metaParts.push(r.language)
        if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
@@ -233,14 +280,25 @@ Examples:
        if (r.class_name) metaParts.push(`class: ${r.class_name}`)
        const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""

+        // Score breakdown
+        const breakdownParts: string[] = [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
+        if (r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
+        if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
+        const breakdown = breakdownParts.join(", ")
+
+        // Matched keywords
+        const kwDisplay = r._matchedKeywords && r._matchedKeywords.length > 0
+          ? ` | matched: "${r._matchedKeywords.join('", "')}"`
+          : ""
+
        output += `### ${i + 1}. ${r.file}${indexLabel}${chunkNote}\n`
-        output += `**
+        output += `**Score:** ${score} (${breakdown}${kwDisplay})${metaLine}\n\n`
        output += "```\n"
        const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
        output += content
        output += "\n```\n"

-        //
+        // Second-best chunk hint
        if (chunks.length > 1) {
          const second = chunks.find((c: any) => c !== r)
          if (second) {
package/vectorizer/index.ts
CHANGED
@@ -85,6 +85,15 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
 let METRICS_ENABLED = false;
 let CACHE_ENABLED = true;

+// ── Search defaults (exposed to tool layer) ──────────────────────────────────
+const DEFAULT_SEARCH_CONFIG = {
+  freshen: false,          // Don't freshen on every search — auto_index handles it
+  min_score: 0.35,         // Minimum relevance score cutoff
+  include_archived: false, // Exclude archived files by default
+  default_limit: 10,       // Default result count
+};
+let SEARCH_CONFIG = { ...DEFAULT_SEARCH_CONFIG };
+
 // ── Graph config (v3) ───────────────────────────────────────────────────────
 const DEFAULT_GRAPH_CONFIG = {
   enabled: true,
@@ -135,6 +144,10 @@ function defaultVectorizerYaml() {
     `  search:\n` +
     `    hybrid: true\n` +
     `    bm25_weight: 0.3\n` +
+    `    freshen: false          # Don't re-index on every search (auto_index handles it)\n` +
+    `    min_score: 0.35         # Minimum relevance score cutoff\n` +
+    `    include_archived: false # Exclude archived files\n` +
+    `    default_limit: 10       # Default number of results\n` +
     `\n` +
     `  # Graph-based context (v3)\n` +
     `  graph:\n` +
@@ -299,6 +312,11 @@ async function loadConfig(projectRoot) {
      enabled: parseBool(ss, "hybrid", false),
      bm25_weight: parseNumber(ss, "bm25_weight", 0.3),
    };
+    // Extended search defaults
+    SEARCH_CONFIG.freshen = parseBool(ss, "freshen", DEFAULT_SEARCH_CONFIG.freshen);
+    SEARCH_CONFIG.min_score = parseNumber(ss, "min_score", DEFAULT_SEARCH_CONFIG.min_score);
+    SEARCH_CONFIG.include_archived = parseBool(ss, "include_archived", DEFAULT_SEARCH_CONFIG.include_archived);
+    SEARCH_CONFIG.default_limit = parseNumber(ss, "default_limit", DEFAULT_SEARCH_CONFIG.default_limit);
   }

   // ── Parse quality config ────────────────────────────────────────────────
@@ -1305,4 +1323,8 @@ function getEmbeddingModel() {
   return EMBEDDING_MODEL;
 }

-
+function getSearchConfig() {
+  return SEARCH_CONFIG;
+}
+
+export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig };
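
Note: a condensed sketch (not part of the diff) of how the tool layer consumes these exports; it mirrors the search.ts hunks above, with args standing in for the tool's parsed arguments.

    import { getSearchConfig } from "../vectorizer/index.ts"

    const cfg = getSearchConfig() // { freshen, min_score, include_archived, default_limit }
    const limit = args.limit || cfg.default_limit || 10
    const minScore = cfg.min_score ?? 0.35
    const includeArchived = cfg.include_archived ?? false
    if (cfg.freshen) {
      // opt-in: re-index stale files before searching; auto_index covers the common case
    }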