@comfanion/usethis_search 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api.ts +34 -17
- package/cache/manager.ts +30 -19
- package/cli.ts +8 -5
- package/file-indexer.ts +28 -11
- package/hooks/message-before.ts +5 -5
- package/hooks/tool-substitution.ts +4 -120
- package/index.ts +17 -6
- package/package.json +3 -2
- package/tools/codeindex.ts +192 -184
- package/tools/graph.ts +265 -0
- package/tools/read-interceptor.ts +7 -3
- package/tools/search.ts +268 -190
- package/tools/workspace-state.ts +1 -2
- package/tools/workspace.ts +76 -108
- package/vectorizer/analyzers/lsp-client.ts +52 -6
- package/vectorizer/chunkers/chunker-factory.ts +6 -0
- package/vectorizer/chunkers/code-chunker.ts +73 -16
- package/vectorizer/chunkers/lsp-chunker.ts +313 -191
- package/vectorizer/graph-db.ts +6 -4
- package/vectorizer/index.ts +329 -134
- package/vectorizer/usage-tracker.ts +36 -0
- package/vectorizer.yaml +2 -2
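
The headline change in this release is the search tool's switch from a single `query` string to a `queries` array. A minimal sketch of the new argument shape, inferred from the schema and examples in the diff below (the `SearchArgs` name is mine, not part of the package):

```ts
// Sketch only — argument shape inferred from the 4.5.1 diff of package/tools/search.ts.
interface SearchArgs {
  queries: string[]     // semantic queries, file paths, or chunk IDs (mixed freely)
  index?: string        // which index to search, e.g. "code" or "docs" (default: "code")
  limit?: number        // max results per query (default: 10)
  searchAll?: boolean   // search every index instead of one
  filter?: string       // narrow by path or language: "internal/domain/", "*.go", "service"
}

// 4.4.0 took one query per call:  search({ query: "authentication logic" })
// 4.5.1 accepts several at once:  search({ queries: ["auth logic", "user model", "src/auth.ts:chunk-5"] })
const example: SearchArgs = {
  queries: ["authentication logic", "docs/architecture.md"],
  limit: 5,
}
```

Semantic queries are searched and merged; file paths and chunk IDs are attached directly to the workspace.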
package/tools/search.ts
CHANGED
@@ -13,7 +13,7 @@ import { tool } from "@opencode-ai/plugin"
 import path from "path"
 import fs from "fs/promises"
 
-import { CodebaseIndexer, getSearchConfig, getDecomposerConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
+import { CodebaseIndexer, getSearchConfig, getWorkspaceConfig, getDecomposerConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
 import { workspaceCache } from "../cache/manager.ts"
 import { buildWorkspaceOutput } from "./workspace-state.ts"
 import { decomposeQuery } from "../vectorizer/query-decomposer.ts"
@@ -91,7 +91,8 @@ async function expandChunkContext(
 if (alreadyAttached.has(chunkId)) continue
 
 // Skip the main chunk itself
-
+const mainChunkId = mainChunk.chunk_id || `${mainChunk.file}:chunk-${mainChunk.chunk_index ?? 0}`
+if (chunkId === mainChunkId) continue
 
 // Add other sections from same document
 expanded.push({
@@ -188,11 +189,14 @@ Available indexes:
 - "config" - Configuration files (*.yaml, *.json, etc.)
 - searchAll: true - Search across all indexes
 
-Auto-detects query type:
+Auto-detects query type (per item in queries array):
 - Semantic: "authentication logic" → vector search for relevant code
 - File path: "docs/architecture.md" → attaches entire file to workspace
 - Chunk ID: "src/auth.ts:chunk-5" → attaches specific chunk
 
+Multi-query: pass multiple queries to search/attach in one call.
+Each query is processed independently — semantic searches are merged, files/chunks are attached.
+
 How workspace works:
 - Top results are AUTO-ATTACHED to workspace with expanded context (class methods, imports, related code via graph)
 - Workspace has a TOKEN BUDGET (~50K tokens, ~100 chunks). When full, oldest chunks are evicted
@@ -207,13 +211,19 @@ IMPORTANT: Chunks contain DIRECT file content dumps (raw code/text from files).
 - Use Read tool only if you need content OUTSIDE the indexed chunks
 
 Context management (CRITICAL — follow these rules):
-- BEFORE searching a new topic, you MUST call
-- Workspace has LIMITED token budget. If budget >60%, evict old chunks with
-- Use
-- After editing files, forget stale chunks:
+- BEFORE searching a new topic, you MUST call forget() to remove irrelevant old context
+- Workspace has LIMITED token budget. If budget >60%, evict old chunks with forget({ queries: ["5"] })
+- Use clear() when switching to a completely different task
+- After editing files, forget stale chunks: forget({ queries: ["edited-file.ts"] })
 - The workspace is your working memory — KEEP IT FOCUSED. Stale context degrades search quality
 - Rule of thumb: forget BEFORE you search, not after
 
+Graph navigation workflow:
+1. search({ queries: ["authentication"] }) → find relevant chunks
+2. explore({ node: "chunk:src/auth.ts::AuthService" }) → see imports, callers, methods
+3. search({ queries: ["chunk:src/auth.ts::login", "chunk:src/types/User.ts::User"] }) → attach specific chunks
+4. forget() to clean up before next search
+
 Filter narrows results by path or language:
 - "internal/domain/" → only files under that path
 - "*.go" → only Go files
@@ -221,19 +231,22 @@ Filter narrows results by path or language:
 - "service" → files containing "service" in path
 
 Examples:
-- search({
-- search({
-- search({
-- search({
-- search({
-- search({
-- search({
-- search({
+- search({ queries: ["authentication logic"] })
+- search({ queries: ["how to deploy"], index: "docs" })
+- search({ queries: ["tenant management"], filter: "internal/domain/" })
+- search({ queries: ["event handling"], filter: "*.go" })
+- search({ queries: ["API routes"], filter: "internal/**/*.go" })
+- search({ queries: ["metrics"], searchAll: true })
+- search({ queries: ["docs/prd.md"] })
+- search({ queries: ["src/auth.ts:chunk-5"] })
+- search({ queries: ["auth logic", "user model", "api routes"] })
+- search({ queries: ["src/auth.ts", "src/types/User.ts"] })
+- search({ queries: ["chunk:src/auth.ts::login", "src/types/User.ts"] })`,
 
 args: {
-
+queries: tool.schema.array(tool.schema.string()).describe("Queries: semantic, file paths, or chunk IDs"),
 index: tool.schema.string().optional().default("code").describe("Where to search: 'code', 'docs', or leave empty for auto-detect"),
-limit: tool.schema.number().optional().describe("Max results (default: 10)"),
+limit: tool.schema.number().optional().describe("Max results per query (default: 10)"),
 searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of one"),
 filter: tool.schema.string().optional().describe("Filter by path/language: 'internal/domain/', '*.go', 'service'"),
 },
@@ -242,36 +255,16 @@ Examples:
 const projectRoot = process.cwd()
 
 try {
-
-
-
+const queryList: string[] = args.queries && args.queries.length > 0
+? args.queries
+: []
 
-
-
-let chunkId: string | undefined
-let filePath: string | undefined
-let semanticQuery: string | undefined
-
-// 1. Check if it's a chunk ID (contains ":chunk-")
-if (args.query.includes(":chunk-")) {
-mode = "chunkId"
-chunkId = args.query
-}
-// 2. Check if it's a file path (has extension or starts with common paths)
-else if (
-args.query.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
-args.query.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
-args.query.includes("/")
-) {
-mode = "path"
-filePath = args.query
-}
-// 3. Otherwise, it's a semantic search
-else {
-mode = "semantic"
-semanticQuery = args.query
+if (queryList.length === 0) {
+return `Error: queries is required\n\nExamples:\n- search({ queries: ["authentication logic"] })\n- search({ queries: ["auth logic", "user model"] })\n- search({ queries: ["src/auth.ts:chunk-5"] })`
 }
 
+const isMulti = queryList.length > 1
+
 // Load config defaults (parsed from vectorizer.yaml)
 const cfg = getSearchConfig()
 const limit = args.limit || cfg.default_limit || 10
@@ -279,100 +272,120 @@ Examples:
 const minScore = cfg.min_score ?? 0.35
 const includeArchived = cfg.include_archived ?? false
 
-// Workspace config
+// Workspace config — refresh from parsed yaml (loadConfig may have run after plugin init)
+const latestWsConfig = getWorkspaceConfig()
+workspaceCache.updateConfig(latestWsConfig)
 const wsConfig = workspaceCache.getConfig()
 
 // ══════════════════════════════════════════════════════════════════════
-//
+// Classify each query: chunkId / path / semantic
 // ══════════════════════════════════════════════════════════════════════
-
-
-
-
-
-
-
-
-
-
-
-
-
-chunkIndex: chunk.chunk_index ?? 0,
-role: "manual",
-attachedAt: Date.now(),
-attachedBy: `direct:${chunkId}`,
-metadata: {
-language: chunk.language,
-function_name: chunk.function_name,
-class_name: chunk.class_name,
-heading_context: chunk.heading_context,
-startLine: chunk.start_line,
-endLine: chunk.end_line,
-},
-})
-
-workspaceCache.save().catch(() => {})
-
-const entry = workspaceCache.get(chunkId!)!
-let result = `✓ Attached chunk to workspace\n\nChunk: ${chunkId}\nFile: ${chunk.file}\nTokens: ${entry.tokens.toLocaleString()}\nLanguage: ${chunk.language}\nLines: ${chunk.start_line}-${chunk.end_line}`
-result += buildWorkspaceOutput()
-return result
-} finally {
-releaseIndexer(projectRoot, indexName)
+const directAttachItems: Array<{ q: string; type: "chunkId" | "path" }> = []
+const semanticQueries: string[] = []
+
+for (const q of queryList) {
+if (q.includes(":chunk-") || q.startsWith("chunk:")) {
+directAttachItems.push({ q, type: "chunkId" })
+} else if (
+q.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
+q.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i)
+) {
+directAttachItems.push({ q, type: "path" })
+} else {
+semanticQueries.push(q)
 }
 }
 
 // ══════════════════════════════════════════════════════════════════════
-//
+// Direct attach: chunkIds and file paths
 // ══════════════════════════════════════════════════════════════════════
-
+const directResults: string[] = []
+const directErrors: string[] = []
+
+if (directAttachItems.length > 0) {
 const indexer = await getIndexer(projectRoot, indexName)
 try {
-const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-},
-}
-
-
-
+for (const { q, type } of directAttachItems) {
+if (type === "chunkId") {
+const chunk = await indexer.findChunkById(q)
+if (!chunk) {
+directErrors.push(`Chunk "${q}" not found`)
+continue
+}
+workspaceCache.attach({
+chunkId: q,
+path: chunk.file,
+content: chunk.content,
+chunkIndex: chunk.chunk_index ?? 0,
+role: "manual",
+attachedAt: Date.now(),
+attachedBy: `direct:${q}`,
+metadata: {
+language: chunk.language,
+function_name: chunk.function_name,
+class_name: chunk.class_name,
+heading_context: chunk.heading_context,
+startLine: chunk.start_line,
+endLine: chunk.end_line,
+},
+})
+const entry = workspaceCache.get(q)!
+directResults.push(`✓ ${q} (${chunk.file}, ${entry.tokens} tokens)`)
+} else {
+const chunks = await indexer.findChunksByPath(q)
+if (chunks.length === 0) {
+directErrors.push(`"${q}" — no chunks found`)
+continue
+}
+let fileTokens = 0
+for (const chunk of chunks) {
+const cid = chunk.chunk_id || `${q}:chunk-${chunk.chunk_index ?? 0}`
+workspaceCache.attach({
+chunkId: cid,
+path: q,
+content: chunk.content,
+chunkIndex: chunk.chunk_index ?? 0,
+role: "manual",
+attachedAt: Date.now(),
+attachedBy: `file:${q}`,
+metadata: {
+language: chunk.language,
+function_name: chunk.function_name,
+class_name: chunk.class_name,
+heading_context: chunk.heading_context,
+startLine: chunk.start_line,
+endLine: chunk.end_line,
+},
+})
+const entry = workspaceCache.get(cid)
+fileTokens += entry?.tokens ?? 0
+}
+directResults.push(`✓ ${q} (${chunks.length} chunks, ${fileTokens} tokens)`)
+}
 }
-
-workspaceCache.save().catch(() => {})
-
-let result = `✓ Attached file to workspace\n\nFile: ${filePath}\nChunks: ${chunks.length}\nTokens: ${totalTokens.toLocaleString()}\nLanguage: ${chunks[0].language}`
-result += buildWorkspaceOutput()
-return result
 } finally {
 releaseIndexer(projectRoot, indexName)
 }
+workspaceCache.save().catch(() => {})
+}
+
+// If all queries were direct attaches (no semantic), return early
+if (semanticQueries.length === 0) {
+let output = `## Attached\n\n`
+for (const r of directResults) output += `- ${r}\n`
+if (directErrors.length > 0) {
+output += `\n**Errors:**\n`
+for (const e of directErrors) output += `- ${e}\n`
+}
+output += buildWorkspaceOutput()
+return output
 }
 
+// Use first semantic query as the primary (for output header, reranking, etc.)
+const semanticQuery = semanticQueries[0]
+
 // ══════════════════════════════════════════════════════════════════════
-//
+// Semantic search (single or multi-query merge)
 // ══════════════════════════════════════════════════════════════════════
 
 // Parse filter into path/language constraints
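
The hunk above replaces the old single-query `mode` detection with a per-item classification loop. A standalone restatement of that routing, using the same regexes the diff adds (the `classifyQuery` helper is hypothetical, not part of the package):

```ts
// Hypothetical helper mirroring the per-query classification added in 4.5.1.
type QueryKind = "chunkId" | "path" | "semantic"

function classifyQuery(q: string): QueryKind {
  // Chunk IDs: "src/auth.ts:chunk-5" or graph-style "chunk:src/auth.ts::login"
  if (q.includes(":chunk-") || q.startsWith("chunk:")) return "chunkId"
  // File paths: known extension, or a well-known top-level directory prefix
  if (
    /\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i.test(q) ||
    /^(src|docs|internal|pkg|lib|app|pages|components|api)\//i.test(q)
  ) {
    return "path"
  }
  // Everything else falls through to vector search
  return "semantic"
}

// classifyQuery("src/auth.ts:chunk-5")   → "chunkId"
// classifyQuery("docs/architecture.md")  → "path"
// classifyQuery("authentication logic")  → "semantic"
```

Note that the bare `includes("/")` fallback from 4.4.0 is gone, so a slash-containing string without a known extension or prefix is now treated as a semantic query.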
@@ -386,8 +399,11 @@ Examples:
 if (cfg.freshen) {
 try {
 const indexer = await getIndexer(projectRoot, indexName)
-
-
+try {
+await indexer.freshen()
+} finally {
+releaseIndexer(projectRoot, indexName)
+}
 } catch {
 // non-fatal — search can proceed without freshen
 }
@@ -395,70 +411,82 @@ Examples:
 
 let allResults: any[] = []
 
+// Determine which indexes to search
+let targetIndexes: string[]
+
 if (args.searchAll) {
 const tempIndexer = await getIndexer(projectRoot, "code")
-
-
-
-
-return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
-}
-
-for (const idx of indexes) {
-const indexer = await getIndexer(projectRoot, idx)
-try {
-const results = await indexer.search(semanticQuery!, limit, includeArchived, searchOptions)
-allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
-} finally {
-releaseIndexer(projectRoot, idx)
-}
+try {
+targetIndexes = await tempIndexer.listIndexes()
+} finally {
+releaseIndexer(projectRoot, "code")
 }
 
-
-
-const deduplicated: any[] = []
-
-for (const result of allResults) {
-const chunkId = result.chunkId || `${result.file}:chunk-${result.index ?? 0}`
-if (!seen.has(chunkId)) {
-seen.add(chunkId)
-deduplicated.push(result)
-}
+if (targetIndexes.length === 0) {
+return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
 }
-
-allResults = deduplicated
-allResults.sort((a, b) => {
-const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
-const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
-return scoreB - scoreA
-})
-allResults = allResults.slice(0, limit)
 } else {
+// Verify index exists
 const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
 try {
 await fs.access(hashesFile)
 } catch {
-// Index doesn't exist — check what indexes ARE available
 const tempIndexer = await getIndexer(projectRoot, "code")
-
-
+let available: string[]
+try {
+available = await tempIndexer.listIndexes()
+} finally {
+releaseIndexer(projectRoot, "code")
+}
 
 if (available.length > 0) {
-const
-return `Index "${indexName}" not found. Available indexes: ${
+const availList = available.map(i => `"${i}"`).join(", ")
+return `Index "${indexName}" not found. Available indexes: ${availList}.\n\nTry: search({ query: "${semanticQuery}", index: "${available[0]}" })\nOr search all: search({ query: "${semanticQuery}", searchAll: true })`
 }
 return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
 }
+targetIndexes = [indexName]
+}
 
-
-
-
-
-
-
+// Run each semantic query against each target index, collect results
+for (const sq of semanticQueries) {
+for (const idx of targetIndexes) {
+const indexer = await getIndexer(projectRoot, idx)
+try {
+const results = await indexer.search(sq, limit, includeArchived, searchOptions)
+allResults.push(...results.map((r: any) => ({ ...r, _index: idx, _query: sq })))
+} finally {
+releaseIndexer(projectRoot, idx)
+}
+}
+}
+
+// Deduplicate by chunk_id — keep highest score
+{
+const bestByChunk = new Map<string, any>()
+for (const result of allResults) {
+const chunkId = result.chunk_id || `${result.file}:chunk-${result.chunk_index ?? 0}`
+const score = result._combinedScore ?? (result._distance != null ? Math.max(0, 1 - result._distance / 2) : 0)
+const existing = bestByChunk.get(chunkId)
+if (!existing) {
+bestByChunk.set(chunkId, result)
+} else {
+const existingScore = existing._combinedScore ?? (existing._distance != null ? Math.max(0, 1 - existing._distance / 2) : 0)
+if (score > existingScore) {
+bestByChunk.set(chunkId, result)
+}
+}
 }
+allResults = [...bestByChunk.values()]
 }
 
+allResults.sort((a, b) => {
+const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
+const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
+return scoreB - scoreA
+})
+allResults = allResults.slice(0, limit)
+
 // ── Score cutoff — drop low-relevance results ──────────────────────────
 allResults = allResults.filter(r => {
 const score = r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)
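
Because each semantic query now runs against each target index, the same chunk can surface several times. The dedup block above keeps only the best-scoring hit per chunk before the global sort and cut to `limit`; a condensed sketch of that idea (the `Hit` shape is simplified from the fields the diff touches):

```ts
// Simplified restatement of the keep-best-per-chunk dedup added in 4.5.1.
interface Hit {
  chunk_id?: string
  file: string
  chunk_index?: number
  _combinedScore?: number
  _distance?: number
}

// Same fallback scoring the diff uses: combined score, else distance mapped to 0..1.
const scoreOf = (r: Hit): number =>
  r._combinedScore ?? (r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0)

function dedupeByChunk(hits: Hit[]): Hit[] {
  const best = new Map<string, Hit>()
  for (const hit of hits) {
    const id = hit.chunk_id ?? `${hit.file}:chunk-${hit.chunk_index ?? 0}`
    const prev = best.get(id)
    if (!prev || scoreOf(hit) > scoreOf(prev)) best.set(id, hit)
  }
  // Highest score first, as in the diff
  return [...best.values()].sort((a, b) => scoreOf(b) - scoreOf(a))
}
```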
@@ -516,8 +544,8 @@ Examples:
 filterApplied = true
 }
 
-// ── Reranking —
-const queryKeywords =
+// ── Reranking — keyword match + metadata signals ──────────────────────
+const queryKeywords = [...new Set(semanticQueries.flatMap(q => q.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)))]
 for (const r of allResults) {
 const isBM25Only = !!r._bm25Only
 const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
@@ -527,6 +555,7 @@ Examples:
 : (r._combinedScore != null ? Math.max(0, r._combinedScore - vectorScore) : 0)
 const baseScore = r._combinedScore ?? vectorScore
 
+// ── Keyword bonus (content match) ──
 const text = (r.content || "").toLowerCase()
 const matchedKeywords: string[] = []
 if (queryKeywords.length > 0) {
@@ -537,17 +566,61 @@ Examples:
 r._matchedKeywords = matchedKeywords
 const keywordBonus = queryKeywords.length > 0 ? (matchedKeywords.length / queryKeywords.length) * 0.15 : 0
 r._keywordBonus = keywordBonus
-
+
+// ── Metadata reranking ──
+let metadataBonus = 0
+const filePath = (r.file || "") as string
+
+// File type: boost source, penalize tests
+const isTest = /[/.](?:test|spec|__test__|_test)\b/i.test(filePath)
+const isHelper = /[/.](?:mock|fixture|helper|stub|fake)/i.test(filePath)
+if (isTest) {
+metadataBonus -= 0.05
+} else if (isHelper) {
+metadataBonus -= 0.03
+} else {
+// Source file — small boost
+metadataBonus += 0.03
+}
+
+// Function name match — strong signal
+if (r.function_name && queryKeywords.length > 0) {
+const fnLower = (r.function_name as string).toLowerCase()
+for (const kw of queryKeywords) {
+if (fnLower.includes(kw)) {
+metadataBonus += 0.12
+break
+}
+}
+}
+
+// Class name match
+if (r.class_name && queryKeywords.length > 0) {
+const clsLower = (r.class_name as string).toLowerCase()
+for (const kw of queryKeywords) {
+if (clsLower.includes(kw)) {
+metadataBonus += 0.08
+break
+}
+}
+}
+
+r._metadataBonus = metadataBonus
+r._finalScore = baseScore + keywordBonus + metadataBonus
 }
 allResults.sort((a: any, b: any) => (b._finalScore ?? 0) - (a._finalScore ?? 0))
 
 // ── Sort by final score (chunks, not files) ──────────────────────────
 const topChunks = allResults.slice(0, limit)
 
+const queryLabel = semanticQueries.length === 1
+? `"${semanticQuery}"`
+: semanticQueries.map(q => `"${q}"`).join(", ")
+
 if (topChunks.length === 0) {
 const scope = args.searchAll ? "any index" : `index "${indexName}"`
 const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
-let noResultsOutput = `No results found in ${scope}${filterNote} for:
+let noResultsOutput = `No results found in ${scope}${filterNote} for: ${queryLabel} (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
 noResultsOutput += buildWorkspaceOutput()
 return noResultsOutput
 }
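
The reranking pass above now adds a metadata term on top of the existing keyword bonus. Roughly, per result (weights taken from the diff, everything else simplified):

```ts
// Simplified view of the 4.5.1 final score: base + keyword bonus + metadata bonus.
// keywordBonus:  fraction of query keywords found in the chunk content, scaled by 0.15.
// metadataBonus: -0.05 test file, -0.03 mock/fixture/helper, +0.03 other source files,
//                +0.12 if a keyword matches the function name, +0.08 for the class name.
function finalScore(
  baseScore: number,
  matchedKeywords: number,
  totalKeywords: number,
  metadataBonus: number,
): number {
  const keywordBonus = totalKeywords > 0 ? (matchedKeywords / totalKeywords) * 0.15 : 0
  return baseScore + keywordBonus + metadataBonus
}
```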
@@ -566,30 +639,31 @@ Examples:
 
 // Get indexer for context expansion (reuse same indexer)
 const indexerForExpansion = await getIndexer(projectRoot, indexName)
+try {
 
 for (const chunk of mainChunks) {
 // Skip if score too low
 if ((chunk._finalScore ?? 0) < wsConfig.minScoreMain) continue
 
 // Attach main chunk
-const chunkId = chunk.
+const chunkId = chunk.chunk_id || `${chunk.file}:chunk-${chunk.chunk_index ?? 0}`
 
 workspaceCache.attach({
 chunkId,
 path: chunk.file,
 content: chunk.content,
-chunkIndex: chunk.
+chunkIndex: chunk.chunk_index ?? 0,
 role: "search-main",
 attachedAt: Date.now(),
-attachedBy: semanticQuery!,
-
+attachedBy: chunk._query || semanticQuery!,
+score: chunk._finalScore,
 metadata: {
 language: chunk.language,
 function_name: chunk.function_name,
 class_name: chunk.class_name,
 heading_context: chunk.heading_context,
-startLine: chunk.
-endLine: chunk.
+startLine: chunk.start_line,
+endLine: chunk.end_line,
 },
 })
 
@@ -613,7 +687,7 @@ Examples:
 chunkIndex: expChunk.chunk_index ?? 0,
 role: "search-context",
 attachedAt: Date.now(),
-attachedBy: `${semanticQuery} (${reason})`,
+attachedBy: `${chunk._query || semanticQuery} (${reason})`,
 score: chunk._finalScore * 0.9, // Slightly lower score than main
 metadata: {
 language: expChunk.language,
@@ -642,24 +716,24 @@ Examples:
 .slice(0, wsConfig.attachRelatedPerChunk)
 
 for (const rel of topRelated) {
-const relChunkId = rel.
+const relChunkId = rel.chunk_id || `${rel.file}:chunk-${rel.chunk_index ?? 0}`
 if (alreadyAttached.has(relChunkId)) continue
 
 workspaceCache.attach({
 chunkId: relChunkId,
 path: rel.file,
 content: rel.content,
-chunkIndex: rel.
+chunkIndex: rel.chunk_index ?? 0,
 role: "search-graph",
 attachedAt: Date.now(),
-attachedBy: `${semanticQuery} (${rel.relation} from ${chunkId})`,
+attachedBy: `${chunk._query || semanticQuery} (${rel.relation} from ${chunkId})`,
 score: rel.score,
 metadata: {
 language: rel.language,
 relation: rel.relation,
 mainChunkId: chunkId,
-startLine: rel.
-endLine: rel.
+startLine: rel.start_line,
+endLine: rel.end_line,
 },
 })
 
@@ -669,8 +743,10 @@ Examples:
 }
 }
 
-
-
+} finally {
+// Release indexer used for expansion
+releaseIndexer(projectRoot, indexName)
+}
 
 // ── Flush workspace to disk immediately (don't rely on debounce) ─────
 if (attachedMain.length > 0 || attachedGraph.length > 0 || attachedContext.length > 0) {
@@ -686,12 +762,14 @@ Examples:
 const hasRRF = allResults.some((r: any) => r._rrfScore != null)
 const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
 const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
-let output = `## Search:
+let output = `## Search: ${queryLabel} (${scope}${filterLabel})\n\n`
 
-// Show decomposition info if query was decomposed
-
-
-
+// Show decomposition info if single query was decomposed
+if (semanticQueries.length === 1) {
+const decomposition = decomposeQuery(semanticQuery!, getDecomposerConfig())
+if (decomposition.decomposed) {
+output += `> **Query decomposed** (${decomposition.strategy}): ${decomposition.subQueries.map(q => `"${q}"`).join(", ")}\n\n`
+}
 }
 
 if (hasBM25Only) {
@@ -744,7 +822,7 @@ Examples:
 output += `### Additional results (summary only)\n\n`
 for (let i = 0; i < restChunks.length; i++) {
 const chunk = restChunks[i]
-
+const chunkId = chunk.chunk_id || `${chunk.file}:chunk-${chunk.chunk_index ?? 0}`
 const score = (chunk._finalScore ?? 0).toFixed(3)
 const indexLabel = args.searchAll ? ` [${chunk._index}]` : ""