@comfanion/usethis_search 3.0.0-dev.21 → 3.0.0-dev.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@comfanion/usethis_search",
- "version": "3.0.0-dev.21",
+ "version": "3.0.0-dev.23",
  "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
  "type": "module",
  "main": "./index.ts",
package/tools/search.ts CHANGED
@@ -25,7 +25,8 @@ Examples:
  - "authentication logic" → finds auth-related code
  - "database connection handling" → finds DB setup code
  - "how to deploy" with index: "docs" → finds deployment docs
- - "API keys" with index: "config" → finds config with API settings`,
+ - "API keys" with index: "config" → finds config with API settings
+ - search({ query: "tenant", path: "internal/domain/" }) → searches only in internal/domain/`,

  args: {
  query: tool.schema.string().describe("Semantic search query describing what you're looking for"),
@@ -42,6 +43,7 @@ Examples:
  modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
  tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
  minScore: tool.schema.number().optional().default(0.35).describe("Minimum relevance score (0-1). Results below this threshold are dropped. Default: 0.35"),
+ path: tool.schema.string().optional().describe("Filter by file path prefix (e.g. 'internal/domain/', 'src/components'). Only returns files under this path."),
  },

  async execute(args) {
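
The new `path` argument is applied later in this file as a plain string-prefix filter: trailing slashes are stripped, then each result's file path is tested with `startsWith`. A minimal sketch of that matching rule, with invented file names:

```ts
// Sketch of the path filter added later in this diff (file names are invented for illustration).
const pathArg = "internal/domain/"
const prefix = pathArg.replace(/\/+$/, "") // trailing slashes stripped → "internal/domain"

const files = [
  "internal/domain/tenant.go",        // kept
  "internal/domain/billing/plan.go",  // kept
  "cmd/server/main.go",               // dropped
  "internal/domains_v2/tenant.go",    // also kept: raw prefix match, not path-segment aware
]
console.log(files.filter(f => f.startsWith(prefix)))
```

Because the comparison is a raw prefix test, a sibling directory that happens to share the prefix also passes the filter.
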
@@ -119,9 +121,9 @@ Examples:
  }

  allResults.sort((a, b) => {
- // Prefer combinedScore (hybrid), fall back to distance
- const scoreA = a._combinedScore ?? (a._distance != null ? 1 - a._distance : 0)
- const scoreB = b._combinedScore ?? (b._distance != null ? 1 - b._distance : 0)
+ // Prefer combinedScore (hybrid), fall back to L2→similarity conversion
+ const scoreA = a._combinedScore ?? (a._distance != null ? Math.max(0, 1 - a._distance / 2) : 0)
+ const scoreB = b._combinedScore ?? (b._distance != null ? Math.max(0, 1 - b._distance / 2) : 0)
  return scoreB - scoreA
  })
  allResults = allResults.slice(0, limit)
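
The changed fallback maps an L2 distance into a similarity in [0, 1] instead of using `1 - distance` directly. A minimal sketch of why the divisor is 2, assuming the embeddings are unit-normalized (the diff itself does not state this):

```ts
// For unit-length vectors, L2 distance ranges over [0, 2] (d² = 2 · (1 - cosine)),
// so the old 1 - d could go negative, while Math.max(0, 1 - d / 2) stays in [0, 1].
const similarityFromDistance = (d: number | null): number =>
  d != null ? Math.max(0, 1 - d / 2) : 0

console.log(similarityFromDistance(0))    // 1   (identical vectors)
console.log(similarityFromDistance(1.2))  // 0.4 (the old formula would give roughly -0.2)
console.log(similarityFromDistance(2))    // 0   (opposite vectors)
```
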
@@ -130,7 +132,16 @@ Examples:
  try {
  await fs.access(hashesFile)
  } catch {
- return `Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
+ // Index doesn't exist check what indexes ARE available
+ const tempIndexer = await new CodebaseIndexer(projectRoot, "code").init()
+ const available = await tempIndexer.listIndexes()
+ await tempIndexer.unloadModel()
+
+ if (available.length > 0) {
+ const list = available.map(i => `"${i}"`).join(", ")
+ return `Index "${indexName}" not found. Available indexes: ${list}.\n\nTry: search({ query: "${args.query}", index: "${available[0]}" })\nOr search all: search({ query: "${args.query}", searchAll: true })`
+ }
+ return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
  }

  const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
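
The catch branch now suggests existing indexes instead of a bare "not found". A hypothetical rendering of that reply, with invented index names and query (only the message format comes from the hunk above):

```ts
// Invented inputs; the template mirrors the hunk above.
const indexName = "docs"
const available = ["code", "config"] // what listIndexes() might return
const query = "how to deploy"

const list = available.map(i => `"${i}"`).join(", ")
console.log(
  `Index "${indexName}" not found. Available indexes: ${list}.\n\n` +
  `Try: search({ query: "${query}", index: "${available[0]}" })\n` +
  `Or search all: search({ query: "${query}", searchAll: true })`
)
// Index "docs" not found. Available indexes: "code", "config".
//
// Try: search({ query: "how to deploy", index: "code" })
// Or search all: search({ query: "how to deploy", searchAll: true })
```
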
@@ -146,23 +157,83 @@ Examples:
  return score >= minScore
  })

- if (allResults.length === 0) {
+ // ── Path filter — restrict to subtree ──────────────────────────────────
+ if (args.path) {
+ const prefix = args.path.replace(/\/+$/, "") // normalize trailing slash
+ allResults = allResults.filter(r => r.file && r.file.startsWith(prefix))
+ }
+
+ // ── Reranking — boost results where query keywords appear in text ──────
+ // Also store score components for breakdown display
+ const queryKeywords = args.query.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
+ for (const r of allResults) {
+ // Vector score (L2 → similarity)
+ const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
+ r._vectorScore = vectorScore
+
+ // BM25 component (present only in hybrid mode — embedded in _combinedScore)
+ // If _combinedScore exists and differs from vectorScore, the difference is BM25 contribution
+ r._bm25Component = r._combinedScore != null ? Math.max(0, r._combinedScore - vectorScore) : 0
+
+ // Base score before keyword boost
+ const baseScore = r._combinedScore ?? vectorScore
+
+ // Keyword matching
+ const text = (r.content || "").toLowerCase()
+ const matchedKeywords: string[] = []
+ if (queryKeywords.length > 0) {
+ for (const kw of queryKeywords) {
+ if (text.includes(kw)) matchedKeywords.push(kw)
+ }
+ }
+ r._matchedKeywords = matchedKeywords
+ const keywordBonus = queryKeywords.length > 0 ? (matchedKeywords.length / queryKeywords.length) * 0.15 : 0
+ r._keywordBonus = keywordBonus
+ r._finalScore = baseScore + keywordBonus
+ }
+ allResults.sort((a: any, b: any) => (b._finalScore ?? 0) - (a._finalScore ?? 0))
+
+ // ── Group by file — best chunk per file, with chunk count ─────────────
+ const fileGroups = new Map<string, { best: any, chunks: any[] }>()
+ for (const r of allResults) {
+ const key = r.file
+ if (!fileGroups.has(key)) {
+ fileGroups.set(key, { best: r, chunks: [r] })
+ } else {
+ const group = fileGroups.get(key)!
+ group.chunks.push(r)
+ if ((r._finalScore ?? 0) > (group.best._finalScore ?? 0)) {
+ group.best = r
+ }
+ }
+ }
+
+ // Sort groups by best chunk score, take top N unique files
+ const sortedGroups = [...fileGroups.values()]
+ .sort((a, b) => (b.best._finalScore ?? 0) - (a.best._finalScore ?? 0))
+ .slice(0, limit)
+
+ if (sortedGroups.length === 0) {
  const scope = args.searchAll ? "any index" : `index "${indexName}"`
  return `No results found in ${scope} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords\n- Lower minScore threshold: search({ query: "...", minScore: 0.2 })\n- Enable hybrid search: search({ query: "...", hybrid: true })`
  }

+ // ── Confidence signal ──────────────────────────────────────────────────
+ const topScore = sortedGroups[0].best._finalScore ?? 0
  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
  const hybridLabel = args.hybrid ? " [hybrid]" : ""
- let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel})\n\n`
-
- for (let i = 0; i < allResults.length; i++) {
- const r = allResults[i]
- const score = r._combinedScore != null
- ? r._combinedScore.toFixed(3)
- : r._distance != null
- ? (1 - r._distance).toFixed(3)
- : "N/A"
+ const pathLabel = args.path ? ` path:"${args.path}"` : ""
+ let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel}${pathLabel})\n\n`
+
+ if (topScore < 0.45) {
+ output += `> **Low confidence results.** Best score: ${topScore.toFixed(3)}. These results may not be relevant to your query.\n> Try more specific keywords, different phrasing, or hybrid: true.\n\n`
+ }
+
+ for (let i = 0; i < sortedGroups.length; i++) {
+ const { best: r, chunks } = sortedGroups[i]
+ const score = (r._finalScore ?? 0).toFixed(3)
  const indexLabel = args.searchAll ? ` [${r._index}]` : ""
+ const chunkNote = chunks.length > 1 ? ` (${chunks.length} matching sections)` : ""

  // v2: show rich metadata when available
  const metaParts: string[] = []
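
The keyword boost added above tops out at +0.15 when every query keyword longer than two characters appears in the chunk text. A worked example with an invented query and chunk:

```ts
// Invented query, chunk text, and base score; the formula is the one added above.
const query = "tenant repository interface"
const keywords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2)
// → ["tenant", "repository", "interface"]

const chunkText = "type TenantRepository interface { FindByID(id string) (*Tenant, error) }".toLowerCase()
const matched = keywords.filter(kw => chunkText.includes(kw)) // all three match

const baseScore = 0.62 // hypothetical _combinedScore (or vector fallback)
const keywordBonus = (matched.length / keywords.length) * 0.15 // 3/3 * 0.15 = 0.15
console.log((baseScore + keywordBonus).toFixed(3)) // "0.770"
```

After reranking, chunks are grouped per file: only the best chunk of each file is rendered, and the rest are reported as additional "matching sections".
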
@@ -172,13 +243,36 @@ Examples:
  if (r.class_name) metaParts.push(`class: ${r.class_name}`)
  const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""

- output += `### ${i + 1}. ${r.file}${indexLabel}\n`
- output += `**Relevance:** ${score}${metaLine}\n\n`
+ // Score breakdown: vector + bm25 + keywords
+ const breakdownParts: string[] = [`vec: ${(r._vectorScore ?? 0).toFixed(2)}`]
+ if (r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
+ if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
+ const breakdown = breakdownParts.join(", ")
+
+ // Matched keywords
+ const kwDisplay = r._matchedKeywords && r._matchedKeywords.length > 0
+ ? ` | matched: "${r._matchedKeywords.join('", "')}"`
+ : ""
+
+ output += `### ${i + 1}. ${r.file}${indexLabel}${chunkNote}\n`
+ output += `**Score:** ${score} (${breakdown}${kwDisplay})${metaLine}\n\n`
  output += "```\n"
  const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
  output += content
  output += "\n```\n"

+ // Show second-best chunk from same file if available (brief)
+ if (chunks.length > 1) {
+ const second = chunks.find((c: any) => c !== r)
+ if (second) {
+ const secMeta: string[] = []
+ if (second.function_name) secMeta.push(`fn: ${second.function_name}`)
+ if (second.heading_context) secMeta.push(`"${second.heading_context}"`)
+ const secLabel = secMeta.length > 0 ? ` ${secMeta.join(", ")}` : ""
+ output += `\n*Also:${secLabel}*\n`
+ }
+ }
+
  if (r.relatedContext && r.relatedContext.length > 0) {
  output += "\n**Related Context:**\n"
  for (const rel of r.relatedContext) {
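
Putting the pieces together, each result header now reports the vector, BM25, and keyword components separately. A small self-contained sketch with invented values (only the format strings mirror the hunk above):

```ts
// Invented scores and file name; only the formatting mirrors the diff above.
const r = {
  file: "internal/domain/tenant.go",
  _vectorScore: 0.58,
  _bm25Component: 0.09,
  _keywordBonus: 0.10,
  _finalScore: 0.77,
  _matchedKeywords: ["tenant", "repository"],
}

const breakdownParts: string[] = [`vec: ${r._vectorScore.toFixed(2)}`]
if (r._bm25Component > 0.005) breakdownParts.push(`bm25: +${r._bm25Component.toFixed(2)}`)
if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
const kwDisplay = r._matchedKeywords.length > 0 ? ` | matched: "${r._matchedKeywords.join('", "')}"` : ""

console.log(`**Score:** ${r._finalScore.toFixed(3)} (${breakdownParts.join(", ")}${kwDisplay})`)
// **Score:** 0.770 (vec: 0.58, bm25: +0.09, kw: +0.10 | matched: "tenant", "repository")
```
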
@@ -193,7 +287,9 @@ Examples:
  output += "\n"
  }

- output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
+ const totalChunks = allResults.length
+ const uniqueFiles = sortedGroups.length
+ output += `---\n*${uniqueFiles} files (${totalChunks} chunks). Use Read tool to see full files.*`
  return output
  } catch (error: any) {
  return `Search failed: ${error.message || String(error)}`
@@ -239,7 +239,7 @@ async function loadConfig(projectRoot) {
  }

  // Parse vectorizer section from YAML
- const vectorizerMatch = content.match(/^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|\Z)/m);
+ const vectorizerMatch = content.match(/^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (!vectorizerMatch) {
  await ensureDefaultConfig(projectRoot);
  return;
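
The repeated `\Z` → `(?![\s\S])` change in this file fixes an end-of-input anchor: in JavaScript regexes, unlike PCRE or Python, `\Z` is not an end-of-string anchor; without the `u` flag it simply matches a literal `Z`, so a section sitting at the end of the config was never captured. `(?![\s\S])` is a negative lookahead that only succeeds when no character remains. A minimal sketch with a hypothetical one-key YAML:

```ts
// Hypothetical config snippet; the two patterns are the before/after from the hunk above.
const yaml = "vectorizer:\n  model: all-MiniLM-L6-v2\n"

const oldRe = /^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|\Z)/m
const newRe = /^vectorizer:([\s\S]*?)(?=^[a-zA-Z_\-]+:|(?![\s\S]))/m

console.log(oldRe.exec(yaml))      // null: "\Z" only matches a literal "Z", so a trailing section is missed
console.log(newRe.exec(yaml)?.[1]) // "\n  model: all-MiniLM-L6-v2\n": capture runs to end of input
```
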
@@ -255,7 +255,7 @@ async function loadConfig(projectRoot) {
  }

  // ── Parse cleaning config ───────────────────────────────────────────────
- const cleaningMatch = section.match(/^\s{2}cleaning:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ const cleaningMatch = section.match(/^\s{2}cleaning:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (cleaningMatch) {
  const cs = cleaningMatch[1];
  CLEANING_CONFIG = {
@@ -267,7 +267,7 @@ async function loadConfig(projectRoot) {
  }

  // ── Parse chunking config ───────────────────────────────────────────────
- const chunkingMatch = section.match(/^\s{2}chunking:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ const chunkingMatch = section.match(/^\s{2}chunking:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (chunkingMatch) {
  const cs = chunkingMatch[1];
  const strategy = parseString(cs, "strategy", "semantic");
@@ -292,7 +292,7 @@ async function loadConfig(projectRoot) {
  }

  // ── Parse search config ─────────────────────────────────────────────────
- const searchMatch = section.match(/^\s{2}search:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ const searchMatch = section.match(/^\s{2}search:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (searchMatch) {
  const ss = searchMatch[1];
  HYBRID_CONFIG = {
@@ -302,7 +302,7 @@ async function loadConfig(projectRoot) {
  }

  // ── Parse quality config ────────────────────────────────────────────────
- const qualityMatch = section.match(/^\s{2}quality:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ const qualityMatch = section.match(/^\s{2}quality:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (qualityMatch) {
  const qs = qualityMatch[1];
  METRICS_ENABLED = parseBool(qs, "enable_metrics", false);
@@ -310,7 +310,7 @@ async function loadConfig(projectRoot) {
  }

  // ── Parse graph config (v3) ──────────────────────────────────────────────
- const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (graphMatch) {
  const gs = graphMatch[1];
  GRAPH_CONFIG.enabled = parseBool(gs, "enabled", DEFAULT_GRAPH_CONFIG.enabled);
@@ -321,7 +321,7 @@ async function loadConfig(projectRoot) {
  GRAPH_CONFIG.read_intercept = parseBool(gs, "read_intercept", DEFAULT_GRAPH_CONFIG.read_intercept);

  // Nested lsp: section
- const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|\Z)/m);
+ const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|(?![\s\S]))/m);
  if (lspMatch) {
  const ls = lspMatch[1];
  GRAPH_CONFIG.lsp.enabled = parseBool(ls, "enabled", DEFAULT_GRAPH_CONFIG.lsp.enabled);
@@ -342,7 +342,7 @@ async function loadConfig(projectRoot) {
  }

  // Parse indexes section
- const indexesMatch = section.match(/^\s{2}indexes:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\s{2}exclude:|\Z)/m);
+ const indexesMatch = section.match(/^\s{2}indexes:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\s{2}exclude:|(?![\s\S]))/m);
  if (!indexesMatch) return;

  const indexesSection = indexesMatch[1];
@@ -350,7 +350,7 @@ async function loadConfig(projectRoot) {
  // Parse each index (code, docs, config)
  for (const indexName of ["code", "docs", "config"]) {
  const indexRegex = new RegExp(
- `^\\s{4}${indexName}:\\s*\\n([\\s\\S]*?)(?=^\\s{4}[a-zA-Z_\\-]+:|\\Z)`,
+ `^\\s{4}${indexName}:\\s*\\n([\\s\\S]*?)(?=^\\s{4}[a-zA-Z_\\-]+:|(?![\\s\\S]))`,
  "m",
  );
  const indexMatch = indexesSection.match(indexRegex);