@comfanion/usethis_search 0.1.5 → 0.2.0-dev.0

package/README.md CHANGED
@@ -11,9 +11,13 @@ Forget about `grep` and `find` — search code by meaning, not by text!
  An OpenCode plugin that adds **smart search** to your project:

  - 🧠 **Semantic search** — finds code by meaning, even when words don't match
+ - 🔀 **Hybrid search (v2)** — combines vector similarity + BM25 keyword matching
+ - 🧩 **Semantic chunking (v2)** — structure-aware splitting for Markdown (headings) and code (functions/classes)
+ - 🏷️ **Rich metadata (v2)** — filter by file type, language, date, tags
  - ⚡ **Automatic indexing** — files are indexed on change (zero effort)
  - 📦 **Local vectorization** — works offline, no API keys needed
  - 🎯 **Three indexes** — separate for code, docs, and configs
+ - 📊 **Quality metrics (v2)** — track search relevance and usage
  - 🌍 **Multilingual** — supports Ukrainian, Russian, and English

  ---
@@ -79,6 +83,31 @@ search({
  query: "database connection",
  searchAll: true
  })
+
+ // v2: Hybrid search (vector + keyword matching)
+ search({
+ query: "getUserById",
+ hybrid: true
+ })
+
+ // v2: Filter by file type and language
+ search({
+ query: "authentication logic",
+ fileType: "code",
+ language: "typescript"
+ })
+
+ // v2: Filter by date
+ search({
+ query: "recent changes",
+ modifiedAfter: "2024-06-01"
+ })
+
+ // v2: Filter by frontmatter tags
+ search({
+ query: "security",
+ tags: "auth,security"
+ })
  ```

  ### Index Management
@@ -99,6 +128,9 @@ codeindex({
  index: "docs",
  dir: "docs/"
  })
+
+ // v2: Run quality tests against gold dataset
+ codeindex({ action: "test", index: "code" })
  ```

  ---
@@ -108,9 +140,12 @@ codeindex({
  ### Semantic Search

  Instead of searching for exact text matches, the plugin:
- 1. Converts code into **vectors** (numerical representations of meaning)
- 2. Compares vectors of your query with vectors of code
- 3. Finds the most **semantically similar** fragments
+ 1. **Cleans** content (removes TOC, noise, auto-generated markers)
+ 2. **Chunks** intelligently (Markdown by headings, code by functions/classes)
+ 3. Converts chunks into **vectors** (numerical representations of meaning)
+ 4. Compares vectors of your query with vectors of code
+ 5. Optionally combines with **BM25 keyword search** (hybrid mode)
+ 6. Returns the most **semantically similar** fragments with rich metadata

  **Example:**
  ```javascript
@@ -138,16 +173,44 @@ vectorizer:
  auto_index: true # Automatic indexing
  debounce_ms: 1000 # Delay before indexing (ms)

+ # v2: Content cleaning
+ cleaning:
+   remove_toc: true
+   remove_frontmatter_metadata: false
+   remove_imports: false
+   remove_comments: false
+
+ # v2: Semantic chunking
+ chunking:
+   strategy: "semantic" # fixed | semantic
+   markdown:
+     split_by_headings: true
+     min_chunk_size: 200
+     max_chunk_size: 2000
+     preserve_heading_hierarchy: true
+   code:
+     split_by_functions: true
+     include_function_signature: true
+     min_chunk_size: 300
+     max_chunk_size: 1500
+
+ # v2: Hybrid search
+ search:
+   hybrid: false # vector + BM25
+   bm25_weight: 0.3
+
+ # v2: Quality monitoring
+ quality:
+   enable_metrics: false
+   enable_cache: true
+
  indexes:
    code:
      enabled: true
-     extensions: [.js, .ts, .jsx, .tsx, .py, .go, ...]
    docs:
      enabled: true
-     extensions: [.md, .mdx, .txt, .rst, .adoc]
    config:
-     enabled: false # Disabled by default
-     extensions: [.yaml, .yml, .json, .toml, ...]
+     enabled: false

  exclude:
    - node_modules
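
As a rough illustration of how the `search.hybrid` and `bm25_weight` settings above could interact, the sketch below combines a vector similarity score with a BM25 score using a fixed weight. It is a minimal, assumption-laden example: the names `ScoredChunk` and `mergeScores` are illustrative and do not reflect the plugin's actual `hybrid-search.ts` API.

```typescript
// Illustrative sketch only: weighted merge of vector + BM25 scores,
// assuming both scores are already normalized to the 0..1 range.
interface ScoredChunk {
  file: string
  vectorScore: number // e.g. 1 - cosine distance
  bm25Score: number   // normalized keyword score
}

function mergeScores(chunks: ScoredChunk[], bm25Weight = 0.3) {
  return chunks
    .map((c) => ({ ...c, combined: (1 - bm25Weight) * c.vectorScore + bm25Weight * c.bm25Score }))
    .sort((a, b) => b.combined - a.combined)
}
```
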
@@ -324,6 +387,34 @@ codeindex({ action: "list" })
  - **Model size:** ~23 MB (downloaded once)
  - **Speed:** ~0.5 sec/file (after model loading)

+ ### v2 Architecture
+
+ ```
+ File → Content Cleaner → Chunker Factory → Embedder → LanceDB
+                              ├── Markdown Chunker (heading-aware)
+                              ├── Code Chunker (function/class-aware)
+                              └── Fixed Chunker (fallback)
+
+ Query → Query Cache → Embedder → Vector Search ─┐
+         └──────────→ BM25 Search ────┤→ Hybrid Merge → Filter → Results
+
+ Metadata Filter (type, lang, date, tags)
+ ```
+
+ ### New Modules (v2)
+
+ | Module | Purpose |
+ |--------|---------|
+ | `content-cleaner.ts` | Remove noise (TOC, breadcrumbs, markers) |
+ | `metadata-extractor.ts` | Extract file_type, language, tags, dates |
+ | `markdown-chunker.ts` | Heading-aware splitting with hierarchy |
+ | `code-chunker.ts` | Function/class-aware splitting |
+ | `chunker-factory.ts` | Route to correct chunker by file type |
+ | `bm25-index.ts` | Inverted index for keyword search |
+ | `hybrid-search.ts` | Merge vector + BM25 scores |
+ | `query-cache.ts` | LRU cache for query embeddings |
+ | `search-metrics.ts` | Track search quality metrics |
+
  ---

  ## 🤝 Contributing
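
The diagram and module table read as a simple factory pattern: pick a chunker by file type, fall back to fixed-size splitting. The sketch below is a hypothetical rendering of that routing step; the `Chunker` type and `pickChunker` function are invented for illustration and are not the plugin's `chunker-factory.ts` interface.

```typescript
// Hypothetical illustration of the "Chunker Factory" box in the diagram above.
type Chunker = (content: string) => string[]

function pickChunker(
  filePath: string,
  markdownChunker: Chunker, // heading-aware
  codeChunker: Chunker,     // function/class-aware
  fixedChunker: Chunker     // fallback
): Chunker {
  if (/\.(md|mdx)$/i.test(filePath)) return markdownChunker
  if (/\.(ts|tsx|js|jsx|py|go)$/i.test(filePath)) return codeChunker
  return fixedChunker
}
```
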
package/file-indexer.ts CHANGED
@@ -326,6 +326,8 @@ async function ensureIndexOnSessionStart(
  return { totalFiles, elapsedSeconds, action }
  }

+ const STALE_THRESHOLD_MS = 5 * 60 * 1000 // 5 minutes — evict stuck entries
+
  async function processPendingFiles(projectRoot: string, config: VectorizerConfig): Promise<void> {
  if (pendingFiles.size === 0) return
  if (SKIP_AUTO_INDEX) {
@@ -335,6 +337,7 @@ async function processPendingFiles(projectRoot: string, config: VectorizerConfig

  const now = Date.now()
  const filesToProcess: Map<string, string[]> = new Map()
+ const staleKeys: string[] = []

  for (const [filePath, info] of pendingFiles.entries()) {
  if (now - info.timestamp >= config.debounce_ms) {
@@ -342,9 +345,17 @@ async function processPendingFiles(projectRoot: string, config: VectorizerConfig
  files.push(filePath)
  filesToProcess.set(info.indexName, files)
  pendingFiles.delete(filePath)
+ } else if (now - info.timestamp > STALE_THRESHOLD_MS) {
+ staleKeys.push(filePath)
  }
  }

+ // Evict entries stuck for >5 minutes (prevents unbounded growth)
+ for (const key of staleKeys) {
+ debug(`Evicting stale pending file: ${key}`)
+ pendingFiles.delete(key)
+ }
+
  if (filesToProcess.size === 0) return

  debug(`Processing ${filesToProcess.size} index(es)...`)
@@ -425,6 +436,9 @@ export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
  }, 1000)
  }

+ let lastProcessTime = Date.now()
+ const MAX_DEBOUNCE_WAIT_MS = 5000 // Force processing after 5s of rapid edits
+
  function queueFileForIndexing(filePath: string): void {
  const relativePath = path.relative(directory, filePath)
  if (relativePath.startsWith("..") || path.isAbsolute(relativePath)) return
@@ -439,9 +453,15 @@ export const FileIndexerPlugin: Plugin = async ({ directory, client }) => {
  if (processingTimeout) {
  clearTimeout(processingTimeout)
  }
+
+ // If rapid edits keep resetting the timer, force processing after MAX_DEBOUNCE_WAIT_MS
+ const timeSinceLast = Date.now() - lastProcessTime
+ const waitTime = timeSinceLast > MAX_DEBOUNCE_WAIT_MS ? 0 : config.debounce_ms + 100
+
  processingTimeout = setTimeout(async () => {
+ lastProcessTime = Date.now()
  await processPendingFiles(directory, config)
- }, config.debounce_ms + 100)
+ }, waitTime)
  }

  return {
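
The change above caps how long rapid edits can keep postponing indexing: the timer still resets on every change, but once nothing has been processed for longer than MAX_DEBOUNCE_WAIT_MS, the next timer fires immediately. A standalone sketch of the same pattern (not the plugin's code) for reference:

```typescript
// Debounce with a cap: reset the timer on each call, but fire immediately
// if the last run happened more than maxWaitMs ago.
function debounceWithCap(fn: () => void, delayMs: number, maxWaitMs: number): () => void {
  let timer: ReturnType<typeof setTimeout> | undefined
  let lastRun = Date.now()
  return () => {
    if (timer) clearTimeout(timer)
    const wait = Date.now() - lastRun > maxWaitMs ? 0 : delayMs
    timer = setTimeout(() => {
      lastRun = Date.now()
      fn()
    }, wait)
  }
}
```
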
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@comfanion/usethis_search",
- "version": "0.1.5",
- "description": "OpenCode plugin: semantic search + code index management",
+ "version": "0.2.0-dev.0",
+ "description": "OpenCode plugin: semantic search + code index management (v2: hybrid search, semantic chunking, metadata filtering)",
  "type": "module",
  "main": "./index.ts",
  "exports": {
@@ -16,6 +16,16 @@
  "tools/search.ts",
  "tools/codeindex.ts",
  "vectorizer/index.js",
+ "vectorizer/content-cleaner.ts",
+ "vectorizer/metadata-extractor.ts",
+ "vectorizer/bm25-index.ts",
+ "vectorizer/hybrid-search.ts",
+ "vectorizer/query-cache.ts",
+ "vectorizer/search-metrics.ts",
+ "vectorizer/chunkers/markdown-chunker.ts",
+ "vectorizer/chunkers/code-chunker.ts",
+ "vectorizer/chunkers/chunker-factory.ts",
+ "vectorizer.yaml",
  "README.md",
  "LICENSE"
  ],
package/tools/codeindex.ts CHANGED
@@ -1,7 +1,8 @@
  /**
- * Code Index Status & Management Tool
+ * Code Index Status & Management Tool (v2)
  *
  * Uses bundled vectorizer. Index data is stored in `.opencode/vectors/<index>/`.
+ * v2: added "test" action for gold dataset testing, richer stats.
  */

  import { tool } from "@opencode-ai/plugin"
@@ -59,6 +60,7 @@ Actions:
  - "status" → Show index statistics
  - "list" → List all available indexes with stats
  - "reindex" → Re-index files using local vectorizer
+ - "test" → Run gold dataset quality tests (if configured)

  Available indexes:
  - "code" - Source code files
@@ -66,7 +68,7 @@ Available indexes:
  - "config" - Configuration files`,

  args: {
- action: tool.schema.enum(["status", "list", "reindex"]).describe("Action to perform"),
+ action: tool.schema.enum(["status", "list", "reindex", "test"]).describe("Action to perform"),
  index: tool.schema.string().optional().default("code").describe("Index name: code, docs, config"),
  dir: tool.schema.string().optional().describe("Directory to index (default: project root)"),
  },
@@ -87,7 +89,7 @@ Available indexes:
  } catch {}

  if (indexes.length === 0) {
- output += `⚠️ No indexes created yet\n\nCreate indexes:\n\n\`\`\`\n`
+ output += `No indexes created yet\n\nCreate indexes:\n\n\`\`\`\n`
  output += `codeindex({ action: "reindex", index: "code" })\n`
  output += `codeindex({ action: "reindex", index: "docs", dir: "docs/" })\n`
  output += `\`\`\`\n`
@@ -95,31 +97,62 @@ Available indexes:
  output += `### Active Indexes\n\n`
  for (const idx of indexes) {
  try {
- const hashesPath = path.join(vectorsDir, idx, "hashes.json")
- const hashes = JSON.parse(await fs.readFile(hashesPath, "utf8"))
- const fileCount = Object.keys(hashes).length
+ const indexer = await new CodebaseIndexer(projectRoot, idx).init()
+ const stats = await indexer.getStats()
+ await indexer.unloadModel()
  const desc = INDEX_DESCRIPTIONS[idx] || "Custom index"
- output += `- ${idx} - ${desc} (files: ${fileCount})\n`
+ const features = stats.features
+ ? ` | chunking: ${stats.features.chunking}, hybrid: ${stats.features.hybrid ? "on" : "off"}`
+ : ""
+ output += `- **${idx}** - ${desc} (files: ${stats.fileCount}, chunks: ${stats.chunkCount}${features})\n`
  } catch {
  output += `- ${idx}\n`
  }
  }
  }

- output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\n\`\`\``
+ output += `\n### Usage\n\n\`\`\`\nsearch({ query: "your query", index: "code" })\nsearch({ query: "your query", hybrid: true }) // v2: hybrid search\nsearch({ query: "your query", fileType: "code", language: "typescript" }) // v2: filters\n\`\`\``
  return output
  }

  if (args.action === "status") {
  const hashesFile = path.join(vectorsDir, indexName, "hashes.json")
  try {
- const hashesContent = await fs.readFile(hashesFile, "utf8")
- const hashes = JSON.parse(hashesContent)
- const fileCount = Object.keys(hashes).length
- const sampleFiles = Object.keys(hashes).slice(0, 5)
+ const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+ const stats = await indexer.getStats()
+ await indexer.unloadModel()
+
+ const sampleFiles = Object.keys(JSON.parse(await fs.readFile(hashesFile, "utf8"))).slice(0, 5)
  const desc = INDEX_DESCRIPTIONS[indexName] || "Custom index"

- return `## Index Status: "${indexName}"\n\n**Description:** ${desc}\n**Files indexed:** ${fileCount}\n\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${fileCount > 5 ? `\n- ... and ${fileCount - 5} more` : ""}`
+ let output = `## Index Status: "${indexName}"\n\n`
+ output += `**Description:** ${desc}\n`
+ output += `**Files indexed:** ${stats.fileCount}\n`
+ output += `**Total chunks:** ${stats.chunkCount}\n`
+ output += `**Model:** ${stats.model}\n`
+
+ if (stats.features) {
+ output += `\n**Features:**\n`
+ output += `- Chunking strategy: ${stats.features.chunking}\n`
+ output += `- Hybrid search: ${stats.features.hybrid ? "enabled" : "disabled"}\n`
+ output += `- Metrics: ${stats.features.metrics ? "enabled" : "disabled"}\n`
+ output += `- Query cache: ${stats.features.cache ? "enabled" : "disabled"}\n`
+ }
+
+ // Show metrics summary if available
+ try {
+ const metrics = await indexer.getMetrics()
+ if (metrics.total_queries > 0) {
+ output += `\n**Search Metrics:**\n`
+ output += `- Total queries: ${metrics.total_queries}\n`
+ output += `- Avg results/query: ${metrics.avg_results_per_query.toFixed(1)}\n`
+ output += `- Zero results rate: ${(metrics.zero_results_rate * 100).toFixed(1)}%\n`
+ output += `- Avg relevance: ${metrics.avg_relevance.toFixed(3)}\n`
+ }
+ } catch {}
+
+ output += `\n**Sample indexed files:**\n${sampleFiles.map((f) => `- ${f}`).join("\n")}${stats.fileCount > 5 ? `\n- ... and ${stats.fileCount - 5} more` : ""}`
+ return output
  } catch {
  return `## Index Status: "${indexName}"\n\nIndex "${indexName}" not created yet. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
  }
@@ -148,12 +181,98 @@ Available indexes:
  await indexer.unloadModel()
  const stats = await indexer.getStats()

- return `## Re-indexing Complete ✅\n\n**Index:** ${indexName}\n**Directory:** ${args.dir || "(project root)"}\n**Files found:** ${files.length}\n**Files indexed:** ${indexed}\n**Files unchanged:** ${skipped}\n**Total chunks:** ${stats.chunkCount}`
+ let output = `## Re-indexing Complete\n\n`
+ output += `**Index:** ${indexName}\n`
+ output += `**Directory:** ${args.dir || "(project root)"}\n`
+ output += `**Files found:** ${files.length}\n`
+ output += `**Files indexed:** ${indexed}\n`
+ output += `**Files unchanged:** ${skipped}\n`
+ output += `**Total chunks:** ${stats.chunkCount}\n`
+ if (stats.features) {
+ output += `**Chunking:** ${stats.features.chunking}\n`
+ }
+ return output
+ } catch (error: any) {
+ return `Re-indexing failed: ${error.message || String(error)}`
+ }
+ }
+
+ if (args.action === "test") {
+ try {
+ const goldPath = path.join(projectRoot, ".opencode", "vectors", "gold-dataset.yaml")
+ let goldContent: string
+ try {
+ goldContent = await fs.readFile(goldPath, "utf8")
+ } catch {
+ return `## Gold Dataset Test\n\nNo gold dataset found at: ${goldPath}\n\nCreate one with test queries and expected results.\nSee docs/search-plugin-upgrade-plan.md for format.`
+ }
+
+ // Simple YAML parsing for test queries
+ const tests: { query: string; expected_files: string[]; min_relevance: number; description?: string }[] = []
+ const queryBlocks = goldContent.split(/\n\s+-\s+query:\s*/)
+ for (const block of queryBlocks.slice(1)) {
+ const queryMatch = block.match(/^["']?([^"'\n]+)["']?/)
+ const filesMatch = block.match(/expected_files:\s*\n((?:\s+-\s+.+\n?)+)/)
+ const relMatch = block.match(/min_relevance:\s*([\d.]+)/)
+ const descMatch = block.match(/description:\s*["']?([^"'\n]+)/)
+
+ if (queryMatch) {
+ const expectedFiles = filesMatch
+ ? filesMatch[1].split("\n").map(l => l.replace(/^\s+-\s+["']?/, "").replace(/["']$/, "").trim()).filter(Boolean)
+ : []
+ tests.push({
+ query: queryMatch[1].trim(),
+ expected_files: expectedFiles,
+ min_relevance: relMatch ? parseFloat(relMatch[1]) : 0.7,
+ description: descMatch ? descMatch[1].trim() : undefined,
+ })
+ }
+ }
+
+ if (tests.length === 0) {
+ return `## Gold Dataset Test\n\nNo test queries found in gold dataset.`
+ }
+
+ const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
+ let passed = 0
+ let failed = 0
+ let output = `## Gold Dataset Test Results\n\n`
+
+ for (const t of tests) {
+ const results = await indexer.search(t.query, 10, false)
+ const foundFiles = results.map((r: any) => r.file)
+ const foundExpected = t.expected_files.filter(f => foundFiles.includes(f))
+ const topScore = results.length > 0 && results[0]._distance != null
+ ? 1 - results[0]._distance
+ : 0
+
+ const pass = foundExpected.length >= Math.ceil(t.expected_files.length * 0.5) && topScore >= t.min_relevance
+
+ if (pass) {
+ passed++
+ output += `**PASS** Query: "${t.query}"\n`
+ } else {
+ failed++
+ output += `**FAIL** Query: "${t.query}"\n`
+ }
+
+ output += ` Found: ${foundFiles.slice(0, 3).map((f: string) => `${f} (${(1 - (results.find((r: any) => r.file === f)?._distance ?? 1)).toFixed(2)})`).join(", ")}\n`
+ if (foundExpected.length < t.expected_files.length) {
+ const missing = t.expected_files.filter(f => !foundFiles.includes(f))
+ output += ` Missing: ${missing.join(", ")}\n`
+ }
+ output += `\n`
+ }
+
+ await indexer.unloadModel()
+
+ output += `---\n**Summary:** ${passed}/${tests.length} tests passed (${Math.round(passed / tests.length * 100)}%)\n`
+ return output
  } catch (error: any) {
- return `❌ Re-indexing failed: ${error.message || String(error)}`
+ return `Gold dataset test failed: ${error.message || String(error)}`
  }
  }

- return `Unknown action: ${args.action}. Use: status, list, or reindex`
+ return `Unknown action: ${args.action}. Use: status, list, reindex, or test`
  },
  })
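
Judging from the regex parsing in the `test` action above (items split on `- query:`, with `expected_files`, `min_relevance`, and an optional `description`), a gold dataset file is presumably shaped roughly like the sketch below. The top-level key and file paths are invented for illustration; see docs/search-plugin-upgrade-plan.md for the authoritative format.

```yaml
# Hypothetical .opencode/vectors/gold-dataset.yaml (shape inferred from the parser above)
tests:
  - query: "database connection"
    description: "Should surface the DB bootstrap module"
    expected_files:
      - "src/db/connect.ts"
    min_relevance: 0.7
  - query: "authentication logic"
    expected_files:
      - "src/auth/login.ts"
      - "src/auth/session.ts"
    min_relevance: 0.6
```
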
package/tools/search.ts CHANGED
@@ -1,7 +1,8 @@
  /**
- * Semantic Code Search Tool
+ * Semantic Code Search Tool (v2)
  *
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
+ * v2: hybrid search, metadata filtering, rich result metadata.
  * Index data is stored in `.opencode/vectors/<index>/`.
  */

@@ -33,6 +34,13 @@ Examples:
  searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
  freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
  includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
+ // v2 params
+ hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
+ fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
+ language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
+ modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
+ modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
+ tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
  },
  async execute(args) {

@@ -43,6 +51,15 @@ Examples:
  const limit = args.limit || 10
  const indexName = args.index || "code"

+ // Build search options from v2 params
+ const searchOptions: Record<string, any> = {}
+ if (args.hybrid != null) searchOptions.hybrid = args.hybrid
+ if (args.fileType) searchOptions.fileType = args.fileType
+ if (args.language) searchOptions.language = args.language
+ if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
+ if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
+ if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
+
  // Auto-freshen stale files before searching
  if (args.freshen !== false) {
  const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
@@ -56,7 +73,7 @@ Examples:
  await tempIndexer.unloadModel()

  if (indexes.length === 0) {
- return `❌ No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
+ return `No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
  }

  for (const idx of indexes) {
@@ -64,42 +81,60 @@ Examples:
  if (args.freshen !== false) {
  await indexer.freshen()
  }
- const results = await indexer.search(args.query, limit, args.includeArchived)
+ const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
  allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
  await indexer.unloadModel()
  }

- allResults.sort((a, b) => (a._distance || 0) - (b._distance || 0))
+ allResults.sort((a, b) => {
+ // Prefer combinedScore (hybrid), fall back to distance
+ const scoreA = a._combinedScore ?? (a._distance != null ? 1 - a._distance : 0)
+ const scoreB = b._combinedScore ?? (b._distance != null ? 1 - b._distance : 0)
+ return scoreB - scoreA
+ })
  allResults = allResults.slice(0, limit)
  } else {
  const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
  try {
  await fs.access(hashesFile)
  } catch {
- return `❌ Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
+ return `Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
  }

  const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
- const results = await indexer.search(args.query, limit, args.includeArchived)
+ const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
  allResults = results.map((r: any) => ({ ...r, _index: indexName }))
  await indexer.unloadModel()
  }

  if (allResults.length === 0) {
  const scope = args.searchAll ? "any index" : `index "${indexName}"`
- return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
+ return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Enable hybrid search: search({ query: "...", hybrid: true })\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
  }

  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
- let output = `## Search Results for: "${args.query}" (${scope})\n\n`
+ const hybridLabel = args.hybrid ? " [hybrid]" : ""
+ let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel})\n\n`

  for (let i = 0; i < allResults.length; i++) {
  const r = allResults[i]
- const score = r._distance ? (1 - r._distance).toFixed(3) : "N/A"
+ const score = r._combinedScore != null
+ ? r._combinedScore.toFixed(3)
+ : r._distance != null
+ ? (1 - r._distance).toFixed(3)
+ : "N/A"
  const indexLabel = args.searchAll ? ` [${r._index}]` : ""

+ // v2: show rich metadata when available
+ const metaParts: string[] = []
+ if (r.language && r.language !== "unknown") metaParts.push(r.language)
+ if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
+ if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
+ if (r.class_name) metaParts.push(`class: ${r.class_name}`)
+ const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
+
  output += `### ${i + 1}. ${r.file}${indexLabel}\n`
- output += `**Relevance:** ${score}\n\n`
+ output += `**Relevance:** ${score}${metaLine}\n\n`
  output += "```\n"
  const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
  output += content
@@ -109,7 +144,7 @@ Examples:
  output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
  return output
  } catch (error: any) {
- return `❌ Search failed: ${error.message || String(error)}`
+ return `Search failed: ${error.message || String(error)}`
  }
  },
  })