@comfanion/usethis_search 0.1.5 → 3.0.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tools/search.ts CHANGED
@@ -1,7 +1,8 @@
1
1
  /**
2
- * Semantic Code Search Tool
2
+ * Semantic Code Search Tool (v2)
3
3
  *
4
4
  * Uses local embeddings + LanceDB vector store via bundled vectorizer.
5
+ * v2: hybrid search, metadata filtering, rich result metadata.
5
6
  * Index data is stored in `.opencode/vectors/<index>/`.
6
7
  */
7
8
 
@@ -33,6 +34,13 @@ Examples:
33
34
  searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
34
35
  freshen: tool.schema.boolean().optional().default(true).describe("Auto-update stale files before searching (default: true)"),
35
36
  includeArchived: tool.schema.boolean().optional().default(false).describe("Include archived files in results (default: false). Files are archived if in /archive/ folder or have 'archived: true' in frontmatter."),
37
+ // v2 params
38
+ hybrid: tool.schema.boolean().optional().describe("Enable hybrid search (vector + BM25 keyword matching). Improves exact keyword recall."),
39
+ fileType: tool.schema.string().optional().describe("Filter by file type: 'code', 'docs', or 'config'"),
40
+ language: tool.schema.string().optional().describe("Filter by language: 'typescript', 'python', 'markdown', etc."),
41
+ modifiedAfter: tool.schema.string().optional().describe("Filter: only files modified after this ISO date (e.g. '2024-01-01')"),
42
+ modifiedBefore: tool.schema.string().optional().describe("Filter: only files modified before this ISO date"),
43
+ tags: tool.schema.string().optional().describe("Filter by frontmatter tags (comma-separated, e.g. 'auth,security')"),
36
44
  },
37
45
 
38
46
  async execute(args) {
@@ -43,6 +51,15 @@ Examples:
43
51
  const limit = args.limit || 10
44
52
  const indexName = args.index || "code"
45
53
 
54
+ // Build search options from v2 params
55
+ const searchOptions: Record<string, any> = {}
56
+ if (args.hybrid != null) searchOptions.hybrid = args.hybrid
57
+ if (args.fileType) searchOptions.fileType = args.fileType
58
+ if (args.language) searchOptions.language = args.language
59
+ if (args.modifiedAfter) searchOptions.modifiedAfter = args.modifiedAfter
60
+ if (args.modifiedBefore) searchOptions.modifiedBefore = args.modifiedBefore
61
+ if (args.tags) searchOptions.tags = args.tags.split(",").map((t: string) => t.trim()).filter(Boolean)
62
+
46
63
  // Auto-freshen stale files before searching
47
64
  if (args.freshen !== false) {
48
65
  const tempIndexer = await new CodebaseIndexer(projectRoot, indexName).init()
@@ -56,7 +73,7 @@ Examples:
56
73
  await tempIndexer.unloadModel()
57
74
 
58
75
  if (indexes.length === 0) {
59
- return `❌ No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
76
+ return `No indexes found. Create one with: codeindex({ action: "reindex", index: "code" })`
60
77
  }
61
78
 
62
79
  for (const idx of indexes) {
@@ -64,52 +81,83 @@ Examples:
64
81
  if (args.freshen !== false) {
65
82
  await indexer.freshen()
66
83
  }
67
- const results = await indexer.search(args.query, limit, args.includeArchived)
84
+ const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
68
85
  allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
69
86
  await indexer.unloadModel()
70
87
  }
71
88
 
72
- allResults.sort((a, b) => (a._distance || 0) - (b._distance || 0))
89
+ allResults.sort((a, b) => {
90
+ // Prefer combinedScore (hybrid), fall back to distance
91
+ const scoreA = a._combinedScore ?? (a._distance != null ? 1 - a._distance : 0)
92
+ const scoreB = b._combinedScore ?? (b._distance != null ? 1 - b._distance : 0)
93
+ return scoreB - scoreA
94
+ })
73
95
  allResults = allResults.slice(0, limit)
74
96
  } else {
75
97
  const hashesFile = path.join(projectRoot, ".opencode", "vectors", indexName, "hashes.json")
76
98
  try {
77
99
  await fs.access(hashesFile)
78
100
  } catch {
79
- return `❌ Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
101
+ return `Index "${indexName}" not found. Create it with: codeindex({ action: "reindex", index: "${indexName}" })`
80
102
  }
81
103
 
82
104
  const indexer = await new CodebaseIndexer(projectRoot, indexName).init()
83
- const results = await indexer.search(args.query, limit, args.includeArchived)
105
+ const results = await indexer.search(args.query, limit, args.includeArchived, searchOptions)
84
106
  allResults = results.map((r: any) => ({ ...r, _index: indexName }))
85
107
  await indexer.unloadModel()
86
108
  }
87
109
 
88
110
  if (allResults.length === 0) {
89
111
  const scope = args.searchAll ? "any index" : `index "${indexName}"`
90
- return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
112
+ return `No results found in ${scope} for: "${args.query}"\n\nTry:\n- Different keywords\n- Enable hybrid search: search({ query: "...", hybrid: true })\n- Re-index with: codeindex({ action: "reindex", index: "${indexName}" })`
91
113
  }
92
114
 
93
115
  const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
94
- let output = `## Search Results for: "${args.query}" (${scope})\n\n`
116
+ const hybridLabel = args.hybrid ? " [hybrid]" : ""
117
+ let output = `## Search Results for: "${args.query}" (${scope}${hybridLabel})\n\n`
95
118
 
96
119
  for (let i = 0; i < allResults.length; i++) {
97
120
  const r = allResults[i]
98
- const score = r._distance ? (1 - r._distance).toFixed(3) : "N/A"
121
+ const score = r._combinedScore != null
122
+ ? r._combinedScore.toFixed(3)
123
+ : r._distance != null
124
+ ? (1 - r._distance).toFixed(3)
125
+ : "N/A"
99
126
  const indexLabel = args.searchAll ? ` [${r._index}]` : ""
100
127
 
128
+ // v2: show rich metadata when available
129
+ const metaParts: string[] = []
130
+ if (r.language && r.language !== "unknown") metaParts.push(r.language)
131
+ if (r.heading_context) metaParts.push(`"${r.heading_context}"`)
132
+ if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
133
+ if (r.class_name) metaParts.push(`class: ${r.class_name}`)
134
+ const metaLine = metaParts.length > 0 ? ` (${metaParts.join(", ")})` : ""
135
+
101
136
  output += `### ${i + 1}. ${r.file}${indexLabel}\n`
102
- output += `**Relevance:** ${score}\n\n`
137
+ output += `**Relevance:** ${score}${metaLine}\n\n`
103
138
  output += "```\n"
104
139
  const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
105
140
  output += content
106
- output += "\n```\n\n"
141
+ output += "\n```\n"
142
+
143
+ if (r.relatedContext && r.relatedContext.length > 0) {
144
+ output += "\n**Related Context:**\n"
145
+ for (const rel of r.relatedContext) {
146
+ const snippet = rel.content.length > 200
147
+ ? rel.content.substring(0, 200) + "..."
148
+ : rel.content
149
+ output += `- **${rel.file}** (${rel.relation}, via ${rel.via}, score: ${rel.score.toFixed(2)})\n`
150
+ output += ` \`\`\`\n ${snippet}\n \`\`\`\n`
151
+ }
152
+ }
153
+
154
+ output += "\n"
107
155
  }
108
156
 
109
157
  output += `---\n*Found ${allResults.length} results. Use Read tool to see full files.*`
110
158
  return output
111
159
  } catch (error: any) {
112
- return `❌ Search failed: ${error.message || String(error)}`
160
+ return `Search failed: ${error.message || String(error)}`
113
161
  }
114
162
  },
115
163
  })
@@ -0,0 +1,162 @@
1
+ import { ChunkWithId } from "../graph-builder"
2
+
3
/**
 * A directed edge between two chunks, as emitted by the LSP-based analyzer.
 */
export interface Relation {
  /** Chunk id the edge starts at. */
  from: string
  /** Chunk id the edge points to. */
  to: string
  /** Edge label, e.g. "implements", "used_by" or "references". */
  predicate: string
  /** Edge weight; this analyzer always emits 1.0. */
  weight: number
  /** Producer tag; always "lsp" for relations declared in this file. */
  source: "lsp"
  /** Optional line number associated with the relation. */
  line?: number
}
11
+
12
/**
 * Builds chunk-to-chunk relations for a source file from language-server data
 * (document symbols, implementations, references, definitions).
 *
 * The language-server calls in this class are placeholders: `checkLSPServer`
 * always reports "unavailable" and the query helpers return no data, so
 * `analyzeFile` currently always yields an empty list.
 */
export class LSPAnalyzer {
  // Not read anywhere in this class; presumably the per-request LSP timeout in
  // milliseconds — TODO confirm once the server calls are implemented.
  private readonly timeout = 5000

  /** File extension (without the dot) to LSP language identifier. */
  private static readonly LANGUAGE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
    ["ts", "typescript"],
    ["js", "javascript"],
    ["tsx", "typescriptreact"],
    ["jsx", "javascriptreact"],
    ["py", "python"],
    ["go", "go"],
    ["rs", "rust"],
    ["java", "java"],
    ["cpp", "cpp"],
    ["c", "c"],
    ["cs", "csharp"]
  ])

  /**
   * Reports whether a language server can be used for `filePath`.
   *
   * @param filePath Path of the file to analyze.
   * @returns `false` when the extension is unknown, the server probe says no,
   *          or the probe fails for any reason.
   */
  async isAvailable(filePath: string): Promise<boolean> {
    try {
      const language = this.languageForFile(filePath)
      if (!language) return false

      // `await` inside the try block so a rejected probe is turned into `false`
      // instead of escaping the catch below.
      return await this.checkLSPServer(language)
    } catch {
      return false
    }
  }

  /**
   * Collects relations for every symbol the language server reports in `filePath`.
   *
   * Emitted edges (all with weight 1.0 and source "lsp"):
   * - "implements": implementation chunk -> class/interface chunk
   * - "used_by":    referencing chunk -> symbol chunk
   * - "references": symbol chunk -> definition chunk
   * Edges whose two ends are the same chunk are dropped.
   *
   * @param filePath Path of the file to analyze.
   * @param chunks   Chunks of that file; `start_line`/`end_line` are compared
   *                 directly against the symbol line reported by the server.
   * @returns The collected relations, or `[]` if the language is unsupported,
   *          no symbols are available, or any step throws (best effort:
   *          partial results are discarded on error).
   */
  async analyzeFile(filePath: string, chunks: ChunkWithId[]): Promise<Relation[]> {
    const relations: Relation[] = []

    try {
      const language = this.languageForFile(filePath)
      if (!language) return []

      const symbols = await this.getDocumentSymbols(filePath, language)
      if (!symbols) return []

      for (const symbol of symbols) {
        const symbolChunkId = this.findChunkForPosition(chunks, symbol.line)
        if (!symbolChunkId) continue

        if (symbol.type === "class" || symbol.type === "interface") {
          const implementations = await this.getImplementations(filePath, symbol.line, symbol.character, language)
          for (const impl of implementations) {
            const implChunkId = this.resolveTargetChunk(impl)
            // The implementer is the subject of "implements"; this matches the
            // direction RegexAnalyzer uses for the same predicate.
            if (implChunkId && implChunkId !== symbolChunkId) {
              relations.push(this.makeRelation(implChunkId, symbolChunkId, "implements"))
            }
          }
        }

        const references = await this.getReferences(filePath, symbol.line, symbol.character, language)
        for (const ref of references) {
          const refChunkId = this.resolveTargetChunk(ref)
          // NOTE(review): this reads "referencing chunk used_by symbol chunk";
          // direction kept as-is — confirm against the graph consumer.
          if (refChunkId && refChunkId !== symbolChunkId) {
            relations.push(this.makeRelation(refChunkId, symbolChunkId, "used_by"))
          }
        }

        const definitions = await this.getDefinitions(filePath, symbol.line, symbol.character, language)
        for (const def of definitions) {
          const defChunkId = this.resolveTargetChunk(def)
          if (defChunkId && defChunkId !== symbolChunkId) {
            relations.push(this.makeRelation(symbolChunkId, defChunkId, "references"))
          }
        }
      }
    } catch {
      // Best effort: analysis failures must not break indexing.
      return []
    }

    return relations
  }

  /** Builds a relation record with the fixed weight/source used by this analyzer. */
  private makeRelation(from: string, to: string, predicate: string): Relation {
    return { from, to, predicate, weight: 1.0, source: "lsp" }
  }

  /** Maps a file path to its LSP language id, or `null` when unsupported. */
  private languageForFile(filePath: string): string | null {
    const ext = this.extensionOf(filePath)
    return ext ? this.getLanguage(ext) : null
  }

  /**
   * Returns the text after the last dot of the file's base name, or `null`
   * when the base name has no dot (so a file literally named "go" or "c" is
   * not mistaken for a Go/C source) or ends with a dot.
   */
  private extensionOf(filePath: string): string | null {
    const lastSeparator = Math.max(filePath.lastIndexOf("/"), filePath.lastIndexOf("\\"))
    const baseName = filePath.slice(lastSeparator + 1)
    const dot = baseName.lastIndexOf(".")
    if (dot === -1 || dot === baseName.length - 1) return null
    return baseName.slice(dot + 1)
  }

  /**
   * Looks up the LSP language id for an extension (case-sensitive, no dot).
   * Backed by a Map so names inherited from `Object.prototype`
   * ("constructor", "toString", ...) are never reported as languages.
   */
  private getLanguage(ext: string): string | null {
    return LSPAnalyzer.LANGUAGE_BY_EXTENSION.get(ext) ?? null
  }

  /** Placeholder probe: no language server integration yet, always `false`. */
  private async checkLSPServer(language: string): Promise<boolean> {
    return false
  }

  /** Placeholder: would return the document's symbols; currently `null` (none available). */
  private async getDocumentSymbols(filePath: string, language: string): Promise<Array<{ name: string; type: string; line: number; character: number }> | null> {
    return null
  }

  /** Placeholder: would return implementation locations for the symbol at the position. */
  private async getImplementations(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
    return []
  }

  /** Placeholder: would return reference locations for the symbol at the position. */
  private async getReferences(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
    return []
  }

  /** Placeholder: would return definition locations for the symbol at the position. */
  private async getDefinitions(filePath: string, line: number, character: number, language: string): Promise<Array<{ file: string; line: number; character: number }>> {
    return []
  }

  /**
   * Returns the id of the first chunk whose [start_line, end_line] range
   * (inclusive) contains `line`, or `null`. Chunks without both bounds are skipped.
   */
  private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
    for (const chunk of chunks) {
      if (chunk.start_line === undefined || chunk.end_line === undefined) continue
      if (line >= chunk.start_line && line <= chunk.end_line) {
        return chunk.chunk_id
      }
    }
    return null
  }

  /**
   * Maps an LSP location to a chunk id. Only the file is used: the result is
   * always the id of chunk 0 of `target.file`; line/character are ignored.
   * NOTE(review): assumes ids have the shape `chunk_<sanitized path>_0` —
   * confirm against the id scheme in graph-builder.
   */
  private resolveTargetChunk(target: { file: string; line: number; character: number }): string | null {
    const normalized = target.file.replace(/[^a-zA-Z0-9]/g, "_")
    return `chunk_${normalized}_0`
  }
}
@@ -0,0 +1,255 @@
1
+ import path from "path"
2
+ import { ChunkWithId } from "../graph-builder"
3
+
4
/**
 * A directed edge between two chunks, as emitted by the regex/markdown analyzer.
 */
export interface Relation {
  /** Chunk id the edge starts at. */
  from: string
  /** Chunk id the edge points to. */
  to: string
  /** Edge label, e.g. "imports", "extends", "implements" or "links_to". */
  predicate: string
  /** Edge weight; this analyzer emits 0.8 for code relations and 1.0 for markdown links. */
  weight: number
  /** Producer tag: "regex" for code relations, "markdown" for link relations. */
  source: "regex" | "markdown"
  /** Optional line index of the statement or link that produced the edge. */
  line?: number
}
12
+
13
/**
 * Derives chunk-to-chunk relations from source text using regular expressions:
 * relative imports and class heritage for JS/TS, relative imports for Python,
 * and links for Markdown.
 *
 * Paths are resolved against `process.cwd()`, which is treated as the project
 * root; targets outside it are ignored. Line indexes are 0-based and compared
 * directly with each chunk's `start_line`/`end_line`.
 * NOTE(review): target ids are built as `chunk_<sanitized path>_0` from the
 * import specifier as written (usually without a file extension) — confirm
 * this matches the id scheme in graph-builder.
 */
export class RegexAnalyzer {
  private readonly patterns = {
    // import x / {a} / * as ns / x, {a} from '…', side-effect import '…', and export … from '…'
    jsImports: /\b(?:import|export)\s+(?:(?:[\w$*\s,]*?(?:\{[^}]*\})?)\s+from\s+)?['"]([^'"]+)['"]/g,
    pythonFromImport: /from\s+(\S+)\s+import/g,
    pythonImport: /import\s+(\S+)/g,
    extends: /class\s+\w+\s+extends\s+(\w+)/g,
    // Also matches `class A extends B implements C`.
    implements: /class\s+\w+(?:\s+extends\s+[\w$.]+)?\s+implements\s+([^{]+)/g,
    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
  }

  /** Link targets with a URL scheme (https:, mailto:, …) or protocol-relative `//host`. */
  private readonly externalLink = /^(?:[a-z][a-z0-9+.-]+:|\/\/)/i

  /**
   * Extracts import/heritage relations from a code file.
   *
   * @param filePath Path of the file; its extension selects the analysis
   *                 (.js/.ts/.jsx/.tsx or .py; anything else yields no relations).
   * @param content  Full text of the file.
   * @param chunks   Chunks of this file.
   * @returns Relations with source "regex" and weight 0.8.
   */
  analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
    const relations: Relation[] = []
    const ext = path.extname(filePath)

    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
      this.analyzeJSCode(content, filePath, chunks, relations)
    } else if (ext === ".py") {
      this.analyzePythonCode(content, filePath, chunks, relations)
    }

    return relations
  }

  /**
   * Extracts "links_to" relations from Markdown links `[text](target)`.
   *
   * External URLs are skipped. A `#fragment` is separated from the path before
   * resolving; when the link points at the current file (including pure
   * `#anchor` links) the fragment is matched against the chunks' headings,
   * otherwise the first chunk of the target file is used.
   *
   * @param filePath Path of the Markdown file.
   * @param content  Full text of the file.
   * @param chunks   Chunks of this file.
   * @returns Relations with source "markdown" and weight 1.0.
   */
  analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
    const relations: Relation[] = []
    const selfPath = this.resolvePath(filePath, path.basename(filePath))

    for (const match of content.matchAll(this.patterns.markdownLink)) {
      // Drop an optional link title: [text](target "title").
      const [linkTarget = ""] = match[2].trim().split(/\s+/)
      if (!linkTarget || this.externalLink.test(linkTarget)) continue

      const hashIndex = linkTarget.indexOf("#")
      const pathPart = hashIndex === -1 ? linkTarget : linkTarget.substring(0, hashIndex)
      const fragment = hashIndex === -1 ? "" : linkTarget.substring(hashIndex + 1)

      const targetPath = pathPart ? this.resolvePath(filePath, pathPart) : selfPath
      if (!targetPath) continue

      const lineIndex = this.lineIndexAt(content, match.index ?? 0)
      const fromChunkId = this.findChunkForLine(chunks, lineIndex)
      if (!fromChunkId) continue

      // Only this file's headings are known here, so only same-file anchors can be resolved.
      const headingChunks = targetPath === selfPath ? chunks : []
      const toChunkId = this.findChunkForLinkTarget(targetPath, fragment, headingChunks)
      if (toChunkId) {
        relations.push({
          from: fromChunkId,
          to: toChunkId,
          predicate: "links_to",
          weight: 1.0,
          source: "markdown",
          line: lineIndex
        })
      }
    }

    return relations
  }

  /** Collects relative-import, `extends` and `implements` relations for JS/TS text. */
  private analyzeJSCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]): void {
    for (const match of content.matchAll(this.patterns.jsImports)) {
      const importPath = match[1]
      // Bare specifiers (packages) are not part of the project graph.
      if (!importPath.startsWith(".")) continue
      this.addImportRelation(filePath, importPath, this.lineIndexAt(content, match.index ?? 0), chunks, relations)
    }

    for (const match of content.matchAll(this.patterns.extends)) {
      this.addSymbolRelations(content, match.index ?? 0, [match[1]], "extends", chunks, relations)
    }

    for (const match of content.matchAll(this.patterns.implements)) {
      // Empty names (e.g. from a trailing comma) would match every chunk, so drop them.
      const interfaces = match[1].split(",").map(name => name.trim()).filter(Boolean)
      this.addSymbolRelations(content, match.index ?? 0, interfaces, "implements", chunks, relations)
    }
  }

  /** Collects relative-import relations for Python text. */
  private analyzePythonCode(content: string, filePath: string, chunks: ChunkWithId[], relations: Relation[]): void {
    for (const pattern of [this.patterns.pythonFromImport, this.patterns.pythonImport]) {
      for (const match of content.matchAll(pattern)) {
        const moduleRef = match[1]
        if (!moduleRef.startsWith(".")) continue
        this.addImportRelation(
          filePath,
          this.pythonModuleToPath(moduleRef),
          this.lineIndexAt(content, match.index ?? 0),
          chunks,
          relations
        )
      }
    }
  }

  /**
   * Converts a relative Python module reference to a relative file path:
   * ".models" -> "./models", "..core.base" -> "../core/base".
   * One leading dot is the current package; each extra dot goes one level up.
   */
  private pythonModuleToPath(moduleRef: string): string {
    const modulePart = moduleRef.replace(/^\.+/, "")
    const leadingDots = moduleRef.length - modulePart.length
    const prefix = leadingDots <= 1 ? "./" : "../".repeat(leadingDots - 1)
    return prefix + modulePart.split(".").join("/")
  }

  /** Adds an "imports" relation from the chunk at `lineIndex` to the first chunk of the imported file. */
  private addImportRelation(filePath: string, importPath: string, lineIndex: number, chunks: ChunkWithId[], relations: Relation[]): void {
    const targetPath = this.resolvePath(filePath, importPath)
    if (!targetPath) return

    const fromChunkId = this.findChunkForLine(chunks, lineIndex)
    if (!fromChunkId) return

    const toChunkId = this.findFirstChunkInFile(targetPath)
    if (toChunkId) {
      relations.push({
        from: fromChunkId,
        to: toChunkId,
        predicate: "imports",
        weight: 0.8,
        source: "regex",
        line: lineIndex
      })
    }
  }

  /**
   * Adds one `predicate` relation per symbol, from the chunk containing the
   * declaration at `matchOffset` to another chunk of the file that mentions the symbol.
   */
  private addSymbolRelations(content: string, matchOffset: number, symbols: string[], predicate: string, chunks: ChunkWithId[], relations: Relation[]): void {
    const lineIndex = this.lineIndexAt(content, matchOffset)
    const fromChunkId = this.findChunkForLine(chunks, lineIndex)
    if (!fromChunkId) return

    for (const symbol of symbols) {
      // The declaring chunk always mentions the symbol itself; exclude it to avoid self-edges.
      const toChunkId = this.findChunkContainingSymbol(chunks, symbol, fromChunkId)
      if (toChunkId) {
        relations.push({
          from: fromChunkId,
          to: toChunkId,
          predicate,
          weight: 0.8,
          source: "regex",
          line: lineIndex
        })
      }
    }
  }

  /** 0-based index of the line containing `offset` (number of "\n" before it). */
  private lineIndexAt(content: string, offset: number): number {
    let lineIndex = 0
    for (let i = content.indexOf("\n"); i !== -1 && i < offset; i = content.indexOf("\n", i + 1)) {
      lineIndex++
    }
    return lineIndex
  }

  /**
   * Resolves `target` relative to the directory of `filePath` and returns it
   * as a path relative to `process.cwd()`.
   *
   * @returns The relative path, or `null` if the target lies outside the
   *          working directory or resolution throws. A target equal to the
   *          working directory yields "" (treated as "no target" by callers).
   */
  private resolvePath(filePath: string, target: string): string | null {
    try {
      const absoluteTarget = path.resolve(path.dirname(filePath), target)
      const relative = path.relative(process.cwd(), absoluteTarget)

      // A plain string-prefix test would accept siblings such as "/proj-other" for
      // "/proj"; check the relative path for an upward or cross-drive escape instead.
      const escapesRoot =
        relative === ".." || relative.startsWith(".." + path.sep) || path.isAbsolute(relative)

      return escapesRoot ? null : relative
    } catch {
      return null
    }
  }

  /**
   * Returns the id of the first chunk whose [start_line, end_line] range
   * (inclusive) contains `lineIndex`, or `null`. Chunks without both bounds are skipped.
   */
  private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
    for (const chunk of chunks) {
      if (chunk.start_line === undefined || chunk.end_line === undefined) continue
      if (lineIndex >= chunk.start_line && lineIndex <= chunk.end_line) {
        return chunk.chunk_id
      }
    }
    return null
  }

  /** Builds the id of chunk 0 of `targetPath` (non-alphanumerics replaced by "_"). */
  private findFirstChunkInFile(targetPath: string): string | null {
    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
    return `chunk_${normalized}_0`
  }

  /**
   * Returns the id of the first chunk, other than `excludeChunkId`, whose
   * content mentions `symbol` as a whole identifier (so "Base" does not match
   * "Database"), or `null` if there is none or `symbol` is empty.
   */
  private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string, excludeChunkId?: string): string | null {
    if (!symbol) return null

    const escaped = symbol.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
    const wholeIdentifier = new RegExp(`(?<![\\w$])${escaped}(?![\\w$])`)

    for (const chunk of chunks) {
      if (chunk.chunk_id === excludeChunkId) continue
      if (wholeIdentifier.test(chunk.content)) {
        return chunk.chunk_id
      }
    }
    return null
  }

  /**
   * Picks the destination chunk for a Markdown link.
   *
   * @param targetPath    Project-relative path of the linked file.
   * @param fragment      Text after "#" in the link, or "" if there is none.
   * @param headingChunks Chunks whose headings may be matched against the
   *                      fragment (the linked file's chunks, when known).
   * @returns The chunk whose heading matches the fragment — either literally
   *          or in hyphenated slug form ("Getting Started" ~ "getting-started")
   *          — otherwise the first chunk of `targetPath`.
   */
  private findChunkForLinkTarget(targetPath: string, fragment: string, headingChunks: ChunkWithId[]): string | null {
    if (fragment) {
      const wanted = fragment.toLowerCase()
      for (const chunk of headingChunks) {
        if (!chunk.heading_context) continue
        const heading = chunk.heading_context.toLowerCase()
        if (heading.includes(wanted) || this.slugifyHeading(heading).includes(wanted)) {
          return chunk.chunk_id
        }
      }
    }
    return this.findFirstChunkInFile(targetPath)
  }

  /** Approximates a Markdown heading anchor: strips punctuation, whitespace runs become "-". */
  private slugifyHeading(heading: string): string {
    return heading.trim().replace(/[^\w\s-]/g, "").replace(/\s+/g, "-")
  }
}