@comfanion/usethis_search 3.0.0-dev.17 → 3.0.0-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@comfanion/usethis_search",
- "version": "3.0.0-dev.17",
+ "version": "3.0.0-dev.18",
  "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
  "type": "module",
  "main": "./index.ts",
@@ -12,7 +12,7 @@
 
  import path from "path"
  import fs from "fs/promises"
- import { ChunkWithId } from "../graph-builder"
+ import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
  import { LSPClient, LSPSymbolInformation, SymbolKind } from "./lsp-client"
 
  export interface Relation {
@@ -252,7 +252,9 @@ export class LSPAnalyzer {
  return result
  }
 
- /** Convert LSP location URI + line → chunk_id. */
+ /** Convert LSP location URI + line → chunk_id.
+ * For same-file refs, resolves to exact chunk by line.
+ * For cross-file refs, returns the default (first) chunk of the target file. */
  private locationToChunkId(currentFile: string, uri: string, line: number, root: string): string | null {
  // uri = file:///absolute/path/to/file.ts
  const filePath = uri.startsWith("file://") ? uri.slice(7) : uri
@@ -261,11 +263,9 @@ export class LSPAnalyzer {
  // Skip external files (node_modules, etc.)
  if (relPath.startsWith("..") || relPath.includes("node_modules")) return null
 
- const withoutExt = relPath.replace(/\.[^/.]+$/, "")
- const normalized = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
- // For cross-file references, point to chunk 0 (first chunk of target file)
- // For same-file, we could be more precise but chunk 0 is sufficient for graph
- return `chunk_${normalized}_0`
+ // Same file → use findChunkForPosition (called separately with chunks)
+ // Cross-file default chunk
+ return buildDefaultChunkId(relPath)
  }
 
  private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
@@ -1,5 +1,6 @@
  import path from "path"
- import { ChunkWithId } from "../graph-builder"
+ import fs from "fs"
+ import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
 
  export interface Relation {
  from: string
@@ -10,14 +11,127 @@ export interface Relation {
  line?: number
  }
 
+ // ── Module resolution ─────────────────────────────────────────────────────────
+
+ /** Extensions to try when resolving JS/TS imports (in order). */
+ const JS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]
+
+ /** Extensions to try when resolving Python imports. */
+ const PY_EXTENSIONS = [".py"]
+
+ /**
+ * Resolve a relative import to an actual file on disk.
+ *
+ * Follows Node.js / TypeScript module resolution:
+ * 1. Exact path (has extension) → check exists
+ * 2. Try each extension: `target.ts`, `target.tsx`, ...
+ * 3. Try directory index: `target/index.ts`, `target/index.tsx`, ...
+ *
+ * For Python:
+ * 1. `target.py`
+ * 2. `target/__init__.py`
+ *
+ * Fallback: if nothing exists on disk, infer extension from the source file
+ * (so offline / unit-test scenarios still produce useful edges).
+ *
+ * Returns a project-relative path (e.g. `src/utils.ts`) or null.
+ */
+ function resolveModulePath(
+ projectRoot: string,
+ sourceFile: string,
+ importSpecifier: string,
+ language: "js" | "python" | "markdown",
+ ): string | null {
+ const dir = path.dirname(path.resolve(projectRoot, sourceFile))
+ const base = path.resolve(dir, importSpecifier)
+
+ // Security: must stay inside project root
+ if (!base.startsWith(projectRoot)) return null
+
+ const hasExtension = !!path.extname(base)
+
+ // 1. If specifier already has an extension, check it directly
+ if (hasExtension) {
+ if (fileExists(base)) return path.relative(projectRoot, base)
+ // Even if the exact file doesn't exist, return the relative path so we
+ // can still build a "best effort" edge (e.g. markdown link to ./api.md
+ // in a test without real files).
+ return path.relative(projectRoot, base)
+ }
+
+ const exts = language === "python" ? PY_EXTENSIONS : JS_EXTENSIONS
+
+ // 2. Try appending each extension
+ for (const ext of exts) {
+ const candidate = base + ext
+ if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+ }
+
+ // 3. Try directory index files
+ const indexNames = language === "python" ? ["__init__.py"] : exts.map(e => "index" + e)
+ for (const idx of indexNames) {
+ const candidate = path.join(base, idx)
+ if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+ }
+
+ // 4. Fallback: infer extension from source file
+ // `app.ts` imports `./utils` → assume `utils.ts`
+ const sourceExt = path.extname(sourceFile)
+ if (sourceExt && exts.includes(sourceExt)) {
+ return path.relative(projectRoot, base + sourceExt)
+ }
+
+ // Last resort for Python: assume .py
+ if (language === "python") {
+ return path.relative(projectRoot, base + ".py")
+ }
+
+ return null
+ }
+
+ /** Synchronous file-exists check (cheap for module resolution). */
+ function fileExists(absPath: string): boolean {
+ try {
+ return fs.statSync(absPath).isFile()
+ } catch {
+ return false
+ }
+ }
+
+ /**
+ * Convert Python relative import specifier to a path.
+ * `.utils` → `./utils`
+ * `..utils` → `../utils`
+ * `...pkg` → `../../pkg`
+ */
+ function pythonRelativeToPath(spec: string): string {
+ const match = spec.match(/^(\.+)(.*)$/)
+ if (!match) return spec
+ const dots = match[1].length // number of leading dots
+ const module = match[2] // remainder, e.g. "utils"
+ // 1 dot = current dir "./", 2 dots = "../", 3 = "../../", ...
+ const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+ // Module part: dots→slashes (e.g. "pkg.sub" → "pkg/sub")
+ const modulePath = module.replace(/\./g, "/")
+ return prefix + modulePath
+ }
+
+ // ── RegexAnalyzer ────────────────────────────────────────────────────────────
+
  export class RegexAnalyzer {
+ private projectRoot: string
+
  private readonly patterns = {
  jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
  pythonFromImport: /from\s+(\S+)\s+import/g,
  pythonImport: /import\s+(\S+)/g,
  extends: /class\s+\w+\s+extends\s+(\w+)/g,
  implements: /class\s+\w+\s+implements\s+([^{]+)/g,
- markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+ markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g,
+ }
+
+ constructor(projectRoot?: string) {
+ this.projectRoot = projectRoot || process.cwd()
  }
 
  analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
@@ -25,9 +139,9 @@ export class RegexAnalyzer {
  const ext = path.extname(filePath)
  const lines = content.split("\n")
 
- if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+ if ([".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"].includes(ext)) {
  this.analyzeJSCode(content, lines, filePath, chunks, relations)
- } else if ([".py"].includes(ext)) {
+ } else if (ext === ".py") {
  this.analyzePythonCode(content, lines, filePath, chunks, relations)
  }
 
@@ -37,17 +151,14 @@ export class RegexAnalyzer {
  analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
  const relations: Relation[] = []
  const lines = content.split("\n")
- const dir = path.dirname(filePath)
 
  let match
  this.patterns.markdownLink.lastIndex = 0
  while ((match = this.patterns.markdownLink.exec(content)) !== null) {
- const linkText = match[1]
  const linkTarget = match[2]
  const lineIndex = content.substring(0, match.index).split("\n").length - 1
- const line = lines[lineIndex]
 
- const targetPath = this.resolvePath(filePath, linkTarget)
+ const targetPath = this.resolveMarkdownLink(filePath, linkTarget)
  if (!targetPath) continue
 
  const fromChunkId = this.findChunkForLine(chunks, lineIndex)
@@ -61,7 +172,7 @@ export class RegexAnalyzer {
  predicate: "links_to",
  weight: 1.0,
  source: "markdown",
- line: lineIndex
+ line: lineIndex,
  })
  }
  }
@@ -69,33 +180,34 @@ export class RegexAnalyzer {
  return relations
  }
 
- private analyzeJSCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+ // ── JS / TS ───────────────────────────────────────────────────────────────
+
+ private analyzeJSCode(
+ content: string, lines: string[], filePath: string,
+ chunks: ChunkWithId[], relations: Relation[],
+ ) {
  let match
 
  this.patterns.jsImports.lastIndex = 0
  while ((match = this.patterns.jsImports.exec(content)) !== null) {
  const importPath = match[1]
  const lineIndex = content.substring(0, match.index).split("\n").length - 1
- const line = lines[lineIndex]
 
  if (importPath.startsWith(".")) {
- const targetPath = this.resolvePath(filePath, importPath)
+ const targetPath = resolveModulePath(this.projectRoot, filePath, importPath, "js")
  if (!targetPath) continue
 
  const fromChunkId = this.findChunkForLine(chunks, lineIndex)
  if (!fromChunkId) continue
 
- const toChunkId = this.findFirstChunkInFile(targetPath)
- if (toChunkId) {
- relations.push({
- from: fromChunkId,
- to: toChunkId,
- predicate: "imports",
- weight: 0.8,
- source: "regex",
- line: lineIndex
- })
- }
+ relations.push({
+ from: fromChunkId,
+ to: buildDefaultChunkId(targetPath),
+ predicate: "imports",
+ weight: 0.8,
+ source: "regex",
+ line: lineIndex,
+ })
  }
  }
 
@@ -115,7 +227,7 @@ export class RegexAnalyzer {
  predicate: "extends",
  weight: 0.8,
  source: "regex",
- line: lineIndex
+ line: lineIndex,
  })
  }
  }
@@ -137,14 +249,19 @@ export class RegexAnalyzer {
  predicate: "implements",
  weight: 0.8,
  source: "regex",
- line: lineIndex
+ line: lineIndex,
  })
  }
  }
  }
  }
 
- private analyzePythonCode(content: string, lines: string[], filePath: string, chunks: ChunkWithId[], relations: Relation[]) {
+ // ── Python ────────────────────────────────────────────────────────────────
+
+ private analyzePythonCode(
+ content: string, lines: string[], filePath: string,
+ chunks: ChunkWithId[], relations: Relation[],
+ ) {
  let match
 
  this.patterns.pythonFromImport.lastIndex = 0
@@ -153,23 +270,21 @@ export class RegexAnalyzer {
  const lineIndex = content.substring(0, match.index).split("\n").length - 1
 
  if (importPath.startsWith(".")) {
- const targetPath = this.resolvePath(filePath, importPath)
+ const pyPath = pythonRelativeToPath(importPath)
+ const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
  if (!targetPath) continue
 
  const fromChunkId = this.findChunkForLine(chunks, lineIndex)
  if (!fromChunkId) continue
 
- const toChunkId = this.findFirstChunkInFile(targetPath)
- if (toChunkId) {
- relations.push({
- from: fromChunkId,
- to: toChunkId,
- predicate: "imports",
- weight: 0.8,
- source: "regex",
- line: lineIndex
- })
- }
+ relations.push({
+ from: fromChunkId,
+ to: buildDefaultChunkId(targetPath),
+ predicate: "imports",
+ weight: 0.8,
+ source: "regex",
+ line: lineIndex,
+ })
  }
  }
 
@@ -179,42 +294,38 @@ export class RegexAnalyzer {
  const lineIndex = content.substring(0, match.index).split("\n").length - 1
 
  if (importPath.startsWith(".")) {
- const targetPath = this.resolvePath(filePath, importPath)
+ const pyPath = pythonRelativeToPath(importPath)
+ const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
  if (!targetPath) continue
 
  const fromChunkId = this.findChunkForLine(chunks, lineIndex)
  if (!fromChunkId) continue
 
- const toChunkId = this.findFirstChunkInFile(targetPath)
- if (toChunkId) {
- relations.push({
- from: fromChunkId,
- to: toChunkId,
- predicate: "imports",
- weight: 0.8,
- source: "regex",
- line: lineIndex
- })
- }
+ relations.push({
+ from: fromChunkId,
+ to: buildDefaultChunkId(targetPath),
+ predicate: "imports",
+ weight: 0.8,
+ source: "regex",
+ line: lineIndex,
+ })
  }
  }
  }
 
- private resolvePath(filePath: string, target: string): string | null {
- try {
- const dir = path.dirname(filePath)
- const absoluteTarget = path.resolve(dir, target)
+ // ── Markdown link resolution ──────────────────────────────────────────────
 
- if (!absoluteTarget.startsWith(process.cwd())) {
- return null
- }
+ private resolveMarkdownLink(filePath: string, target: string): string | null {
+ // Strip anchor (#section)
+ const hashIdx = target.indexOf("#")
+ const cleanTarget = hashIdx >= 0 ? target.substring(0, hashIdx) : target
+ if (!cleanTarget) return null
 
- return path.relative(process.cwd(), absoluteTarget)
- } catch {
- return null
- }
+ return resolveModulePath(this.projectRoot, filePath, cleanTarget, "markdown")
  }
 
+ // ── Chunk lookup helpers ──────────────────────────────────────────────────
+
  private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
  for (const chunk of chunks) {
  if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
@@ -226,11 +337,6 @@ export class RegexAnalyzer {
  return null
  }
 
- private findFirstChunkInFile(targetPath: string): string | null {
- const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
- return `chunk_${normalized}_0`
- }
-
  private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
  for (const chunk of chunks) {
  if (chunk.content.includes(symbol)) {
@@ -250,6 +356,6 @@ export class RegexAnalyzer {
  }
  }
  }
- return this.findFirstChunkInFile(targetPath)
+ return buildDefaultChunkId(targetPath)
  }
  }
@@ -9,29 +9,117 @@ export interface ChunkWithId {
  start_line?: number
  end_line?: number
  heading_context?: string
+ function_name?: string
+ class_name?: string
+ }
+
+ // ── Chunk ID helpers ────────────────────────────────────────────────────────
+
+ /** Build a symbol-aware chunk ID.
+ *
+ * Format: `chunk:{relPath}::{symbol}`
+ * Examples:
+ * chunk:src/user-service.ts::UserService
+ * chunk:src/user-service.ts::UserService.findById
+ * chunk:src/utils.ts::helper
+ * chunk:docs/api.md::authentication
+ * chunk:src/index.ts::_chunk_0
+ */
+ export function buildChunkId(filePath: string, chunk: { class_name?: string; function_name?: string; heading_context?: string }, index: number): string {
+ let symbol: string
+
+ if (chunk.class_name && chunk.function_name) {
+ symbol = `${chunk.class_name}.${chunk.function_name}`
+ } else if (chunk.class_name) {
+ symbol = chunk.class_name
+ } else if (chunk.function_name) {
+ symbol = chunk.function_name
+ } else if (chunk.heading_context) {
+ // Markdown: slugify heading
+ symbol = chunk.heading_context
+ .toLowerCase()
+ .replace(/[^a-z0-9]+/g, "-")
+ .replace(/^-|-$/g, "")
+ if (!symbol) symbol = `_chunk_${index}`
+ } else {
+ symbol = `_chunk_${index}`
+ }
+
+ return `chunk:${filePath}::${symbol}`
+ }
+
+ /** Build the file-level node ID. */
+ export function buildFileNodeId(filePath: string): string {
+ return `file:${filePath}`
+ }
+
+ /** Extract the file path from any node ID (chunk: or file:). */
+ export function filePathFromNodeId(nodeId: string): string | null {
+ if (nodeId.startsWith("chunk:")) {
+ const sep = nodeId.indexOf("::")
+ return sep === -1 ? null : nodeId.slice(6, sep)
+ }
+ if (nodeId.startsWith("file:")) {
+ return nodeId.slice(5)
+ }
+ if (nodeId.startsWith("meta:")) {
+ return nodeId.slice(5)
+ }
+ return null
+ }
+
+ /** Build a cross-file chunk ID that points to the default (first) chunk of the target file.
+ * Used by regex/LSP analyzers when we don't know the exact target chunk. */
+ export function buildDefaultChunkId(filePath: string): string {
+ return `chunk:${filePath}::_chunk_0`
+ }
+
+ // ── Structural edge predicates ──────────────────────────────────────────────
+
+ const STRUCTURAL_PREDICATES = new Set([
+ "contains_class",
+ "contains_function",
+ "contains_interface",
+ "contains",
+ "has_method",
+ ])
+
+ export function isStructuralPredicate(predicate: string): boolean {
+ return STRUCTURAL_PREDICATES.has(predicate)
  }
 
  export class GraphBuilder {
  private lspAnalyzer: LSPAnalyzer
  private regexAnalyzer: RegexAnalyzer
+ private lspEnabled: boolean
 
  constructor(
  private graphDB: GraphDB,
- private projectRoot: string
+ private projectRoot: string,
+ lspEnabled: boolean = true,
+ lspTimeoutMs: number = 5000,
  ) {
- this.lspAnalyzer = new LSPAnalyzer()
- this.regexAnalyzer = new RegexAnalyzer()
+ this.lspEnabled = lspEnabled
+ this.lspAnalyzer = new LSPAnalyzer(projectRoot, lspTimeoutMs)
+ this.regexAnalyzer = new RegexAnalyzer(projectRoot)
  }
 
  assignChunkIds(filePath: string, chunks: any[]): ChunkWithId[] {
- const withoutExt = filePath.replace(/\.[^/.]+$/, "")
- const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
+ const seen = new Map<string, number>()
 
  return chunks.map((chunk, index) => {
- const chunkId = `chunk_${normalizedPath}_${index}`
+ let chunkId = buildChunkId(filePath, chunk, index)
+
+ // Handle duplicate symbols (e.g. two chunks for same class split by size)
+ const count = seen.get(chunkId) || 0
+ if (count > 0) {
+ chunkId = `${chunkId}#${count}`
+ }
+ seen.set(chunkId.replace(/#\d+$/, ""), count + 1)
+
  return {
  ...chunk,
- chunk_id: chunkId
+ chunk_id: chunkId,
  } as ChunkWithId
  })
  }
@@ -47,7 +135,7 @@ export class GraphBuilder {
  if (fileType === "docs") {
  relations = this.regexAnalyzer.analyzeMarkdown(filePath, content, chunks)
  } else if (fileType === "code") {
- const lspAvailable = await this.lspAnalyzer.isAvailable(filePath)
+ const lspAvailable = this.lspEnabled && await this.lspAnalyzer.isAvailable(filePath)
 
  if (lspAvailable) {
  try {
@@ -63,7 +151,7 @@ export class GraphBuilder {
  }
  }
 
- const triples: Triple[] = relations.map(rel => ({
+ const relationTriples: Triple[] = relations.map(rel => ({
  subject: rel.from,
  predicate: rel.predicate,
  object: rel.to,
@@ -73,14 +161,117 @@ export class GraphBuilder {
  line: rel.line
  }))
 
- await this.graphDB.putEdges(triples)
- return triples.length
+ // ── Structural edges ────────────────────────────────────────────────────
+ const fileNode = buildFileNodeId(filePath)
+ const structuralTriples: Triple[] = []
+
+ // Anchor: every chunk belongs_to its file
+ for (const c of chunks) {
+ structuralTriples.push({
+ subject: c.chunk_id,
+ predicate: "belongs_to",
+ object: filePath,
+ weight: 0,
+ source: "anchor",
+ file: filePath,
+ })
+ }
+
+ // File node → symbol chunks
+ // Track class chunks for has_method edges
+ const classChunkMap = new Map<string, string>() // className → chunk_id
+
+ for (const c of chunks) {
+ if (c.class_name && !c.function_name) {
+ // Class/interface chunk (no method = class-level)
+ const predicate = c.content.match(/\binterface\s/) ? "contains_interface" : "contains_class"
+ structuralTriples.push({
+ subject: fileNode,
+ predicate,
+ object: c.chunk_id,
+ weight: 1.0,
+ source: "structure",
+ file: filePath,
+ })
+ classChunkMap.set(c.class_name, c.chunk_id)
+ } else if (c.function_name && !c.class_name) {
+ // Top-level function
+ structuralTriples.push({
+ subject: fileNode,
+ predicate: "contains_function",
+ object: c.chunk_id,
+ weight: 1.0,
+ source: "structure",
+ file: filePath,
+ })
+ } else if (c.function_name && c.class_name) {
+ // Method inside a class → has_method edge from class chunk
+ const parentChunkId = classChunkMap.get(c.class_name)
+ if (parentChunkId) {
+ structuralTriples.push({
+ subject: parentChunkId,
+ predicate: "has_method",
+ object: c.chunk_id,
+ weight: 1.0,
+ source: "structure",
+ file: filePath,
+ })
+ } else {
+ // No class chunk found yet (methods appeared before class preamble, or class was not split)
+ // Fall back to file → method
+ structuralTriples.push({
+ subject: fileNode,
+ predicate: "contains_function",
+ object: c.chunk_id,
+ weight: 1.0,
+ source: "structure",
+ file: filePath,
+ })
+ }
+ } else if (c.heading_context) {
+ // Markdown section
+ structuralTriples.push({
+ subject: fileNode,
+ predicate: "contains",
+ object: c.chunk_id,
+ weight: 0.5,
+ source: "structure",
+ file: filePath,
+ })
+ } else {
+ // Generic content chunk
+ structuralTriples.push({
+ subject: fileNode,
+ predicate: "contains",
+ object: c.chunk_id,
+ weight: 0.3,
+ source: "structure",
+ file: filePath,
+ })
+ }
+ }
+
+ await this.graphDB.putEdges([...structuralTriples, ...relationTriples])
+ return relationTriples.length
  }
 
- resolveChunkId(filePath: string, line: number): string | null {
- const withoutExt = filePath.replace(/\.[^/.]+$/, "")
- const normalizedPath = withoutExt.replace(/[^a-zA-Z0-9]/g, "_")
- return `chunk_${normalizedPath}_0`
+ /** Resolve a file path + line to the best chunk ID.
+ * If chunks are provided, finds the one containing the line.
+ * Otherwise falls back to the default chunk. */
+ resolveChunkId(filePath: string, line: number, chunks?: ChunkWithId[]): string | null {
+ if (chunks && chunks.length > 0) {
+ for (const c of chunks) {
+ if (c.start_line !== undefined && c.end_line !== undefined) {
+ if (line >= c.start_line && line <= c.end_line) {
+ return c.chunk_id
+ }
+ }
+ }
+ // Line not in any chunk range — return first chunk
+ return chunks[0].chunk_id
+ }
+ // No chunks available — return default
+ return buildDefaultChunkId(filePath)
  }
 
  async getRelatedChunks(chunkId: string): Promise<Array<{ chunk_id: string; predicate: string; weight: number; direction: "outgoing" | "incoming" }>> {
@@ -1,5 +1,6 @@
  import levelgraph from "levelgraph"
  import { Level } from "level"
+ import { filePathFromNodeId, isStructuralPredicate } from "./graph-builder"
 
  export interface Triple {
  subject: string
@@ -149,23 +150,27 @@ export class GraphDB {
  async deleteFileMeta(filePath: string): Promise<void> {
  if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
 
- const triples = await new Promise<Triple[]>((resolve, reject) => {
- this.db.get(
- { subject: `meta:${filePath}`, predicate: "graph_built" },
- (err: Error | undefined, result: Triple[]) => {
- if (err) reject(err)
- else resolve(result || [])
- },
- )
- })
+ try {
+ const triples = await new Promise<Triple[]>((resolve, reject) => {
+ this.db.get(
+ { subject: `meta:${filePath}`, predicate: "graph_built" },
+ (err: Error | undefined, result: Triple[]) => {
+ if (err) reject(err)
+ else resolve(result || [])
+ },
+ )
+ })
 
- for (const t of triples) {
- await new Promise<void>((resolve, reject) => {
- this.db.del(t, (err: Error | undefined) => {
- if (err) reject(err)
- else resolve()
+ for (const t of triples) {
+ await new Promise<void>((resolve, reject) => {
+ this.db.del(t, (err: Error | undefined) => {
+ if (err) reject(err)
+ else resolve()
+ })
  })
- })
+ }
+ } catch (err) {
+ // Silently ignore errors (e.g., no meta triple exists)
  }
  }
 
@@ -191,9 +196,10 @@ export class GraphDB {
 
  /**
  * Get all triples in the graph (for validation/stats).
- * Excludes meta triples (predicate === "graph_built").
+ * Excludes meta, anchor, and structural triples by default.
+ * Pass includeStructural=true to also get structural edges.
  */
- async getAllTriples(): Promise<Triple[]> {
+ async getAllTriples(includeStructural: boolean = false): Promise<Triple[]> {
  if (!this.initialized) throw new Error("GraphDB not initialized. Call init() first.")
 
  const allTriples = await new Promise<Triple[]>((resolve, reject) => {
@@ -203,7 +209,11 @@ export class GraphDB {
  })
  })
 
- return allTriples.filter(t => t.predicate !== "graph_built")
+ return allTriples.filter(t => {
+ if (t.predicate === "graph_built" || t.predicate === "belongs_to") return false
+ if (!includeStructural && isStructuralPredicate(t.predicate)) return false
+ return true
+ })
  }
 
  async getRelatedFiles(chunkId: string, maxDepth: number = 1): Promise<{path: string, relation: string, weight: number}[]> {
@@ -213,58 +223,70 @@ export class GraphDB {
 
  const relatedFiles: Map<string, {relation: string, weight: number}> = new Map()
  const visited = new Set<string>()
-
+ const self = this
+
+ // Resolve the caller's file directly from the node ID
+ const callerFile = filePathFromNodeId(chunkId)
+
  async function traverse(currentId: string, currentDepth: number, currentRelation: string) {
- if (currentDepth > maxDepth || visited.has(currentId)) {
+ if (currentDepth >= maxDepth || visited.has(currentId)) {
  return
  }
-
+
  visited.add(currentId)
-
+
  try {
  const outgoing = await new Promise<Triple[]>((resolve, reject) => {
- this.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
+ self.db.get({ subject: currentId }, (err: Error | undefined, triples: Triple[]) => {
  if (err) reject(err)
  else resolve(triples || [])
  })
  })
-
+
  for (const triple of outgoing) {
- const fileId = triple.object
-
- // Aggregate relations and weights
- const existing = relatedFiles.get(fileId)
+ // Skip meta, anchor, and structural-only edges
+ if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+ if (isStructuralPredicate(triple.predicate)) continue
+
+ // Resolve file for the target node directly from its ID
+ const targetFile = filePathFromNodeId(triple.object)
+ if (!targetFile) continue
+
+ const existing = relatedFiles.get(targetFile)
  if (existing) {
  existing.weight = Math.max(existing.weight, triple.weight)
  } else {
- relatedFiles.set(fileId, {
+ relatedFiles.set(targetFile, {
  relation: currentRelation || triple.predicate,
  weight: triple.weight
  })
  }
-
- // Recurse for imports/extends relations
+
  if (triple.predicate === "imports" || triple.predicate === "extends") {
- await traverse(fileId, currentDepth + 1, triple.predicate)
+ await traverse(triple.object, currentDepth + 1, triple.predicate)
  }
  }
-
+
  const incoming = await new Promise<Triple[]>((resolve, reject) => {
- this.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
+ self.db.get({ object: currentId }, (err: Error | undefined, triples: Triple[]) => {
  if (err) reject(err)
  else resolve(triples || [])
  })
  })
-
+
  for (const triple of incoming) {
- const fileId = triple.subject
-
- const existing = relatedFiles.get(fileId)
+ if (triple.predicate === "graph_built" || triple.predicate === "belongs_to") continue
+ if (isStructuralPredicate(triple.predicate)) continue
+
+ const sourceFile = filePathFromNodeId(triple.subject)
+ if (!sourceFile) continue
+
+ const existing = relatedFiles.get(sourceFile)
  if (existing) {
  existing.weight = Math.max(existing.weight, triple.weight)
  } else {
- relatedFiles.set(fileId, {
- relation: `used_by`,
+ relatedFiles.set(sourceFile, {
+ relation: "used_by",
  weight: triple.weight
  })
  }
@@ -273,17 +295,18 @@ export class GraphDB {
  console.error(`Error traversing graph for ${currentId}:`, error)
  }
  }
-
+
  await traverse(chunkId, 0, "")
-
- const result = Array.from(relatedFiles.entries())
- .map(([path, data]) => ({
- path,
+
+ // Remove the caller's own file from results
+ if (callerFile) relatedFiles.delete(callerFile)
+
+ return Array.from(relatedFiles.entries())
+ .map(([filePath, data]) => ({
+ path: filePath,
  relation: data.relation,
  weight: data.weight
  }))
  .sort((a, b) => b.weight - a.weight)
-
- return result
  }
  }
@@ -16,7 +16,7 @@ import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
  import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
  import { SearchMetrics } from "./search-metrics.ts";
  import { GraphDB } from "./graph-db.ts";
- import { GraphBuilder } from "./graph-builder.ts";
+ import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
  import { UsageTracker } from "./usage-tracker.ts";
 
  // Suppress transformers.js logs unless DEBUG is set
@@ -85,6 +85,19 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
  let METRICS_ENABLED = false;
  let CACHE_ENABLED = true;
 
+ // ── Graph config (v3) ───────────────────────────────────────────────────────
+ const DEFAULT_GRAPH_CONFIG = {
+ enabled: true,
+ max_related: 4,
+ min_relevance: 0.5,
+ lsp: {
+ enabled: true,
+ timeout_ms: 5000,
+ },
+ read_intercept: true,
+ };
+ let GRAPH_CONFIG = { ...DEFAULT_GRAPH_CONFIG, lsp: { ...DEFAULT_GRAPH_CONFIG.lsp } };
+
  function defaultVectorizerYaml() {
  return (
  `vectorizer:\n` +
@@ -121,6 +134,16 @@ function defaultVectorizerYaml() {
  ` hybrid: true\n` +
  ` bm25_weight: 0.3\n` +
  `\n` +
+ ` # Graph-based context (v3)\n` +
+ ` graph:\n` +
+ ` enabled: true\n` +
+ ` max_related: 4\n` +
+ ` min_relevance: 0.5\n` +
+ ` lsp:\n` +
+ ` enabled: true\n` +
+ ` timeout_ms: 5000\n` +
+ ` read_intercept: true\n` +
+ `\n` +
  ` # Quality monitoring\n` +
  ` quality:\n` +
  ` enable_metrics: false\n` +
@@ -282,6 +305,26 @@ async function loadConfig(projectRoot) {
  CACHE_ENABLED = parseBool(qs, "enable_cache", true);
  }
 
+ // ── Parse graph config (v3) ──────────────────────────────────────────────
+ const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+ if (graphMatch) {
+ const gs = graphMatch[1];
+ GRAPH_CONFIG.enabled = parseBool(gs, "enabled", DEFAULT_GRAPH_CONFIG.enabled);
+ GRAPH_CONFIG.max_related = parseNumber(gs, "max_related", DEFAULT_GRAPH_CONFIG.max_related);
+ GRAPH_CONFIG.min_relevance = parseNumber(gs, "min_relevance", DEFAULT_GRAPH_CONFIG.min_relevance);
+ GRAPH_CONFIG.read_intercept = parseBool(gs, "read_intercept", DEFAULT_GRAPH_CONFIG.read_intercept);
+
+ // Nested lsp: section
+ const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|\Z)/m);
+ if (lspMatch) {
+ const ls = lspMatch[1];
+ GRAPH_CONFIG.lsp.enabled = parseBool(ls, "enabled", DEFAULT_GRAPH_CONFIG.lsp.enabled);
+ GRAPH_CONFIG.lsp.timeout_ms = parseNumber(ls, "timeout_ms", DEFAULT_GRAPH_CONFIG.lsp.timeout_ms);
+ }
+
+ if (DEBUG) console.log("[vectorizer] Graph config:", GRAPH_CONFIG);
+ }
+
  // Parse global exclude
  const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
  if (excludeMatch) {
@@ -392,11 +435,19 @@ class CodebaseIndexer {
  this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
  await this.loadHashes();
 
- const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
- const graphPath = path.join(this.root, ".opencode", "graph", graphType);
- await fs.mkdir(path.dirname(graphPath), { recursive: true });
- this.graphDB = await new GraphDB(graphPath).init();
- this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
+ // Graph DB only if graph is enabled in config
+ if (GRAPH_CONFIG.enabled) {
+ const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
+ const graphPath = path.join(this.root, ".opencode", "graph", graphType);
+ await fs.mkdir(path.dirname(graphPath), { recursive: true });
+ this.graphDB = await new GraphDB(graphPath).init();
+ this.graphBuilder = new GraphBuilder(
+ this.graphDB,
+ this.root,
+ GRAPH_CONFIG.lsp.enabled,
+ GRAPH_CONFIG.lsp.timeout_ms,
+ );
+ }
 
  // Usage tracker — provenance & usage stats
  this.usageTracker = new UsageTracker(this.cacheDir);
@@ -557,36 +608,41 @@ class CodebaseIndexer {
  // Semantic chunking
  const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
 
- // v3: Assign chunk IDs for graph tracking
- const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
+ // v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
+ const chunksWithIds = this.graphBuilder
+ ? this.graphBuilder.assignChunkIds(relPath, chunks)
+ : chunks.map((c, i) => ({ ...c, chunk_id: `chunk:${relPath}::_chunk_${i}` }));
 
  // v3: Delete old edges for this file and build new ones
- await this.graphDB.deleteByFile(relPath);
- const graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
-
- // Log graph creation to indexer.log
- if (graphEdgesBuilt > 0 || DEBUG) {
- const timestamp = new Date().toISOString().slice(11, 19);
- const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
- if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
-
- // Write to indexer.log in .opencode directory
+ let graphEdgesBuilt = 0;
+ if (this.graphBuilder && this.graphDB) {
+ await this.graphDB.deleteByFile(relPath);
+ graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+
+ // Log graph creation to indexer.log
+ if (graphEdgesBuilt > 0 || DEBUG) {
+ const timestamp = new Date().toISOString().slice(11, 19);
+ const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
+ if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
+
+ // Write to indexer.log in .opencode directory
+ try {
+ const logPath = path.join(this.root, ".opencode", "indexer.log");
+ const fsSync = await import("fs");
+ fsSync.appendFileSync(logPath, `${logMsg}\n`);
+ } catch {
+ // non-fatal — logging is advisory
+ }
+ }
+
+ // FR-054: Store graph build timestamp + file hash as metadata triple
  try {
- const logPath = path.join(this.root, ".opencode", "indexer.log");
- const fsSync = await import("fs");
- fsSync.appendFileSync(logPath, `${logMsg}\n`);
+ await this.graphDB.setFileMeta(relPath, hash, Date.now());
  } catch {
- // non-fatal — logging is advisory
+ // non-fatal — metadata is advisory
  }
  }
 
- // FR-054: Store graph build timestamp + file hash as metadata triple
- try {
- await this.graphDB.setFileMeta(relPath, hash, Date.now());
- } catch {
- // non-fatal — metadata is advisory
- }
-
  const data = [];
  for (let i = 0; i < chunksWithIds.length; i++) {
  const embedding = await this.embed(chunksWithIds[i].content);
@@ -606,9 +662,9 @@ class CodebaseIndexer {
  function_name: chunksWithIds[i].function_name || "",
  class_name: chunksWithIds[i].class_name || "",
  tags: (fileMeta.tags || []).join(","),
- // Line numbers for "from-to" extraction
- start_line: chunksWithIds[i].start_line,
- end_line: chunksWithIds[i].end_line,
+ // Line numbers for "from-to" extraction (default to -1 when unknown)
+ start_line: chunksWithIds[i].start_line ?? -1,
+ end_line: chunksWithIds[i].end_line ?? -1,
  });
  }
 
@@ -648,7 +704,7 @@ class CodebaseIndexer {
  const table = await this.db.openTable(tableName);
  let allRows;
  try {
- allRows = await table.filter("").limit(100000).execute();
+ allRows = await table.filter("true").limit(100000).execute();
  } catch (e) {
  if (DEBUG) console.log("[vectorizer] BM25 index build failed (corrupted table?):", e.message);
  return null;
@@ -712,10 +768,15 @@ class CodebaseIndexer {
  const bm25Results = bm25.search(query, fetchLimit);
 
  // Build score maps
+ // LanceDB _distance is L2 (euclidean). For normalized vectors,
+ // L2 ∈ [0, 2]. Convert to similarity ∈ [0, 1]:
+ // similarity = 1 - (distance / 2)
+ const distanceToScore = (d: number | null | undefined) =>
+ d != null ? Math.max(0, 1 - d / 2) : 0.5;
+
  const vectorScores = new Map();
  for (let i = 0; i < results.length; i++) {
- const score = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
- vectorScores.set(i, score);
+ vectorScores.set(i, distanceToScore(results[i]._distance));
  }
 
  const bm25Scores = new Map();
@@ -730,7 +791,7 @@ class CodebaseIndexer {
 
  for (let i = 0; i < results.length; i++) {
  const key = `${results[i].file}:${results[i].chunk_index}`;
- const vs = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
+ const vs = distanceToScore(results[i]._distance);
  resultMap.set(key, { row: results[i], vectorScore: vs, bm25Score: 0 });
  }
 
@@ -831,7 +892,10 @@ class CodebaseIndexer {
 
  const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
  const incoming = await this.graphDB.getIncoming(result.chunk_id);
- const allEdges = [...outgoing, ...incoming];
+ // Filter out structural and meta edges — only relation edges are useful for context
+ const allEdges = [...outgoing, ...incoming].filter(
+ e => e.predicate !== "belongs_to" && e.predicate !== "graph_built" && !isStructuralPredicate(e.predicate)
+ );
 
  const neighbors = [];
  for (const edge of allEdges) {
@@ -852,8 +916,13 @@ class CodebaseIndexer {
  });
  }
 
+ // Apply min_relevance filter, then cap at max_related
  neighbors.sort((a, b) => b.score - a.score);
- result.relatedContext = neighbors.slice(0, 3);
+ const minRelevance = GRAPH_CONFIG.min_relevance ?? 0.5;
+ const maxRelated = GRAPH_CONFIG.max_related ?? 4;
+ result.relatedContext = neighbors
+ .filter(n => n.score >= minRelevance)
+ .slice(0, maxRelated);
 
  // FR-060: Record provenance for each attached chunk
  if (this.usageTracker) {
@@ -894,7 +963,7 @@ class CodebaseIndexer {
  const table = await this.db.openTable(tableName);
  let rows;
  try {
- rows = await table.filter("").limit(100000).execute();
+ rows = await table.filter("true").limit(100000).execute();
  } catch (e) {
  if (DEBUG) console.log("[vectorizer] Chunk cache build failed (corrupted table?):", e.message);
  return null;
@@ -1032,7 +1101,7 @@ class CodebaseIndexer {
  const tables = await this.db.tableNames();
  if (tables.includes(tableName)) {
  const table = await this.db.openTable(tableName);
- const allRows = await table.filter("").limit(100000).execute();
+ const allRows = await table.filter("true").limit(100000).execute();
  const chunkData = allRows
  .filter(r => r.chunk_id && r.vector)
  .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
package/vectorizer.yaml CHANGED
@@ -61,6 +61,22 @@ vectorizer:
  # Indexes to maintain - each has pattern (what to include) and ignore (what to skip)
  indexes:
 
+ # Source code index - all common programming languages
+ code:
+ enabled: true
+ pattern: "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}"
+ ignore:
+ - "**/node_modules/**"
+ - "**/.git/**"
+ - "**/dist/**"
+ - "**/build/**"
+ - "**/.opencode/**"
+ - "**/docs/**"
+ - "**/vendor/**"
+ - "**/__pycache__/**"
+ hybrid: true
+ bm25_weight: 0.3
+
  # Documentation index - markdown, text files
  docs:
  enabled: true