@dreb/coding-agent 1.18.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/core/tools/search.d.ts.map +1 -1
  2. package/dist/core/tools/search.js +14 -36
  3. package/dist/core/tools/search.js.map +1 -1
  4. package/package.json +2 -1
  5. package/dist/core/search/chunker.d.ts +0 -21
  6. package/dist/core/search/chunker.d.ts.map +0 -1
  7. package/dist/core/search/chunker.js +0 -51
  8. package/dist/core/search/chunker.js.map +0 -1
  9. package/dist/core/search/db.d.ts +0 -89
  10. package/dist/core/search/db.d.ts.map +0 -1
  11. package/dist/core/search/db.js +0 -406
  12. package/dist/core/search/db.js.map +0 -1
  13. package/dist/core/search/embedder.d.ts +0 -52
  14. package/dist/core/search/embedder.d.ts.map +0 -1
  15. package/dist/core/search/embedder.js +0 -158
  16. package/dist/core/search/embedder.js.map +0 -1
  17. package/dist/core/search/index-manager.d.ts +0 -55
  18. package/dist/core/search/index-manager.d.ts.map +0 -1
  19. package/dist/core/search/index-manager.js +0 -311
  20. package/dist/core/search/index-manager.js.map +0 -1
  21. package/dist/core/search/metrics/bm25.d.ts +0 -10
  22. package/dist/core/search/metrics/bm25.d.ts.map +0 -1
  23. package/dist/core/search/metrics/bm25.js +0 -32
  24. package/dist/core/search/metrics/bm25.js.map +0 -1
  25. package/dist/core/search/metrics/git-recency.d.ts +0 -14
  26. package/dist/core/search/metrics/git-recency.d.ts.map +0 -1
  27. package/dist/core/search/metrics/git-recency.js +0 -123
  28. package/dist/core/search/metrics/git-recency.js.map +0 -1
  29. package/dist/core/search/metrics/import-graph.d.ts +0 -15
  30. package/dist/core/search/metrics/import-graph.d.ts.map +0 -1
  31. package/dist/core/search/metrics/import-graph.js +0 -115
  32. package/dist/core/search/metrics/import-graph.js.map +0 -1
  33. package/dist/core/search/metrics/path-match.d.ts +0 -13
  34. package/dist/core/search/metrics/path-match.d.ts.map +0 -1
  35. package/dist/core/search/metrics/path-match.js +0 -54
  36. package/dist/core/search/metrics/path-match.js.map +0 -1
  37. package/dist/core/search/metrics/symbol-match.d.ts +0 -12
  38. package/dist/core/search/metrics/symbol-match.d.ts.map +0 -1
  39. package/dist/core/search/metrics/symbol-match.js +0 -62
  40. package/dist/core/search/metrics/symbol-match.js.map +0 -1
  41. package/dist/core/search/metrics/tokenize.d.ts +0 -12
  42. package/dist/core/search/metrics/tokenize.d.ts.map +0 -1
  43. package/dist/core/search/metrics/tokenize.js +0 -29
  44. package/dist/core/search/metrics/tokenize.js.map +0 -1
  45. package/dist/core/search/poem.d.ts +0 -38
  46. package/dist/core/search/poem.d.ts.map +0 -1
  47. package/dist/core/search/poem.js +0 -214
  48. package/dist/core/search/poem.js.map +0 -1
  49. package/dist/core/search/query-classifier.d.ts +0 -17
  50. package/dist/core/search/query-classifier.d.ts.map +0 -1
  51. package/dist/core/search/query-classifier.js +0 -54
  52. package/dist/core/search/query-classifier.js.map +0 -1
  53. package/dist/core/search/scanner.d.ts +0 -30
  54. package/dist/core/search/scanner.d.ts.map +0 -1
  55. package/dist/core/search/scanner.js +0 -344
  56. package/dist/core/search/scanner.js.map +0 -1
  57. package/dist/core/search/search.d.ts +0 -51
  58. package/dist/core/search/search.d.ts.map +0 -1
  59. package/dist/core/search/search.js +0 -381
  60. package/dist/core/search/search.js.map +0 -1
  61. package/dist/core/search/text-chunker.d.ts +0 -15
  62. package/dist/core/search/text-chunker.d.ts.map +0 -1
  63. package/dist/core/search/text-chunker.js +0 -580
  64. package/dist/core/search/text-chunker.js.map +0 -1
  65. package/dist/core/search/tree-sitter-chunker.d.ts +0 -25
  66. package/dist/core/search/tree-sitter-chunker.d.ts.map +0 -1
  67. package/dist/core/search/tree-sitter-chunker.js +0 -357
  68. package/dist/core/search/tree-sitter-chunker.js.map +0 -1
  69. package/dist/core/search/types.d.ts +0 -96
  70. package/dist/core/search/types.d.ts.map +0 -1
  71. package/dist/core/search/types.js +0 -6
  72. package/dist/core/search/types.js.map +0 -1
  73. package/dist/core/search/vector-store.d.ts +0 -43
  74. package/dist/core/search/vector-store.d.ts.map +0 -1
  75. package/dist/core/search/vector-store.js +0 -73
  76. package/dist/core/search/vector-store.js.map +0 -1
@@ -1,344 +0,0 @@
1
- /**
2
- * File scanner for the semantic search subsystem.
3
- *
4
- * Discovers project files for indexing by walking the directory tree,
5
- * respecting .gitignore rules, and classifying files by type.
6
- */
7
- import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
8
- import { homedir } from "node:os";
9
- import { extname, isAbsolute, join, relative, sep } from "node:path";
10
- import ignore from "ignore";
11
- import { getDrebToolVisibleDirs } from "../tools/dreb-paths.js";
12
- // ============================================================================
13
- // Constants
14
- // ============================================================================
15
/** Largest file, in bytes, that the scanner will hand to the indexer (1 MB). */
const MAX_FILE_SIZE = 1 << 20;

/**
 * Directory names that traversal never descends into. Entries containing a
 * "/" are multi-segment paths rather than plain directory names.
 */
const SKIP_DIRS = new Set([
    "node_modules", ".git", ".dreb/index", ".hg", ".svn",
    "__pycache__", ".tox", ".venv",
    "dist", "build", ".next", ".nuxt", "coverage", ".cache",
]);

/**
 * Extension → FileType lookup, built from a table grouped by file type.
 * Keys are lower-case extensions including the leading dot.
 */
const EXTENSION_MAP = new Map();
for (const [fileType, extensions] of Object.entries({
    // Tree-sitter languages
    typescript: [".ts"],
    tsx: [".tsx"],
    javascript: [".js", ".mjs", ".cjs"],
    python: [".py"],
    go: [".go"],
    rust: [".rs"],
    java: [".java"],
    c: [".c", ".h"],
    cpp: [".cpp", ".hpp", ".cc", ".cxx", ".hh", ".hxx"],
    // Text file types
    markdown: [".md", ".mdx"],
    yaml: [".yml", ".yaml"],
    json: [".json"],
    toml: [".toml"],
    plaintext: [".txt", ".cfg", ".ini", ".env", ".conf"],
})) {
    for (const extension of extensions) {
        EXTENSION_MAP.set(extension, fileType);
    }
}
67
- // ============================================================================
68
- // Public API
69
- // ============================================================================
70
/**
 * Map a file path to its {@link FileType} using only the (case-insensitive)
 * extension. Yields `null` when the path has no extension or the extension
 * is not present in the extension table.
 */
export function detectFileType(filePath) {
    const extension = extname(filePath).toLowerCase();
    return extension === "" ? null : (EXTENSION_MAP.get(extension) ?? null);
}
80
/**
 * Collect every indexable file for a project.
 *
 * When {@link projectRoot} is the user's home directory only its top-level
 * files are scanned. Otherwise the whole tree is walked with `.gitignore`
 * filtering, followed by the directories returned by
 * `getDrebToolVisibleDirs`, which are scanned without gitignore filtering.
 * In both modes the optional global memory directory is appended last,
 * provided it exists on disk.
 *
 * @param projectRoot Directory to scan.
 * @param globalMemoryDir Optional extra memory directory to include.
 * @returns The discovered files (path, file type, mtime).
 */
export async function scanProject(projectRoot, globalMemoryDir) {
    const found = [];
    if (isHomeDirPath(projectRoot)) {
        // A recursive walk of the home directory would be far too expensive,
        // so only its immediate files are considered.
        scanShallow(projectRoot, found);
    }
    else {
        const matcher = ignore();
        loadGitignore(matcher, projectRoot, projectRoot);
        walkDirectory(projectRoot, projectRoot, matcher, found);
        // Tool-visible .dreb/ subdirectories bypass gitignore filtering. They
        // are skipped in home-directory mode to avoid scanning the global
        // memory directory twice.
        for (const visibleDir of getDrebToolVisibleDirs(projectRoot)) {
            scanMemoryDir(visibleDir, projectRoot, found);
        }
    }
    if (globalMemoryDir && existsSync(globalMemoryDir)) {
        scanMemoryDir(globalMemoryDir, projectRoot, found);
    }
    return found;
}
116
/**
 * Tell whether `dir` names the current user's home directory.
 *
 * The comparison is textual: trailing "/" and "\" characters are removed from
 * both sides and the remainders must be identical. Any failure while
 * obtaining or normalising the paths is treated as "not the home directory".
 */
function isHomeDirPath(dir) {
    try {
        const [home, candidate] = [homedir(), dir].map((p) => p.replace(/[/\\]+$/, ""));
        return candidate === home;
    }
    catch {
        return false;
    }
}
129
/**
 * Non-recursive scan used for the home directory.
 *
 * Looks only at the immediate children of `dir`, ignores every entry whose
 * name starts with ".", and records regular files that are non-empty, no
 * larger than MAX_FILE_SIZE, and of a recognised file type. Entries that
 * cannot be listed or stat'ed are skipped silently.
 *
 * @param dir Directory whose direct children are inspected.
 * @param results Array that receives one record per accepted file.
 */
function scanShallow(dir, results) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const name of names) {
        // Hidden files and directories are never indexed in shallow mode.
        if (name.startsWith("."))
            continue;
        let info;
        try {
            info = statSync(join(dir, name));
        }
        catch {
            continue;
        }
        const indexable = info.isFile() && info.size <= MAX_FILE_SIZE && info.size !== 0;
        if (!indexable)
            continue;
        const fileType = detectFileType(name);
        if (fileType) {
            // The entry name doubles as the root-relative path: no recursion happens here.
            results.push({ filePath: name, fileType, mtime: info.mtimeMs });
        }
    }
}
171
/** Rewrite a path that uses the platform separator so that it uses "/" instead. */
function toPosix(p) {
    if (sep === "/") {
        // Already posix-style on this platform.
        return p;
    }
    const segments = p.split(sep);
    return segments.join("/");
}
175
/**
 * Read `<dir>/.gitignore`, if present, and register its rules with `ig`.
 *
 * Each line is passed through `prefixPattern` with the posix path of `dir`
 * relative to `root`, so rules from nested directories can be matched
 * against root-relative paths. A file that cannot be read is ignored.
 *
 * @param ig Ignore matcher that receives the rules.
 * @param dir Directory that may contain a `.gitignore`.
 * @param root Scan root that matched paths are relative to.
 */
function loadGitignore(ig, dir, root) {
    const gitignoreFile = join(dir, ".gitignore");
    if (existsSync(gitignoreFile)) {
        try {
            const text = readFileSync(gitignoreFile, "utf-8");
            const relDir = relative(root, dir);
            const prefix = relDir === "" ? "" : `${toPosix(relDir)}/`;
            const rules = [];
            for (const rawLine of text.split(/\r?\n/)) {
                const rule = prefixPattern(rawLine, prefix);
                if (rule !== null) {
                    rules.push(rule);
                }
            }
            if (rules.length > 0) {
                ig.add(rules);
            }
        }
        catch {
            // Best effort: an unreadable .gitignore simply contributes no rules.
        }
    }
}
196
/**
 * Rewrite one line of a `.gitignore` file so that it can be matched against
 * root-relative posix paths.
 *
 * Rules from the root-level file (empty `prefix`) are returned unchanged.
 * Rules from a nested directory are re-anchored under `prefix` following the
 * gitignore pattern format:
 *  - a leading "/" anchors the rule to the directory itself ("/x" → "sub/x");
 *  - a rule with a "/" in the middle is already relative to that directory
 *    ("a/b" → "sub/a/b");
 *  - any other rule may match at any depth below the directory
 *    ("*.log" → "sub/**​/*.log", "build/" → "sub/**​/build/").
 * (The zero-width space in the examples above only keeps this comment well
 * formed; the generated patterns contain a plain "**" followed by "/".)
 *
 * @param line A single raw line from a `.gitignore` file.
 * @param prefix Posix path of the directory holding the `.gitignore`, relative
 *   to the scan root and ending in "/", or "" for the root-level file.
 * @returns The rewritten pattern, or `null` for blank lines, comment lines and
 *   nested rules that are empty once "!" and "/" decorations are removed.
 */
function prefixPattern(line, prefix) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#"))
        return null;
    // Root-level rules already use the right frame of reference. Returning the
    // line untouched also keeps an escaped "\!" literal from being turned into
    // a negation rule.
    if (!prefix)
        return line;
    let pattern = line;
    let negated = false;
    if (pattern.startsWith("!")) {
        negated = true;
        pattern = pattern.slice(1);
    }
    else if (pattern.startsWith("\\!")) {
        // Once a prefix precedes it, "!" is no longer in leading position and
        // therefore needs no escape.
        pattern = pattern.slice(1);
    }
    // The pattern without trailing whitespace and without one trailing "/"
    // (a trailing slash only means "directories only", it does not anchor).
    const core = pattern.trimEnd().replace(/\/$/, "");
    if (core === "")
        return null;
    let scoped;
    if (pattern.startsWith("/")) {
        // Anchored to the .gitignore's own directory; drop the slash so the
        // result does not contain "//".
        scoped = `${prefix}${pattern.slice(1)}`;
    }
    else if (core.includes("/")) {
        scoped = `${prefix}${pattern}`;
    }
    else {
        scoped = `${prefix}**/${pattern}`;
    }
    return negated ? `!${scoped}` : scoped;
}
218
/**
 * Decide whether a directory, given by its root-relative path, is on the
 * hard-coded skip list.
 *
 * A directory is skipped when its own name is listed (e.g. "node_modules"),
 * or when its path equals or ends with one of the multi-segment entries
 * (e.g. ".dreb/index").
 */
function shouldSkipDir(relPath) {
    const posixPath = toPosix(relPath);
    // Final path segment, i.e. the directory's own name.
    const baseName = posixPath.slice(posixPath.lastIndexOf("/") + 1);
    if (SKIP_DIRS.has(baseName)) {
        return true;
    }
    return Array.from(SKIP_DIRS).some((skip) => skip.includes("/") && (posixPath === skip || posixPath.endsWith(`/${skip}`)));
}
237
/**
 * Depth-first traversal that appends every indexable file below `dir` to
 * `results`.
 *
 * Directories on the skip list or matched by the ignore rules are not
 * entered. Before descending into a directory its own `.gitignore` is loaded
 * into `ig`. Files are accepted when they are regular files, not ignored,
 * non-empty, at most MAX_FILE_SIZE bytes, and of a recognised type.
 * Unreadable directories and entries that cannot be stat'ed are skipped.
 *
 * @param dir Directory currently being listed.
 * @param root Scan root; recorded paths are relative to it.
 * @param ig Ignore matcher shared across the whole walk.
 * @param results Array that receives one record per accepted file.
 */
function walkDirectory(dir, root, ig, results) {
    let children;
    try {
        children = readdirSync(dir);
    }
    catch {
        return; // Permission denied, etc.
    }
    for (const child of children) {
        const absPath = join(dir, child);
        const relPath = relative(root, absPath);
        const posixRel = toPosix(relPath);
        let info;
        try {
            info = statSync(absPath);
        }
        catch {
            continue; // Broken symlink, etc.
        }
        if (info.isDirectory()) {
            // The ignore matcher expects a trailing slash for directories.
            const excluded = shouldSkipDir(relPath) || ig.ignores(`${posixRel}/`);
            if (!excluded) {
                loadGitignore(ig, absPath, root);
                walkDirectory(absPath, root, ig, results);
            }
            continue;
        }
        const rejected = !info.isFile() ||
            ig.ignores(posixRel) ||
            info.size > MAX_FILE_SIZE ||
            info.size === 0;
        if (rejected)
            continue;
        const fileType = detectFileType(child);
        if (fileType) {
            results.push({ filePath: posixRel, fileType, mtime: info.mtimeMs });
        }
    }
}
290
/**
 * Recursively collect indexable files from a memory directory.
 *
 * No `.gitignore` filtering is applied here. Files must be regular,
 * non-empty, at most MAX_FILE_SIZE bytes and of a recognised type.
 *
 * A file located inside `projectRoot` is recorded under its normal
 * root-relative path. A file outside it is recorded as `~memory/<path>`,
 * where `<path>` is relative to the top-level memory directory of this scan.
 *
 * @param memoryDir Directory to list at this recursion level.
 * @param projectRoot Project root used to compute relative paths.
 * @param results Array that receives one record per accepted file.
 * @param baseMemoryDir Top-level memory directory; set by recursive calls
 *   and defaulting to `memoryDir` on the initial call.
 */
function scanMemoryDir(memoryDir, projectRoot, results, baseMemoryDir) {
    const rootMemoryDir = baseMemoryDir ?? memoryDir;
    let names;
    try {
        names = readdirSync(memoryDir);
    }
    catch {
        return;
    }
    for (const name of names) {
        const absPath = join(memoryDir, name);
        let info;
        try {
            info = statSync(absPath);
        }
        catch {
            continue;
        }
        if (info.isDirectory()) {
            scanMemoryDir(absPath, projectRoot, results, rootMemoryDir);
            continue;
        }
        const indexable = info.isFile() && info.size <= MAX_FILE_SIZE && info.size !== 0;
        if (!indexable)
            continue;
        const fileType = detectFileType(name);
        if (!fileType)
            continue;
        const rel = relative(projectRoot, absPath);
        let filePath;
        if (rel.startsWith("..") || isAbsolute(rel)) {
            // Outside the project: tag the path so it stays unique and identifiable.
            filePath = `~memory/${relative(rootMemoryDir, absPath)}`;
        }
        else {
            filePath = rel;
        }
        results.push({ filePath: toPosix(filePath), fileType, mtime: info.mtimeMs });
    }
}
344
- //# sourceMappingURL=scanner.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scanner.js","sourceRoot":"","sources":["../../../src/core/search/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAiBhE,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,sCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB,EAAmB;IACjE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AAAA,CACtC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,WAAmB,EAAE,eAAwB,EAA0B;IACxG,MAAM,OAAO,GAAkB,
EAAE,CAAC;IAElC,wEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,4DAA4D;IAC5D,uEAAuE;IACvE,oDAAoD;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,KAAK,MAAM,GAAG,IAAI,sBAAsB,CAAC,WAAW,CAAC,EAAE,CAAC;YACvD,aAAa,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW,EAAW;IAC5C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AAAA,CACD;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB,EAAQ;IAC/D,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS,EAAU;IACnC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAAA,CAC9B;AAED,sEAAsE;AACtE,SAAS
,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY,EAAQ;IAC1E,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,0CAAwC;IACzC,CAAC;AAAA,CACD;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc,EAAiB;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAAA,CAC3C;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe,EAAW;IAChD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED,gEAAgE;AAChE,SAAS,
aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB,EAAQ;IAClG,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,CAAC,0BAA0B;IACnC,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS,CAAC,uBAAuB;QAClC,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB,EAAQ;IACpH,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC
,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AAAA,CACD","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport { getDrebToolVisibleDirs } from \"../tools/dreb-paths.js\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. 
*/\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? 
null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(projectRoot: string, globalMemoryDir?: string): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of getDrebToolVisibleDirs(projectRoot)) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. 
*/\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. 
*/\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. 
\".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\treturn; // Permission denied, etc.\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue; // Broken symlink, etc.\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n 
*/\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
@@ -1,51 +0,0 @@
1
- /**
2
- * Main search API.
3
- *
4
- * Orchestrates: check/build index → compute all 6 metrics → classify query
5
- * → duplicate columns → POEM rank → assemble results.
6
- */
7
- import type { IndexProgressCallback, SearchResult } from "./types.js";
8
- export interface SearchOptions {
9
- /** Maximum number of results to return. Default: 20. */
10
- limit?: number;
11
- /** Restrict search to files under this path (relative to project root). */
12
- pathFilter?: string;
13
- /** Progress callback for indexing operations. */
14
- onProgress?: IndexProgressCallback;
15
- }
16
- export declare class SearchEngine {
17
- private readonly projectRoot;
18
- private indexManager;
19
- private embedderPromise;
20
- private searchQueue;
21
- constructor(projectRoot: string);
22
- /** Check if semantic search is available (requires node:sqlite). */
23
- static isAvailable(): boolean;
24
- /**
25
- * Search the codebase with a natural language or identifier query.
26
- *
27
- * On first call, builds the index (scans, chunks, embeds). Subsequent calls
28
- * incrementally update changed files before searching.
29
- */
30
- search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
31
- /** Get index stats without opening a new connection. */
32
- getStats(): {
33
- files: number;
34
- chunks: number;
35
- } | null;
36
- /**
37
- * Reset the search index — delete the DB and close the IndexManager.
38
- *
39
- * Preserves the embedder (expensive ONNX model, unrelated to index state).
40
- * The next `search()` call will lazily re-create the IndexManager and build
41
- * a fresh index from scratch.
42
- */
43
- resetIndex(): void;
44
- /** Dispose resources. */
45
- close(): void;
46
- private getIndexManager;
47
- private getIndexConfig;
48
- private getOrCreateEmbedder;
49
- private computeVectorScores;
50
- }
51
- //# sourceMappingURL=search.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../../src/core/search/search.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAeH,OAAO,KAAK,EAAe,qBAAqB,EAAgB,YAAY,EAAe,MAAM,YAAY,CAAC;AAe9G,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,UAAU,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAMD,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,YAAY,CAA6B;IACjD,OAAO,CAAC,eAAe,CAAkC;IACzD,OAAO,CAAC,WAAW,CAAoC;IAEvD,YAAY,WAAW,EAAE,MAAM,EAE9B;IAED,oEAAoE;IACpE,MAAM,CAAC,WAAW,IAAI,OAAO,CAE5B;IAED;;;;;OAKG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAiI5E;IAED,wDAAwD;IACxD,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAGnD;IAED;;;;;;OAMG;IACH,UAAU,IAAI,IAAI,CAUjB;IAED,yBAAyB;IACzB,KAAK,IAAI,IAAI,CAQZ;IAMD,OAAO,CAAC,eAAe;IASvB,OAAO,CAAC,cAAc;IAStB,OAAO,CAAC,mBAAmB;YAoBb,mBAAmB;CA4BjC","sourcesContent":["/**\n * Main search API.\n *\n * Orchestrates: check/build index → compute all 6 metrics → classify query\n * → duplicate columns → POEM rank → assemble results.\n */\n\nimport { existsSync, unlinkSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport path from \"node:path\";\nimport type { SearchDatabase } from \"./db.js\";\nimport { Embedder } from \"./embedder.js\";\nimport { IndexManager } from \"./index-manager.js\";\nimport { computeBm25Scores } from \"./metrics/bm25.js\";\nimport { computeGitRecencyScores } from \"./metrics/git-recency.js\";\nimport { computeImportGraphScores } from \"./metrics/import-graph.js\";\nimport { computePathMatchScores } from \"./metrics/path-match.js\";\nimport { computeSymbolMatchScores } from \"./metrics/symbol-match.js\";\nimport { poemRank } from \"./poem.js\";\nimport { classifyQuery } from \"./query-classifier.js\";\nimport type { IndexConfig, IndexProgressCallback, MetricScores, SearchResult, StoredChunk } from \"./types.js\";\nimport { topKSimilar } from 
\"./vector-store.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DEFAULT_MODEL_NAME = \"Xenova/all-MiniLM-L6-v2\";\nconst DEFAULT_RESULT_LIMIT = 20;\nconst METRIC_CANDIDATE_LIMIT = 1000;\n\n// ============================================================================\n// Search Options\n// ============================================================================\n\nexport interface SearchOptions {\n\t/** Maximum number of results to return. Default: 20. */\n\tlimit?: number;\n\t/** Restrict search to files under this path (relative to project root). */\n\tpathFilter?: string;\n\t/** Progress callback for indexing operations. */\n\tonProgress?: IndexProgressCallback;\n}\n\n// ============================================================================\n// Search Engine\n// ============================================================================\n\nexport class SearchEngine {\n\tprivate readonly projectRoot: string;\n\tprivate indexManager: IndexManager | null = null;\n\tprivate embedderPromise: Promise<Embedder> | null = null;\n\tprivate searchQueue: Promise<void> = Promise.resolve();\n\n\tconstructor(projectRoot: string) {\n\t\tthis.projectRoot = projectRoot;\n\t}\n\n\t/** Check if semantic search is available (requires node:sqlite). */\n\tstatic isAvailable(): boolean {\n\t\treturn IndexManager.isAvailable();\n\t}\n\n\t/**\n\t * Search the codebase with a natural language or identifier query.\n\t *\n\t * On first call, builds the index (scans, chunks, embeds). 
Subsequent calls\n\t * incrementally update changed files before searching.\n\t */\n\tasync search(query: string, options?: SearchOptions): Promise<SearchResult[]> {\n\t\t// Chain through searchQueue so concurrent calls serialize\n\t\tlet resolve!: () => void;\n\t\tconst gate = new Promise<void>((r) => {\n\t\t\tresolve = r;\n\t\t});\n\t\tconst waitFor = this.searchQueue;\n\t\tthis.searchQueue = gate;\n\n\t\ttry {\n\t\t\tawait waitFor;\n\n\t\t\tconst limit = options?.limit ?? DEFAULT_RESULT_LIMIT;\n\t\t\tconst onProgress = options?.onProgress;\n\n\t\t\t// Ensure index is built and up to date\n\t\t\tconst indexManager = this.getIndexManager();\n\t\t\tconst db = indexManager.getDb();\n\n\t\t\t// Share our embedder with IndexManager so it doesn't create a second one\n\t\t\tconst embedder = await this.getOrCreateEmbedder();\n\t\t\tindexManager.setEmbedder(embedder);\n\n\t\t\tawait indexManager.buildIndex(onProgress);\n\t\t\tawait indexManager.ensureEmbeddings(onProgress);\n\n\t\t\t// Get all chunks (potentially filtered by path)\n\t\t\tlet allChunks = db.getAllChunks();\n\t\t\tif (options?.pathFilter) {\n\t\t\t\tconst filter = options.pathFilter;\n\t\t\t\tallChunks = allChunks.filter((c) => c.filePath.startsWith(filter));\n\t\t\t}\n\n\t\t\tif (allChunks.length === 0) {\n\t\t\t\treturn [];\n\t\t\t}\n\n\t\t\t// Classify query type for POEM column weighting\n\t\t\tconst queryType = classifyQuery(query);\n\n\t\t\t// Compute all 6 metrics\n\t\t\tonProgress?.(\"searching\", 0, 6);\n\n\t\t\t// 1. BM25 (FTS5)\n\t\t\tconst bm25Scores = computeBm25Scores(db, sanitizeFtsQuery(query), METRIC_CANDIDATE_LIMIT);\n\t\t\tonProgress?.(\"searching\", 1, 6);\n\n\t\t\t// 2. Cosine similarity (vector search)\n\t\t\tconst cosineScores = await this.computeVectorScores(db, query, METRIC_CANDIDATE_LIMIT, onProgress);\n\t\t\tonProgress?.(\"searching\", 2, 6);\n\n\t\t\t// 3. 
Path match\n\t\t\tconst pathScores = computePathMatchScores(query, allChunks);\n\t\t\tonProgress?.(\"searching\", 3, 6);\n\n\t\t\t// 4. Symbol match\n\t\t\tconst symbols = db.getAllSymbols();\n\t\t\tconst symbolScores = computeSymbolMatchScores(query, symbols);\n\t\t\tonProgress?.(\"searching\", 4, 6);\n\n\t\t\t// 5. Import graph (use BM25 + cosine as seed scores, aggregated per file)\n\t\t\t// Only use files with strong scores as seeds — low-scoring files (e.g. from\n\t\t\t// common OR terms matching everywhere) pollute the seed set and prevent\n\t\t\t// meaningful propagation.\n\t\t\tconst fileSeedScores = aggregateFileScores(allChunks, bm25Scores, cosineScores);\n\t\t\tconst seedThreshold = computeSeedThreshold(fileSeedScores);\n\t\t\tconst filteredSeeds = new Map<number, number>();\n\t\t\tfor (const [fileId, score] of fileSeedScores) {\n\t\t\t\tif (score >= seedThreshold) filteredSeeds.set(fileId, score);\n\t\t\t}\n\t\t\tconst fileIdToChunkIds = buildFileChunkMap(allChunks);\n\t\t\tconst importScores = computeImportGraphScores(db, filteredSeeds, fileIdToChunkIds);\n\t\t\tonProgress?.(\"searching\", 5, 6);\n\n\t\t\t// 6. Git recency\n\t\t\tconst recencyScores = await computeGitRecencyScores(this.projectRoot, allChunks);\n\t\t\tonProgress?.(\"searching\", 6, 6);\n\n\t\t\t// Build MetricScores for each candidate chunk\n\t\t\tconst candidateIds = collectCandidateIds(\n\t\t\t\tbm25Scores,\n\t\t\t\tcosineScores,\n\t\t\t\tpathScores,\n\t\t\t\tsymbolScores,\n\t\t\t\timportScores,\n\t\t\t\trecencyScores,\n\t\t\t);\n\t\t\tconst candidates = new Map<number, MetricScores>();\n\n\t\t\tfor (const id of candidateIds) {\n\t\t\t\tcandidates.set(id, {\n\t\t\t\t\tbm25: bm25Scores.get(id) ?? 0,\n\t\t\t\t\tcosine: cosineScores.get(id) ?? 0,\n\t\t\t\t\tpathMatch: pathScores.get(id) ?? 0,\n\t\t\t\t\tsymbolMatch: symbolScores.get(id) ?? 0,\n\t\t\t\t\timportGraph: importScores.get(id) ?? 0,\n\t\t\t\t\tgitRecency: recencyScores.get(id) ?? 
0,\n\t\t\t\t});\n\t\t\t}\n\n\t\t\tif (candidates.size === 0) {\n\t\t\t\treturn [];\n\t\t\t}\n\n\t\t\t// POEM rank\n\t\t\tconst ranked = poemRank(candidates, queryType);\n\n\t\t\t// Assemble results\n\t\t\tconst chunkMap = new Map<number, StoredChunk>();\n\t\t\tfor (const chunk of allChunks) {\n\t\t\t\tchunkMap.set(chunk.id, chunk);\n\t\t\t}\n\n\t\t\tconst results: SearchResult[] = [];\n\t\t\tfor (const candidate of ranked.slice(0, limit)) {\n\t\t\t\tconst chunk = chunkMap.get(candidate.id);\n\t\t\t\tif (chunk) {\n\t\t\t\t\tresults.push({\n\t\t\t\t\t\tchunk,\n\t\t\t\t\t\tscores: candidate.scores,\n\t\t\t\t\t\trank: candidate.rank,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn results;\n\t\t} finally {\n\t\t\tresolve();\n\t\t}\n\t}\n\n\t/** Get index stats without opening a new connection. */\n\tgetStats(): { files: number; chunks: number } | null {\n\t\tif (!this.indexManager) return null;\n\t\treturn this.indexManager.getStats();\n\t}\n\n\t/**\n\t * Reset the search index — delete the DB and close the IndexManager.\n\t *\n\t * Preserves the embedder (expensive ONNX model, unrelated to index state).\n\t * The next `search()` call will lazily re-create the IndexManager and build\n\t * a fresh index from scratch.\n\t */\n\tresetIndex(): void {\n\t\t// Close DB connection first (WAL mode may hold locks)\n\t\tthis.indexManager?.close();\n\t\tthis.indexManager = null;\n\n\t\t// Delete the DB file\n\t\tconst dbPath = path.join(this.projectRoot, \".dreb\", \"index\", \"search.db\");\n\t\tif (existsSync(dbPath)) {\n\t\t\tunlinkSync(dbPath);\n\t\t}\n\t}\n\n\t/** Dispose resources. 
*/\n\tclose(): void {\n\t\tthis.indexManager?.close();\n\t\tthis.indexManager = null;\n\t\t// Dispose embedder if it was created\n\t\tif (this.embedderPromise) {\n\t\t\tthis.embedderPromise.then((e) => e.dispose()).catch(() => {});\n\t\t\tthis.embedderPromise = null;\n\t\t}\n\t}\n\n\t// ========================================================================\n\t// Private\n\t// ========================================================================\n\n\tprivate getIndexManager(): IndexManager {\n\t\tif (!this.indexManager) {\n\t\t\tconst config = this.getIndexConfig();\n\t\t\tthis.indexManager = new IndexManager(config);\n\t\t\tthis.indexManager.open();\n\t\t}\n\t\treturn this.indexManager;\n\t}\n\n\tprivate getIndexConfig(): IndexConfig {\n\t\treturn {\n\t\t\tprojectRoot: this.projectRoot,\n\t\t\tindexDir: path.join(this.projectRoot, \".dreb\", \"index\"),\n\t\t\tglobalMemoryDir: path.join(homedir(), \".dreb\", \"memory\"),\n\t\t\tmodelName: DEFAULT_MODEL_NAME,\n\t\t};\n\t}\n\n\tprivate getOrCreateEmbedder(): Promise<Embedder> {\n\t\tif (!this.embedderPromise) {\n\t\t\tthis.embedderPromise = (async () => {\n\t\t\t\ttry {\n\t\t\t\t\tconst config = this.getIndexConfig();\n\t\t\t\t\tconst embedder = new Embedder({\n\t\t\t\t\t\tmodelCacheDir: path.join(homedir(), \".dreb\", \"agent\", \"models\"),\n\t\t\t\t\t\tmodelName: config.modelName,\n\t\t\t\t\t});\n\t\t\t\t\tawait embedder.initialize();\n\t\t\t\t\treturn embedder;\n\t\t\t\t} catch (err) {\n\t\t\t\t\tthis.embedderPromise = null; // reset on failure for retry\n\t\t\t\t\tthrow err;\n\t\t\t\t}\n\t\t\t})();\n\t\t}\n\t\treturn this.embedderPromise;\n\t}\n\n\tprivate async computeVectorScores(\n\t\tdb: SearchDatabase,\n\t\tquery: string,\n\t\tlimit: number,\n\t\t_onProgress?: IndexProgressCallback,\n\t): Promise<Map<number, number>> {\n\t\tconst config = this.getIndexConfig();\n\t\tconst embedder = await this.getOrCreateEmbedder();\n\n\t\t// Embed the query\n\t\tconst queryVector = await 
embedder.embedQuery(query);\n\n\t\t// Get all stored embeddings\n\t\tconst storedVectors = db.getAllEmbeddings(config.modelName);\n\n\t\tif (storedVectors.size === 0) {\n\t\t\treturn new Map();\n\t\t}\n\n\t\tconst topK = topKSimilar(queryVector, storedVectors, limit);\n\n\t\t// Convert to Map, clamping negative similarities to 0\n\t\tconst scores = new Map<number, number>();\n\t\tfor (const { id, score } of topK) {\n\t\t\tscores.set(id, Math.max(0, score));\n\t\t}\n\t\treturn scores;\n\t}\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\n/** Collect all unique chunk IDs that appear in any metric's results. */\nfunction collectCandidateIds(...scoreMaps: Map<number, number>[]): Set<number> {\n\tconst ids = new Set<number>();\n\tfor (const map of scoreMaps) {\n\t\tfor (const id of map.keys()) {\n\t\t\tids.add(id);\n\t\t}\n\t}\n\treturn ids;\n}\n\n/** Aggregate chunk-level scores to file-level scores (max per file). 
*/\nfunction aggregateFileScores(chunks: StoredChunk[], ...scoreMaps: Map<number, number>[]): Map<number, number> {\n\tconst fileScores = new Map<number, number>();\n\n\tfor (const chunk of chunks) {\n\t\tlet maxScore = 0;\n\t\tfor (const map of scoreMaps) {\n\t\t\tconst s = map.get(chunk.id);\n\t\t\tif (s !== undefined && s > maxScore) maxScore = s;\n\t\t}\n\t\tif (maxScore > 0) {\n\t\t\tconst existing = fileScores.get(chunk.fileId);\n\t\t\tif (existing === undefined || maxScore > existing) {\n\t\t\t\tfileScores.set(chunk.fileId, maxScore);\n\t\t\t}\n\t\t}\n\t}\n\n\treturn fileScores;\n}\n\n/**\n * Compute a dynamic threshold for import graph seeds.\n * Uses the median score — only the top half of files are strong enough seeds.\n * Falls back to 0.1 minimum to avoid accepting near-zero scores.\n */\nfunction computeSeedThreshold(fileScores: Map<number, number>): number {\n\tif (fileScores.size === 0) return 0;\n\tconst sorted = [...fileScores.values()].sort((a, b) => b - a);\n\tconst median = sorted[Math.floor(sorted.length / 2)];\n\treturn Math.max(median, 0.1);\n}\n\n/** Build a map of fileId → chunk IDs for that file. */\nfunction buildFileChunkMap(chunks: StoredChunk[]): Map<number, number[]> {\n\tconst map = new Map<number, number[]>();\n\tfor (const chunk of chunks) {\n\t\tconst existing = map.get(chunk.fileId);\n\t\tif (existing) existing.push(chunk.id);\n\t\telse map.set(chunk.fileId, [chunk.id]);\n\t}\n\treturn map;\n}\n\n/** Common English stopwords to exclude from FTS queries. 
*/\nconst STOPWORDS = new Set([\n\t\"a\",\n\t\"an\",\n\t\"and\",\n\t\"are\",\n\t\"as\",\n\t\"at\",\n\t\"be\",\n\t\"but\",\n\t\"by\",\n\t\"for\",\n\t\"from\",\n\t\"had\",\n\t\"has\",\n\t\"have\",\n\t\"he\",\n\t\"her\",\n\t\"his\",\n\t\"how\",\n\t\"i\",\n\t\"if\",\n\t\"in\",\n\t\"into\",\n\t\"is\",\n\t\"it\",\n\t\"its\",\n\t\"me\",\n\t\"my\",\n\t\"no\",\n\t\"not\",\n\t\"of\",\n\t\"on\",\n\t\"or\",\n\t\"our\",\n\t\"she\",\n\t\"so\",\n\t\"than\",\n\t\"that\",\n\t\"the\",\n\t\"their\",\n\t\"them\",\n\t\"then\",\n\t\"there\",\n\t\"these\",\n\t\"they\",\n\t\"this\",\n\t\"to\",\n\t\"up\",\n\t\"us\",\n\t\"was\",\n\t\"we\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"who\",\n\t\"will\",\n\t\"with\",\n\t\"would\",\n\t\"you\",\n\t\"your\",\n]);\n\n/**\n * Sanitize a query string for FTS5 MATCH syntax.\n * FTS5 chokes on certain characters — strip operators and wrap terms.\n *\n * Removes stopwords and uses OR between terms so multi-word queries return\n * partial matches (FTS5's default implicit AND is too restrictive).\n */\nfunction sanitizeFtsQuery(query: string): string {\n\t// Remove FTS5 operators and special chars\n\tconst cleaned = query\n\t\t.replace(/[*\"():^{}[\\]~!@#$%&=+|<>]/g, \" \")\n\t\t.replace(/\\bAND\\b|\\bOR\\b|\\bNOT\\b|\\bNEAR\\b/gi, \" \")\n\t\t.trim();\n\n\t// Split into tokens, remove stopwords, join with OR\n\tconst tokens = cleaned.split(/\\s+/).filter((t) => t.length > 0 && !STOPWORDS.has(t.toLowerCase()));\n\tif (tokens.length === 0) return '\"\"';\n\tif (tokens.length === 1) return tokens[0];\n\treturn tokens.join(\" OR \");\n}\n"]}