@dreb/semantic-search 2.4.5 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +2 -2
- package/dist/chunker.d.ts.map +1 -1
- package/dist/chunker.js +1 -0
- package/dist/chunker.js.map +1 -1
- package/dist/scanner.d.ts.map +1 -1
- package/dist/scanner.js +5 -0
- package/dist/scanner.js.map +1 -1
- package/dist/tree-sitter-chunker.d.ts.map +1 -1
- package/dist/tree-sitter-chunker.js +7 -0
- package/dist/tree-sitter-chunker.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/grammars/tree-sitter-gdscript.wasm +0 -0
- package/package.json +3 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "semantic-search",
|
|
3
3
|
"description": "Semantic codebase search — natural language queries over code and docs using embeddings, tree-sitter parsing, and POEM multi-signal ranking",
|
|
4
|
-
"version": "2.
|
|
4
|
+
"version": "2.5.0",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Drew Brereton"
|
|
7
7
|
},
|
package/README.md
CHANGED
|
@@ -100,8 +100,8 @@ SearchEngine.isAvailable(); // check for node:sqlite
|
|
|
100
100
|
|
|
101
101
|
## What Gets Indexed
|
|
102
102
|
|
|
103
|
-
- **Code** — tree-sitter AST chunks (functions, classes, methods, interfaces, etc.). TypeScript, JavaScript, Python, Go, Rust, Java, C, C
|
|
104
|
-
- **Text** — Markdown (by heading), YAML/TOML (by key), JSON, plaintext (by paragraph).
|
|
103
|
+
- **Code** — tree-sitter AST chunks (functions, classes, methods, interfaces, etc.). TypeScript, JavaScript, Python, Go, Rust, Java, C, C++, GDScript.
|
|
104
|
+
- **Text** — Markdown (by heading), YAML/TOML (by key), JSON, plaintext (by paragraph). Also indexes Godot scene (`.tscn`), resource (`.tres`), and project (`.godot`) files as plaintext.
|
|
105
105
|
- **Extra directories** — via `globalMemoryDir` or `visibleDirs`, scanned even if gitignored.
|
|
106
106
|
|
|
107
107
|
The index is stored in `.search-index/search.db` at the project root (add `.search-index/` to `.gitignore`).
|
package/dist/chunker.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAoC,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAoC,MAAM,YAAY,CAAC;AAuBpF;;;;;;;;;;;GAWG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CAYvG"}
|
package/dist/chunker.js
CHANGED
package/dist/chunker.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAG/E,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAgB,IAAI,GAAG,CAAC;IAClD,YAAY;IACZ,KAAK;IACL,YAAY;IACZ,QAAQ;IACR,IAAI;IACJ,MAAM;IACN,MAAM;IACN,GAAG;IACH,KAAK;
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAG/E,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAgB,IAAI,GAAG,CAAC;IAClD,YAAY;IACZ,KAAK;IACL,YAAY;IACZ,QAAQ;IACR,IAAI;IACJ,MAAM;IACN,MAAM;IACN,GAAG;IACH,KAAK;IACL,UAAU;CACV,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,OAAe,EAAE,QAAgB,EAAE,QAAkB;IACpF,IAAI,qBAAqB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzC,IAAI,CAAC;YACJ,MAAM,cAAc,EAAE,CAAC;YACvB,OAAO,MAAM,mBAAmB,CAAC,OAAO,EAAE,QAAQ,EAAE,QAA8B,CAAC,CAAC;QACrF,CAAC;QAAC,MAAM,CAAC;YACR,uDAAuD;YACvD,OAAO,aAAa,CAAC,OAAO,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC;QACtD,CAAC;IACF,CAAC;IAED,OAAO,aAAa,CAAC,OAAO,EAAE,QAAQ,EAAE,QAAwB,CAAC,CAAC;AACnE,CAAC","sourcesContent":["/**\n * Chunking coordinator for the semantic search subsystem.\n *\n * Dispatches to the tree-sitter AST chunker for code files and the\n * text chunker for non-code files (markdown, YAML, JSON, etc.).\n */\n\nimport { chunkTextFile } from \"./text-chunker.js\";\nimport { chunkWithTreeSitter, initTreeSitter } from \"./tree-sitter-chunker.js\";\nimport type { Chunk, FileType, TextFileType, TreeSitterLanguage } from \"./types.js\";\n\n// ============================================================================\n// Language Sets\n// ============================================================================\n\nconst TREE_SITTER_LANGUAGES: Set<string> = new Set([\n\t\"typescript\",\n\t\"tsx\",\n\t\"javascript\",\n\t\"python\",\n\t\"go\",\n\t\"rust\",\n\t\"java\",\n\t\"c\",\n\t\"cpp\",\n\t\"gdscript\",\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Chunk a file's content into semantically meaningful pieces.\n *\n * For code files, uses tree-sitter to parse the AST and extract functions,\n * classes, methods, etc. For text files, uses format-specific splitting rules.\n *\n * If tree-sitter parsing fails for a code file, falls back to plaintext chunking.\n *\n * @param content - Raw file content\n * @param filePath - Relative file path (stored in chunk metadata)\n * @param fileType - Detected file type\n */\nexport async function chunkFile(content: string, filePath: string, fileType: FileType): Promise<Chunk[]> {\n\tif (TREE_SITTER_LANGUAGES.has(fileType)) {\n\t\ttry {\n\t\t\tawait initTreeSitter();\n\t\t\treturn await chunkWithTreeSitter(content, filePath, fileType as TreeSitterLanguage);\n\t\t} catch {\n\t\t\t// Tree-sitter failed — fall back to plaintext chunking\n\t\t\treturn chunkTextFile(content, filePath, \"plaintext\");\n\t\t}\n\t}\n\n\treturn chunkTextFile(content, filePath, fileType as TextFileType);\n}\n"]}
|
package/dist/scanner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAM3C,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;CACd;
|
|
1
|
+
{"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAM3C,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;CACd;AAsED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAIhE;AAED;;;;;;GAMG;AACH,wBAAsB,WAAW,CAChC,WAAW,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,EACxB,WAAW,CAAC,EAAE,MAAM,EAAE,GACpB,OAAO,CAAC,WAAW,EAAE,CAAC,CAgCxB"}
|
package/dist/scanner.js
CHANGED
|
@@ -50,6 +50,11 @@ const EXTENSION_MAP = new Map([
|
|
|
50
50
|
[".cxx", "cpp"],
|
|
51
51
|
[".hh", "cpp"],
|
|
52
52
|
[".hxx", "cpp"],
|
|
53
|
+
[".gd", "gdscript"],
|
|
54
|
+
// Godot text-based files
|
|
55
|
+
[".tscn", "plaintext"],
|
|
56
|
+
[".tres", "plaintext"],
|
|
57
|
+
[".godot", "plaintext"],
|
|
53
58
|
// Text file types
|
|
54
59
|
[".md", "markdown"],
|
|
55
60
|
[".mdx", "markdown"],
|
package/dist/scanner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAiB5B,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,oCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC9C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AACvC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,WAAmB,EACnB,eAAwB,EACxB,WAAsB;IAEtB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,sEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,4DAA4D;IAC5D,uEAAuE;IACvE,oDAAoD;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,KAAK,MAAM,GAAG,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;YACrC,aAAa,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW;IACjC,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,8DAA8D;QAC9D,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB;IACvD,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,kEAAkE;QAClE,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS;IACzB,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED,sEAAsE;AACtE,SAAS,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY;IAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,wCAAwC;IACzC,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB;IAC1F,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,sFAAsF;QACtF,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB;IAC5G,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,4DAA4D;QAC5D,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(\n\tprojectRoot: string,\n\tglobalMemoryDir?: string,\n\tvisibleDirs?: string[],\n): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of visibleDirs ?? []) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\t/* os.homedir() can throw in sandboxed/unusual environments */\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\t/* Directory unreadable (EACCES, ENOENT) — abandon shallow scan */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\t/* Directory unreadable (permission denied, etc.) — stop recursion into this branch */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\t/* Memory directory doesn't exist or is unreadable — skip */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAc,QAAQ,EAAE,MAAM,SAAS,CAAC;AACtF,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACrE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAiB5B,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,yCAAyC;AACzC,MAAM,aAAa,GAAG,IAAI,GAAG,IAAI,CAAC;AAElC,4DAA4D;AAC5D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACzB,cAAc;IACd,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;IACP,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC,CAAC;AAEH,oCAAoC;AACpC,MAAM,aAAa,GAAkC,IAAI,GAAG,CAAmB;IAC9E,wBAAwB;IACxB,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,YAAY,CAAC;IACrB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,MAAM,EAAE,YAAY,CAAC;IACtB,CAAC,KAAK,EAAE,QAAQ,CAAC;IACjB,CAAC,KAAK,EAAE,IAAI,CAAC;IACb,CAAC,KAAK,EAAE,MAAM,CAAC;IACf,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,IAAI,EAAE,GAAG,CAAC;IACX,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,yBAAyB;IACzB,CAAC,OAAO,EAAE,WAAW,CAAC;IACtB,CAAC,OAAO,EAAE,WAAW,CAAC;IACtB,CAAC,QAAQ,EAAE,WAAW,CAAC;IACvB,kBAAkB;IAClB,CAAC,KAAK,EAAE,UAAU,CAAC;IACnB,CAAC,MAAM,EAAE,UAAU,CAAC;IACpB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,MAAM,EAAE,WAAW,CAAC;IACrB,CAAC,OAAO,EAAE,WAAW,CAAC;CACtB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC9C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;AACvC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,WAAmB,EACnB,eAAwB,EACxB,WAAsB;IAEtB,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,sEAAsE;IACtE,6EAA6E;IAC7E,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE7C,IAAI,SAAS,EAAE,CAAC;QACf,8DAA8D;QAC9D,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACP,mDAAmD;QACnD,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QACpB,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5C,aAAa,CAAC,WAAW,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,4DAA4D;IAC5D,uEAAuE;IACvE,oDAAoD;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,KAAK,MAAM,GAAG,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;YACrC,aAAa,CAAC,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,IAAI,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;QACpD,aAAa,CAAC,eAAe,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,oDAAoD;AACpD,SAAS,aAAa,CAAC,GAAW;IACjC,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACnD,OAAO,aAAa,KAAK,cAAc,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACR,8DAA8D;QAC9D,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,OAAsB;IACvD,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,kEAAkE;QAClE,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAQD,kEAAkE;AAClE,SAAS,OAAO,CAAC,CAAS;IACzB,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED,sEAAsE;AACtE,SAAS,aAAa,CAAC,EAAiB,EAAE,GAAW,EAAE,IAAY;IAClE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAC9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAEvC,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAEnD,MAAM,QAAQ,GAAG,OAAO;aACtB,KAAK,CAAC,OAAO,CAAC;aACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClB,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,wCAAwC;IACzC,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAc;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvE,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1D,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,OAAe;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,yDAAyD;IACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YAC1E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,GAAW,EAAE,IAAY,EAAE,EAAiB,EAAE,OAAsB;IAC1F,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACR,sFAAsF;QACtF,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,uBAAuB;YACvB,IAAI,aAAa,CAAC,OAAO,CAAC;gBAAE,SAAS;YAErC,qDAAqD;YACrD,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;gBAAE,SAAS;YAEzC,2CAA2C;YAC3C,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;YAElC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;YAC3C,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAE9B,6BAA6B;QAC7B,IAAI,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,YAAY;QACZ,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,sBAAsB;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,QAAQ;YAClB,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,WAAmB,EAAE,OAAsB,EAAE,aAAsB;IAC5G,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACJ,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,4DAA4D;QAC5D,OAAO;IACR,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACJ,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACR,kDAAkD;YAClD,SAAS;QACV,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACzB,8BAA8B;YAC9B,aAAa,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,aAAa,IAAI,SAAS,CAAC,CAAC;YAC1E,SAAS;QACV,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,aAAa,IAAI,SAAS,CAAC;QACjD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzF,OAAO,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC3B,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,OAAO;SACpB,CAAC,CAAC;IACJ,CAAC;AACF,CAAC","sourcesContent":["/**\n * File scanner for the semantic search subsystem.\n *\n * Discovers project files for indexing by walking the directory tree,\n * respecting .gitignore rules, and classifying files by type.\n */\n\nimport { existsSync, readdirSync, readFileSync, type Stats, statSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { extname, isAbsolute, join, relative, sep } from \"node:path\";\nimport ignore from \"ignore\";\nimport type { FileType } from \"./types.js\";\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** A file discovered by the scanner, ready for indexing. */\nexport interface ScannedFile {\n\t/** Path relative to the project root (posix separators). */\n\tfilePath: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n\t/** File modification time in milliseconds since epoch. */\n\tmtime: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\n/** Maximum file size to index (1 MB). */\nconst MAX_FILE_SIZE = 1024 * 1024;\n\n/** Directories unconditionally skipped during traversal. */\nconst SKIP_DIRS = new Set([\n\t\"node_modules\",\n\t\".git\",\n\t\".dreb/index\",\n\t\".hg\",\n\t\".svn\",\n\t\"__pycache__\",\n\t\".tox\",\n\t\".venv\",\n\t\"dist\",\n\t\"build\",\n\t\".next\",\n\t\".nuxt\",\n\t\"coverage\",\n\t\".cache\",\n]);\n\n/** Extension → FileType mapping. */\nconst EXTENSION_MAP: ReadonlyMap<string, FileType> = new Map<string, FileType>([\n\t// Tree-sitter languages\n\t[\".ts\", \"typescript\"],\n\t[\".tsx\", \"tsx\"],\n\t[\".js\", \"javascript\"],\n\t[\".mjs\", \"javascript\"],\n\t[\".cjs\", \"javascript\"],\n\t[\".py\", \"python\"],\n\t[\".go\", \"go\"],\n\t[\".rs\", \"rust\"],\n\t[\".java\", \"java\"],\n\t[\".c\", \"c\"],\n\t[\".h\", \"c\"],\n\t[\".cpp\", \"cpp\"],\n\t[\".hpp\", \"cpp\"],\n\t[\".cc\", \"cpp\"],\n\t[\".cxx\", \"cpp\"],\n\t[\".hh\", \"cpp\"],\n\t[\".hxx\", \"cpp\"],\n\t[\".gd\", \"gdscript\"],\n\t// Godot text-based files\n\t[\".tscn\", \"plaintext\"],\n\t[\".tres\", \"plaintext\"],\n\t[\".godot\", \"plaintext\"],\n\t// Text file types\n\t[\".md\", \"markdown\"],\n\t[\".mdx\", \"markdown\"],\n\t[\".yml\", \"yaml\"],\n\t[\".yaml\", \"yaml\"],\n\t[\".json\", \"json\"],\n\t[\".toml\", \"toml\"],\n\t[\".txt\", \"plaintext\"],\n\t[\".cfg\", \"plaintext\"],\n\t[\".ini\", \"plaintext\"],\n\t[\".env\", \"plaintext\"],\n\t[\".conf\", \"plaintext\"],\n]);\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Detect the {@link FileType} for a file path based on its extension.\n * Returns `null` for unrecognized extensions or files without an extension.\n */\nexport function detectFileType(filePath: string): FileType | null {\n\tconst ext = extname(filePath).toLowerCase();\n\tif (!ext) return null;\n\treturn EXTENSION_MAP.get(ext) ?? null;\n}\n\n/**\n * Scan a project directory and return all indexable files.\n *\n * Walks the tree rooted at {@link projectRoot}, respects `.gitignore` rules,\n * skips binary / oversized files, and optionally includes memory files from\n * a global memory directory.\n */\nexport async function scanProject(\n\tprojectRoot: string,\n\tglobalMemoryDir?: string,\n\tvisibleDirs?: string[],\n): Promise<ScannedFile[]> {\n\tconst results: ScannedFile[] = [];\n\n\t// Detect if projectRoot is the home directory — use shallow scan mode\n\t// to avoid recursing into the entire home dir (which would be catastrophic).\n\tconst isHomeDir = isHomeDirPath(projectRoot);\n\n\tif (isHomeDir) {\n\t\t// Shallow mode: only scan top-level files and ~/.dreb/memory/\n\t\tscanShallow(projectRoot, results);\n\t} else {\n\t\t// Normal mode: full recursive walk with .gitignore\n\t\tconst ig = ignore();\n\t\tloadGitignore(ig, projectRoot, projectRoot);\n\t\twalkDirectory(projectRoot, projectRoot, ig, results);\n\t}\n\n\t// Include tool-visible .dreb/ subdirs (bypasses gitignore).\n\t// In home dir mode, global memory is already handled separately below,\n\t// and we don't want to double-scan ~/.dreb/memory/.\n\tif (!isHomeDir) {\n\t\tfor (const dir of visibleDirs ?? []) {\n\t\t\tscanMemoryDir(dir, projectRoot, results);\n\t\t}\n\t}\n\n\t// Include global memory files if the directory exists\n\tif (globalMemoryDir && existsSync(globalMemoryDir)) {\n\t\tscanMemoryDir(globalMemoryDir, projectRoot, results);\n\t}\n\n\treturn results;\n}\n\n/** Check if a path is the user's home directory. */\nfunction isHomeDirPath(dir: string): boolean {\n\ttry {\n\t\tconst home = homedir();\n\t\t// Normalize trailing slashes for comparison\n\t\tconst normalizedDir = dir.replace(/[/\\\\]+$/, \"\");\n\t\tconst normalizedHome = home.replace(/[/\\\\]+$/, \"\");\n\t\treturn normalizedDir === normalizedHome;\n\t} catch {\n\t\t/* os.homedir() can throw in sandboxed/unusual environments */\n\t\treturn false;\n\t}\n}\n\n/**\n * Shallow scan mode for home directory: only index top-level files\n * (no directory recursion) to avoid scanning the entire home directory.\n * Memory files are handled separately via scanMemoryDir.\n */\nfunction scanShallow(dir: string, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\t/* Directory unreadable (EACCES, ENOENT) — abandon shallow scan */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\t// Skip dotfiles/dotdirs in home dir (except specific ones we want)\n\t\tif (entry.startsWith(\".\")) continue;\n\n\t\tconst fullPath = join(dir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\t// Only index files, not directories (shallow mode)\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: entry,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n// ============================================================================\n// Internal helpers\n// ============================================================================\n\ntype IgnoreMatcher = ReturnType<typeof ignore>;\n\n/** Convert an OS path to posix separators for ignore matching. */\nfunction toPosix(p: string): string {\n\treturn p.split(sep).join(\"/\");\n}\n\n/** Load .gitignore rules from a directory into the ignore matcher. */\nfunction loadGitignore(ig: IgnoreMatcher, dir: string, root: string): void {\n\tconst gitignorePath = join(dir, \".gitignore\");\n\tif (!existsSync(gitignorePath)) return;\n\n\ttry {\n\t\tconst content = readFileSync(gitignorePath, \"utf-8\");\n\t\tconst relDir = relative(root, dir);\n\t\tconst prefix = relDir ? `${toPosix(relDir)}/` : \"\";\n\n\t\tconst patterns = content\n\t\t\t.split(/\\r?\\n/)\n\t\t\t.map((line) => prefixPattern(line, prefix))\n\t\t\t.filter((line): line is string => line !== null);\n\n\t\tif (patterns.length > 0) {\n\t\t\tig.add(patterns);\n\t\t}\n\t} catch {\n\t\t// Unreadable .gitignore — skip silently\n\t}\n}\n\n/**\n * Prefix a .gitignore pattern with a directory path so it applies\n * correctly when matching against root-relative paths.\n */\nfunction prefixPattern(line: string, prefix: string): string | null {\n\tconst trimmed = line.trim();\n\tif (!trimmed) return null;\n\tif (trimmed.startsWith(\"#\") && !trimmed.startsWith(\"\\\\#\")) return null;\n\n\tlet pattern = line;\n\tlet negated = false;\n\n\tif (pattern.startsWith(\"!\")) {\n\t\tnegated = true;\n\t\tpattern = pattern.slice(1);\n\t} else if (pattern.startsWith(\"\\\\!\")) {\n\t\tpattern = pattern.slice(1);\n\t}\n\n\tconst prefixed = prefix ? `${prefix}${pattern}` : pattern;\n\treturn negated ? `!${prefixed}` : prefixed;\n}\n\n/**\n * Check if a directory component (relative to root) should be unconditionally skipped.\n * Handles both top-level names (\"node_modules\") and nested paths (\".dreb/index\").\n */\nfunction shouldSkipDir(relPath: string): boolean {\n\tconst posix = toPosix(relPath);\n\n\t// Check the directory name itself\n\tconst parts = posix.split(\"/\");\n\tconst name = parts[parts.length - 1];\n\tif (SKIP_DIRS.has(name)) return true;\n\n\t// Check multi-segment skip patterns (e.g. \".dreb/index\")\n\tfor (const skip of SKIP_DIRS) {\n\t\tif (skip.includes(\"/\") && (posix === skip || posix.endsWith(`/${skip}`))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/** Recursively walk a directory, collecting indexable files. */\nfunction walkDirectory(dir: string, root: string, ig: IgnoreMatcher, results: ScannedFile[]): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(dir);\n\t} catch {\n\t\t/* Directory unreadable (permission denied, etc.) — stop recursion into this branch */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(dir, entry);\n\t\tconst relPath = relative(root, fullPath);\n\t\tconst posixRel = toPosix(relPath);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Hard-coded skip list\n\t\t\tif (shouldSkipDir(relPath)) continue;\n\n\t\t\t// .gitignore check (directories need trailing slash)\n\t\t\tif (ig.ignores(`${posixRel}/`)) continue;\n\n\t\t\t// Load nested .gitignore before descending\n\t\t\tloadGitignore(ig, fullPath, root);\n\n\t\t\twalkDirectory(fullPath, root, ig, results);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\n\t\t// .gitignore check for files\n\t\tif (ig.ignores(posixRel)) continue;\n\n\t\t// Size gate\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\t// File type detection\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\tresults.push({\n\t\t\tfilePath: posixRel,\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n\n/**\n * Scan a memory directory (project or global) for indexable files.\n *\n * Memory directories are always fully included — no .gitignore filtering —\n * because they live outside the normal project tree or in `.dreb/` which\n * is typically gitignored.\n *\n * Paths for global memory files are stored with a `~memory/` prefix\n * to distinguish them from project files.\n */\nfunction scanMemoryDir(memoryDir: string, projectRoot: string, results: ScannedFile[], baseMemoryDir?: string): void {\n\tlet entries: string[];\n\ttry {\n\t\tentries = readdirSync(memoryDir);\n\t} catch {\n\t\t/* Memory directory doesn't exist or is unreadable — skip */\n\t\treturn;\n\t}\n\n\tfor (const entry of entries) {\n\t\tconst fullPath = join(memoryDir, entry);\n\n\t\tlet stats: Stats;\n\t\ttry {\n\t\t\tstats = statSync(fullPath);\n\t\t} catch {\n\t\t\t/* Broken symlink or race deletion — skip entry */\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (stats.isDirectory()) {\n\t\t\t// Recurse into subdirectories\n\t\t\tscanMemoryDir(fullPath, projectRoot, results, baseMemoryDir ?? memoryDir);\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (!stats.isFile()) continue;\n\t\tif (stats.size > MAX_FILE_SIZE) continue;\n\t\tif (stats.size === 0) continue;\n\n\t\tconst fileType = detectFileType(entry);\n\t\tif (!fileType) continue;\n\n\t\t// If the memory dir is inside the project root, use normal relative path.\n\t\t// Otherwise, use a ~memory/ prefix so paths remain unique and identifiable.\n\t\tconst rel = relative(projectRoot, fullPath);\n\t\tconst isOutsideProject = rel.startsWith(\"..\") || isAbsolute(rel);\n\t\tconst rootMemoryDir = baseMemoryDir ?? memoryDir;\n\t\tconst filePath = isOutsideProject ? `~memory/${relative(rootMemoryDir, fullPath)}` : rel;\n\n\t\tresults.push({\n\t\t\tfilePath: toPosix(filePath),\n\t\t\tfileType,\n\t\t\tmtime: stats.mtimeMs,\n\t\t});\n\t}\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,OAAO,KAAK,EAAE,KAAK,EAAa,kBAAkB,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,OAAO,KAAK,EAAE,KAAK,EAAa,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAsLvE;;;GAGG;AACH,wBAAsB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC,CAuBpD;AAqJD;;;;;;;;;GASG;AACH,wBAAsB,mBAAmB,CACxC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,kBAAkB,GAC1B,OAAO,CAAC,KAAK,EAAE,CAAC,CAwElB"}
|
|
@@ -28,6 +28,7 @@ const GRAMMAR_PATHS = {
|
|
|
28
28
|
java: "tree-sitter-java/tree-sitter-java.wasm",
|
|
29
29
|
c: "tree-sitter-c/tree-sitter-c.wasm",
|
|
30
30
|
cpp: "tree-sitter-cpp/tree-sitter-cpp.wasm",
|
|
31
|
+
gdscript: "../grammars/tree-sitter-gdscript.wasm",
|
|
31
32
|
};
|
|
32
33
|
// ============================================================================
|
|
33
34
|
// Name Extractors
|
|
@@ -113,6 +114,11 @@ const CPP_EXTRACTORS = [
|
|
|
113
114
|
...C_EXTRACTORS,
|
|
114
115
|
{ type: "class_specifier", kind: "class", getName: nameField },
|
|
115
116
|
];
|
|
117
|
+
const GDSCRIPT_EXTRACTORS = [
|
|
118
|
+
{ type: "function_definition", kind: "function", getName: nameField },
|
|
119
|
+
{ type: "class_definition", kind: "class", getName: nameField },
|
|
120
|
+
{ type: "enum_definition", kind: "enum", getName: nameField },
|
|
121
|
+
];
|
|
116
122
|
const LANGUAGE_EXTRACTORS = {
|
|
117
123
|
typescript: TS_EXTRACTORS,
|
|
118
124
|
tsx: TS_EXTRACTORS,
|
|
@@ -123,6 +129,7 @@ const LANGUAGE_EXTRACTORS = {
|
|
|
123
129
|
java: JAVA_EXTRACTORS,
|
|
124
130
|
c: C_EXTRACTORS,
|
|
125
131
|
cpp: CPP_EXTRACTORS,
|
|
132
|
+
gdscript: GDSCRIPT_EXTRACTORS,
|
|
126
133
|
};
|
|
127
134
|
// ============================================================================
|
|
128
135
|
// Initialization
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tree-sitter-chunker.js","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAIvC,4DAA4D;AAC5D,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAiC/C,IAAI,MAAM,GAAuB,IAAI,CAAC;AACtC,IAAI,QAAQ,GAAyB,IAAI,CAAC;AAE1C,IAAI,WAAW,GAAyB,IAAI,CAAC;AAC7C,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,MAAM,aAAa,GAAG,IAAI,GAAG,EAA0D,CAAC;AAExF,4CAA4C;AAC5C,MAAM,aAAa,GAAuC;IACzD,UAAU,EAAE,oDAAoD;IAChE,GAAG,EAAE,6CAA6C;IAClD,UAAU,EAAE,oDAAoD;IAChE,MAAM,EAAE,4CAA4C;IACpD,EAAE,EAAE,oCAAoC;IACxC,IAAI,EAAE,wCAAwC;IAC9C,IAAI,EAAE,wCAAwC;IAC9C,CAAC,EAAE,kCAAkC;IACrC,GAAG,EAAE,sCAAsC;CAC3C,CAAC;AAEF,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E,2CAA2C;AAC3C,SAAS,SAAS,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AACrD,CAAC;AAED,6DAA6D;AAC7D,SAAS,iBAAiB,CAAC,IAAY;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;IAC3B,IAAI,MAAM,EAAE,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC5C,OAAO,MAAM,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACvD,CAAC;IACD,OAAO,IAAI,CAAC;AACb,CAAC;AAED,oFAAoF;AACpF,SAAS,aAAa,CAAC,IAAY;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,mEAAmE;IACnE,IAAI,UAAU,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC/C,OAAO,UAAU,CAAC,iBAAiB,CAAC,YAAY,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACjE,CAAC;IACD,OAAO,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC;AAChC,CAAC;AAED,6DAA6D;AAC7D,SAAS,UAAU,CAAC,IAAY;IAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,uEAAuE;QACvE,yCAAyC;QACzC,OAAO,IAAI,CAAC;IACb,CAAC;IACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AACrD,CAAC;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;IACxE,EAAE,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE;IAC1E,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,iBAAiB,GAAoB;IAC1C,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACrE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC/D,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CACzD,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IAC/D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE;IAC9F,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAC3D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE;IACvD,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CAC7D,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CACxE,CAAC;AAEF,MAAM,YAAY,GAAoB;IACrC,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,aAAa,EAAE;IACzE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CAChE,CAAC;AAEF,MAAM,cAAc,GAAoB;IACvC,GAAG,YAAY;IACf,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC9D,CAAC;AAEF,MAAM,mBAAmB,GAAgD;IACxE,UAAU,EAAE,aAAa;IACzB,GAAG,EAAE,aAAa;IAClB,UAAU,EAAE,aAAa;IACzB,MAAM,EAAE,iBAAiB;IACzB,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,eAAe;IACrB,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,cAAc;CACnB,CAAC;AAEF,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc;IACnC,IAAI,WAAW;QAAE,OAAO;IACxB,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;QACzB,IAAI,CAAC;YACJ,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YAC5C,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;YACpB,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;YAExB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC;YACzE,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YACvC,MAAM,MAAM,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YACvE,WAAW,GAAG,IAAI,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,2DAA2D;YAC3D,oCAAoC;YACpC,WAAW,GAAG,IAAI,CAAC;YACnB,MAAM,GAAG,CAAC;QACX,CAAC;IACF,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,WAAW,CAAC;AACpB,CAAC;AAED,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,qDAAqD;AACrD,KAAK,UAAU,YAAY,CAAC,IAAwB;IACnD,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAChD,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAChC,OAAO,MAAM,CAAC;AACf,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;GAIG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,UAA2B,EAAE,YAAsB;IAC5F,+BAA+B;IAC/B,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC1D,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,QAAQ,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE3D,qBAAqB;IACrB,MAAM,GAAG,GAAsB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,oFAAoF;QACpF,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACtC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,wBAAwB,CAAC,CAAC;YAC/E,IAAI,CAAC,OAAO;gBAAE,SAAS;QACxB,CAAC;QAED,uFAAuF;QACvF,IAAI,IAAI,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;YACpC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,KAAK,qBAAqB;gBAAE,SAAS;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,cAAc;QAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC;QAEzC,GAAG,CAAC,IAAI,CAAC;YACR,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC;YACvB,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,SAAS;YACT,OAAO;YACP,OAAO,EAAE,IAAI,CAAC,IAAI;SAClB,CAAC,CAAC;IACJ,CAAC;IAED,wEAAwE;IACxE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAEvE,8CAA8C;IAC9C,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC,CAAC;IAErB,KAAK,MAAM,MAAM,IAAI,GAAG,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,SAAS,GAAG,WAAW,EAAE,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,CAAC;QACD,6DAA6D;IAC9D,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,2EAA2E;AAC3E,MAAM,aAAa,GAAG,CAAC,CAAC;AAExB;;GAEG;AACH,SAAS,WAAW,CACnB,OAA0B,EAC1B,WAAqB,EACrB,QAAgB,EAChB,QAA4B;IAE5B,MAAM,IAAI,GAAY,EAAE,CAAC;IACzB,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,yBAAyB;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,MAAM,CAAC,SAAS,GAAG,MAAM,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YACrE,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;gBAC9B,IAAI,CAAC,IAAI,CAAC;oBACT,QAAQ;oBACR,SAAS,EAAE,MAAM;oBACjB,OAAO,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC;oBAC7B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;oBAC5B,QAAQ;iBACR,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;QACD,MAAM,GAAG,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,iCAAiC;IACjC,IAAI,MAAM,IAAI,WAAW,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,CAAC;gBACT,QAAQ;gBACR,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,WAAW,CAAC,MAAM;gBAC3B,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC5B,QAAQ;aACR,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAC;AACb,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACxC,OAAe,EACf,QAAgB,EAChB,QAA4B;IAE5B,IAAI,CAAC,WAAW,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,MAAM,cAAc,EAAE,CAAC;IACxB,CAAC;IAED,6CAA6C;IAC7C,MAAM,UAAU,GAAG,MAAO,CAAC;IAC3B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;IAChC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,8DAA8D;QAC9D,MAAM,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO;YACN;gBACC,QAAQ;gBACR,SAAS,EAAE,CAAC;gBACZ,OAAO,EAAE,KAAK,CAAC,MAAM;gBACrB,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO;gBACP,QAAQ,EAAE,QAAQ;aAClB;SACD,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;QAEvE,mCAAmC;QACnC,MAAM,MAAM,GAAY,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3C,QAAQ;YACR,SAAS,EAAE,CAAC,CAAC,SAAS;YACtB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,QAAQ,EAAE,QAAQ;SAClB,CAAC,CAAC,CAAC;QAEJ,iBAAiB;QACjB,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnE,+BAA+B;QAC/B,MAAM,GAAG,GAAG,CAAC,GAAG,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC;QACjC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAE9C,mEAAmE;QACnE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACN;oBACC,QAAQ;oBACR,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,WAAW,CAAC,MAAM;oBAC3B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO;oBACP,QAAQ,EAAE,QAAQ;iBAClB;aACD,CAAC;QACH,CAAC;QAED,OAAO,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACV,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,MAAM,CAAC,MAAM,EAAE,CAAC;IACjB,CAAC;AACF,CAAC","sourcesContent":["/**\n * AST-aware code chunking using tree-sitter (WASM).\n *\n * Parses source files into syntax trees and extracts meaningful code constructs\n * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between\n * extracted nodes are captured as file-level chunks when substantial.\n */\n\nimport { readFileSync } from \"fs\";\nimport { createRequire } from \"module\";\nimport type { Node as TSNode } from \"web-tree-sitter\";\nimport type { Chunk, ChunkKind, TreeSitterLanguage } from \"./types.js\";\n\n// Use createRequire for resolving WASM paths in ESM context\nconst require = createRequire(import.meta.url);\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/** Describes which AST node types to extract for a language and how to get names. */\ninterface NodeExtractor {\n\t/** The tree-sitter node type string. */\n\ttype: string;\n\t/** The ChunkKind to assign to extracted chunks. */\n\tkind: ChunkKind;\n\t/** How to extract the symbol name from the node. */\n\tgetName: (node: TSNode) => string | null;\n}\n\n/** Intermediate representation of an extracted AST region. */\ninterface ExtractedRegion {\n\tname: string | null;\n\tkind: ChunkKind;\n\tstartLine: number; // 1-indexed\n\tendLine: number; // 1-indexed, inclusive\n\tcontent: string;\n}\n\n// ============================================================================\n// Lazy Imports\n// ============================================================================\n\n// web-tree-sitter types imported dynamically to avoid top-level await\ntype ParserClass = typeof import(\"web-tree-sitter\").Parser;\ntype LanguageClass = typeof import(\"web-tree-sitter\").Language;\n\nlet Parser: ParserClass | null = null;\nlet Language: LanguageClass | null = null;\n\nlet initPromise: Promise<void> | null = null;\nlet initialized = false;\n\n// ============================================================================\n// Language Cache\n// ============================================================================\n\nconst languageCache = new Map<TreeSitterLanguage, import(\"web-tree-sitter\").Language>();\n\n/** Grammar WASM paths keyed by language. */\nconst GRAMMAR_PATHS: Record<TreeSitterLanguage, string> = {\n\ttypescript: \"tree-sitter-typescript/tree-sitter-typescript.wasm\",\n\ttsx: \"tree-sitter-typescript/tree-sitter-tsx.wasm\",\n\tjavascript: \"tree-sitter-javascript/tree-sitter-javascript.wasm\",\n\tpython: \"tree-sitter-python/tree-sitter-python.wasm\",\n\tgo: \"tree-sitter-go/tree-sitter-go.wasm\",\n\trust: \"tree-sitter-rust/tree-sitter-rust.wasm\",\n\tjava: \"tree-sitter-java/tree-sitter-java.wasm\",\n\tc: \"tree-sitter-c/tree-sitter-c.wasm\",\n\tcpp: \"tree-sitter-cpp/tree-sitter-cpp.wasm\",\n};\n\n// ============================================================================\n// Name Extractors\n// ============================================================================\n\n/** Get name from a node's `name` field. */\nfunction nameField(node: TSNode): string | null {\n\treturn node.childForFieldName(\"name\")?.text ?? null;\n}\n\n/** Get name for an arrow function assigned to a variable. */\nfunction arrowFunctionName(node: TSNode): string | null {\n\tconst parent = node.parent;\n\tif (parent?.type === \"variable_declarator\") {\n\t\treturn parent.childForFieldName(\"name\")?.text ?? null;\n\t}\n\treturn null;\n}\n\n/** Get name for C function_definition: name is in the function_declarator child. */\nfunction cFunctionName(node: TSNode): string | null {\n\tconst declarator = node.childForFieldName(\"declarator\");\n\tif (!declarator) return null;\n\t// function_declarator has a `declarator` field for the actual name\n\tif (declarator.type === \"function_declarator\") {\n\t\treturn declarator.childForFieldName(\"declarator\")?.text ?? null;\n\t}\n\treturn declarator.text ?? null;\n}\n\n/** Get name from an export_statement's inner declaration. */\nfunction exportName(node: TSNode): string | null {\n\tconst decl = node.childForFieldName(\"declaration\");\n\tif (!decl) {\n\t\t// Named export like `export { foo }` — use the full text isn't useful,\n\t\t// just return null for anonymous exports\n\t\treturn null;\n\t}\n\treturn decl.childForFieldName(\"name\")?.text ?? null;\n}\n\n// ============================================================================\n// Per-Language Node Extractors\n// ============================================================================\n\nconst TS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n\t{ type: \"type_alias_declaration\", kind: \"type_alias\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst JS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst PYTHON_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: nameField },\n\t{ type: \"class_definition\", kind: \"class\", getName: nameField },\n];\n\nconst GO_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"type_spec\", kind: \"struct\", getName: nameField },\n];\n\nconst RUST_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_item\", kind: \"function\", getName: nameField },\n\t{ type: \"impl_item\", kind: \"impl\", getName: (n) => n.childForFieldName(\"type\")?.text ?? null },\n\t{ type: \"struct_item\", kind: \"struct\", getName: nameField },\n\t{ type: \"enum_item\", kind: \"enum\", getName: nameField },\n\t{ type: \"trait_item\", kind: \"interface\", getName: nameField },\n];\n\nconst JAVA_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n];\n\nconst C_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: cFunctionName },\n\t{ type: \"struct_specifier\", kind: \"struct\", getName: nameField },\n];\n\nconst CPP_EXTRACTORS: NodeExtractor[] = [\n\t...C_EXTRACTORS,\n\t{ type: \"class_specifier\", kind: \"class\", getName: nameField },\n];\n\nconst LANGUAGE_EXTRACTORS: Record<TreeSitterLanguage, NodeExtractor[]> = {\n\ttypescript: TS_EXTRACTORS,\n\ttsx: TS_EXTRACTORS,\n\tjavascript: JS_EXTRACTORS,\n\tpython: PYTHON_EXTRACTORS,\n\tgo: GO_EXTRACTORS,\n\trust: RUST_EXTRACTORS,\n\tjava: JAVA_EXTRACTORS,\n\tc: C_EXTRACTORS,\n\tcpp: CPP_EXTRACTORS,\n};\n\n// ============================================================================\n// Initialization\n// ============================================================================\n\n/**\n * Initialize the tree-sitter WASM runtime. Must be called before parsing.\n * Safe to call multiple times — subsequent calls are no-ops.\n */\nexport async function initTreeSitter(): Promise<void> {\n\tif (initialized) return;\n\tif (initPromise) return initPromise;\n\n\tinitPromise = (async () => {\n\t\ttry {\n\t\t\tconst mod = await import(\"web-tree-sitter\");\n\t\t\tParser = mod.Parser;\n\t\t\tLanguage = mod.Language;\n\n\t\t\tconst wasmPath = require.resolve(\"web-tree-sitter/web-tree-sitter.wasm\");\n\t\t\tconst wasmBuf = readFileSync(wasmPath);\n\t\t\tawait Parser.init({ locateFile: () => wasmPath, wasmBinary: wasmBuf });\n\t\t\tinitialized = true;\n\t\t} catch (err) {\n\t\t\t// Reset so subsequent calls can retry instead of returning\n\t\t\t// the same rejected promise forever\n\t\t\tinitPromise = null;\n\t\t\tthrow err;\n\t\t}\n\t})();\n\n\treturn initPromise;\n}\n\n// ============================================================================\n// Language Loading\n// ============================================================================\n\n/** Load and cache a tree-sitter language grammar. */\nasync function loadLanguage(lang: TreeSitterLanguage): Promise<import(\"web-tree-sitter\").Language> {\n\tconst cached = languageCache.get(lang);\n\tif (cached) return cached;\n\n\tif (!Language) {\n\t\tthrow new Error(\"tree-sitter not initialized — call initTreeSitter() first\");\n\t}\n\n\tconst grammarPath = require.resolve(GRAMMAR_PATHS[lang]);\n\tconst loaded = await Language.load(grammarPath);\n\tlanguageCache.set(lang, loaded);\n\treturn loaded;\n}\n\n// ============================================================================\n// AST Extraction\n// ============================================================================\n\n/**\n * Walk the tree and collect nodes matching the target types.\n * Returns regions sorted by start position, with nested nodes skipped\n * (only outermost matches are kept).\n */\nfunction extractRegions(rootNode: TSNode, extractors: NodeExtractor[], _sourceLines: string[]): ExtractedRegion[] {\n\t// Gather all target node types\n\tconst typeToExtractors = new Map<string, NodeExtractor>();\n\tfor (const ext of extractors) {\n\t\ttypeToExtractors.set(ext.type, ext);\n\t}\n\n\tconst targetTypes = extractors.map((e) => e.type);\n\tconst candidates = rootNode.descendantsOfType(targetTypes);\n\n\t// Convert to regions\n\tconst raw: ExtractedRegion[] = [];\n\tfor (const node of candidates) {\n\t\tconst ext = typeToExtractors.get(node.type);\n\t\tif (!ext) continue;\n\n\t\t// For struct_specifier in C, only extract if it has a body (field_declaration_list)\n\t\tif (node.type === \"struct_specifier\") {\n\t\t\tconst hasBody = node.children.some((c) => c.type === \"field_declaration_list\");\n\t\t\tif (!hasBody) continue;\n\t\t}\n\n\t\t// For arrow_function, only extract if parent is variable_declarator (named assignment)\n\t\tif (node.type === \"arrow_function\") {\n\t\t\tif (node.parent?.type !== \"variable_declarator\") continue;\n\t\t}\n\n\t\tconst startLine = node.startPosition.row + 1; // 0→1 indexed\n\t\tconst endLine = node.endPosition.row + 1;\n\n\t\traw.push({\n\t\t\tname: ext.getName(node),\n\t\t\tkind: ext.kind,\n\t\t\tstartLine,\n\t\t\tendLine,\n\t\t\tcontent: node.text,\n\t\t});\n\t}\n\n\t// Sort by start line, then by end line descending (larger ranges first)\n\traw.sort((a, b) => a.startLine - b.startLine || b.endLine - a.endLine);\n\n\t// Remove nested regions — keep only outermost\n\tconst regions: ExtractedRegion[] = [];\n\tlet lastEndLine = -1;\n\n\tfor (const region of raw) {\n\t\tif (region.startLine > lastEndLine) {\n\t\t\tregions.push(region);\n\t\t\tlastEndLine = region.endLine;\n\t\t}\n\t\t// else: this region is nested inside the previous one — skip\n\t}\n\n\treturn regions;\n}\n\n// ============================================================================\n// Gap Collection\n// ============================================================================\n\n/** Minimum number of non-blank lines for a gap to become its own chunk. */\nconst MIN_GAP_LINES = 3;\n\n/**\n * Create file-level chunks for substantial code between extracted regions.\n */\nfunction collectGaps(\n\tregions: ExtractedRegion[],\n\tsourceLines: string[],\n\tfilePath: string,\n\tfileType: TreeSitterLanguage,\n): Chunk[] {\n\tconst gaps: Chunk[] = [];\n\tlet cursor = 1; // 1-indexed current line\n\n\tfor (const region of regions) {\n\t\tif (region.startLine > cursor) {\n\t\t\tconst gapLines = sourceLines.slice(cursor - 1, region.startLine - 1);\n\t\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\t\tgaps.push({\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: cursor,\n\t\t\t\t\tendLine: region.startLine - 1,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\t\tfileType,\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t\tcursor = region.endLine + 1;\n\t}\n\n\t// Trailing gap after last region\n\tif (cursor <= sourceLines.length) {\n\t\tconst gapLines = sourceLines.slice(cursor - 1);\n\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\tgaps.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: cursor,\n\t\t\t\tendLine: sourceLines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\tfileType,\n\t\t\t});\n\t\t}\n\t}\n\n\treturn gaps;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Parse a source file with tree-sitter and extract AST-aware chunks.\n *\n * Returns chunks for functions, classes, methods, and other language-specific\n * constructs, plus file-level chunks for substantial gaps between them.\n *\n * @param content - Raw source code text\n * @param filePath - Relative file path (stored in chunk metadata)\n * @param language - Tree-sitter language identifier\n */\nexport async function chunkWithTreeSitter(\n\tcontent: string,\n\tfilePath: string,\n\tlanguage: TreeSitterLanguage,\n): Promise<Chunk[]> {\n\tif (!initialized || !Parser) {\n\t\tawait initTreeSitter();\n\t}\n\n\t// After init, Parser is guaranteed to be set\n\tconst ParserCtor = Parser!;\n\tconst lang = await loadLanguage(language);\n\tconst parser = new ParserCtor();\n\tparser.setLanguage(lang);\n\n\tconst tree = parser.parse(content);\n\tif (!tree) {\n\t\t// Parse failed — free the parser WASM memory before returning\n\t\tparser.delete();\n\t\tconst lines = content.split(\"\\n\");\n\t\treturn [\n\t\t\t{\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: lines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent,\n\t\t\t\tfileType: language,\n\t\t\t},\n\t\t];\n\t}\n\n\ttry {\n\t\tconst sourceLines = content.split(\"\\n\");\n\t\tconst extractors = LANGUAGE_EXTRACTORS[language];\n\t\tconst regions = extractRegions(tree.rootNode, extractors, sourceLines);\n\n\t\t// Convert regions to Chunk objects\n\t\tconst chunks: Chunk[] = regions.map((r) => ({\n\t\t\tfilePath,\n\t\t\tstartLine: r.startLine,\n\t\t\tendLine: r.endLine,\n\t\t\tkind: r.kind,\n\t\t\tname: r.name,\n\t\t\tcontent: r.content,\n\t\t\tfileType: language,\n\t\t}));\n\n\t\t// Add gap chunks\n\t\tconst gaps = collectGaps(regions, sourceLines, filePath, language);\n\n\t\t// Merge and sort by start line\n\t\tconst all = [...chunks, ...gaps];\n\t\tall.sort((a, b) => a.startLine - b.startLine);\n\n\t\t// If no regions were extracted, return the whole file as one chunk\n\t\tif (chunks.length === 0) {\n\t\t\treturn [\n\t\t\t\t{\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: 1,\n\t\t\t\t\tendLine: sourceLines.length,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent,\n\t\t\t\t\tfileType: language,\n\t\t\t\t},\n\t\t\t];\n\t\t}\n\n\t\treturn all;\n\t} finally {\n\t\ttree.delete();\n\t\tparser.delete();\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"tree-sitter-chunker.js","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAIvC,4DAA4D;AAC5D,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAiC/C,IAAI,MAAM,GAAuB,IAAI,CAAC;AACtC,IAAI,QAAQ,GAAyB,IAAI,CAAC;AAE1C,IAAI,WAAW,GAAyB,IAAI,CAAC;AAC7C,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,MAAM,aAAa,GAAG,IAAI,GAAG,EAA0D,CAAC;AAExF,4CAA4C;AAC5C,MAAM,aAAa,GAAuC;IACzD,UAAU,EAAE,oDAAoD;IAChE,GAAG,EAAE,6CAA6C;IAClD,UAAU,EAAE,oDAAoD;IAChE,MAAM,EAAE,4CAA4C;IACpD,EAAE,EAAE,oCAAoC;IACxC,IAAI,EAAE,wCAAwC;IAC9C,IAAI,EAAE,wCAAwC;IAC9C,CAAC,EAAE,kCAAkC;IACrC,GAAG,EAAE,sCAAsC;IAC3C,QAAQ,EAAE,uCAAuC;CACjD,CAAC;AAEF,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E,2CAA2C;AAC3C,SAAS,SAAS,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AACrD,CAAC;AAED,6DAA6D;AAC7D,SAAS,iBAAiB,CAAC,IAAY;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;IAC3B,IAAI,MAAM,EAAE,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC5C,OAAO,MAAM,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACvD,CAAC;IACD,OAAO,IAAI,CAAC;AACb,CAAC;AAED,oFAAoF;AACpF,SAAS,aAAa,CAAC,IAAY;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,mEAAmE;IACnE,IAAI,UAAU,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC/C,OAAO,UAAU,CAAC,iBAAiB,CAAC,YAAY,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACjE,CAAC;IACD,OAAO,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC;AAChC,CAAC;AAED,6DAA6D;AAC7D,SAAS,UAAU,CAAC,IAAY;IAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,uEAAuE;QACvE,yCAAyC;QACzC,OAAO,IAAI,CAAC;IACb,CAAC;IACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AACrD,CAAC;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;IACxE,EAAE,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE;IAC1E,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,iBAAiB,GAAoB;IAC1C,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACrE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC/D,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CACzD,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IAC/D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE;IAC9F,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAC3D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE;IACvD,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CAC7D,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CACxE,CAAC;AAEF,MAAM,YAAY,GAAoB;IACrC,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,aAAa,EAAE;IACzE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CAChE,CAAC;AAEF,MAAM,cAAc,GAAoB;IACvC,GAAG,YAAY;IACf,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC9D,CAAC;AAEF,MAAM,mBAAmB,GAAoB;IAC5C,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACrE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAC/D,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE;CAC7D,CAAC;AAEF,MAAM,mBAAmB,GAAgD;IACxE,UAAU,EAAE,aAAa;IACzB,GAAG,EAAE,aAAa;IAClB,UAAU,EAAE,aAAa;IACzB,MAAM,EAAE,iBAAiB;IACzB,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,eAAe;IACrB,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,cAAc;IACnB,QAAQ,EAAE,mBAAmB;CAC7B,CAAC;AAEF,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc;IACnC,IAAI,WAAW;QAAE,OAAO;IACxB,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;QACzB,IAAI,CAAC;YACJ,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YAC5C,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;YACpB,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;YAExB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC;YACzE,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YACvC,MAAM,MAAM,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YACvE,WAAW,GAAG,IAAI,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,2DAA2D;YAC3D,oCAAoC;YACpC,WAAW,GAAG,IAAI,CAAC;YACnB,MAAM,GAAG,CAAC;QACX,CAAC;IACF,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,WAAW,CAAC;AACpB,CAAC;AAED,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,qDAAqD;AACrD,KAAK,UAAU,YAAY,CAAC,IAAwB;IACnD,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAChD,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAChC,OAAO,MAAM,CAAC;AACf,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;GAIG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,UAA2B,EAAE,YAAsB;IAC5F,+BAA+B;IAC/B,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC1D,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,QAAQ,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE3D,qBAAqB;IACrB,MAAM,GAAG,GAAsB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,oFAAoF;QACpF,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACtC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,wBAAwB,CAAC,CAAC;YAC/E,IAAI,CAAC,OAAO;gBAAE,SAAS;QACxB,CAAC;QAED,uFAAuF;QACvF,IAAI,IAAI,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;YACpC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,KAAK,qBAAqB;gBAAE,SAAS;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,cAAc;QAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC;QAEzC,GAAG,CAAC,IAAI,CAAC;YACR,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC;YACvB,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,SAAS;YACT,OAAO;YACP,OAAO,EAAE,IAAI,CAAC,IAAI;SAClB,CAAC,CAAC;IACJ,CAAC;IAED,wEAAwE;IACxE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAEvE,8CAA8C;IAC9C,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC,CAAC;IAErB,KAAK,MAAM,MAAM,IAAI,GAAG,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,SAAS,GAAG,WAAW,EAAE,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,CAAC;QACD,6DAA6D;IAC9D,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,2EAA2E;AAC3E,MAAM,aAAa,GAAG,CAAC,CAAC;AAExB;;GAEG;AACH,SAAS,WAAW,CACnB,OAA0B,EAC1B,WAAqB,EACrB,QAAgB,EAChB,QAA4B;IAE5B,MAAM,IAAI,GAAY,EAAE,CAAC;IACzB,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,yBAAyB;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,MAAM,CAAC,SAAS,GAAG,MAAM,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YACrE,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;gBAC9B,IAAI,CAAC,IAAI,CAAC;oBACT,QAAQ;oBACR,SAAS,EAAE,MAAM;oBACjB,OAAO,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC;oBAC7B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;oBAC5B,QAAQ;iBACR,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;QACD,MAAM,GAAG,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,iCAAiC;IACjC,IAAI,MAAM,IAAI,WAAW,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,CAAC;gBACT,QAAQ;gBACR,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,WAAW,CAAC,MAAM;gBAC3B,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC5B,QAAQ;aACR,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAC;AACb,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACxC,OAAe,EACf,QAAgB,EAChB,QAA4B;IAE5B,IAAI,CAAC,WAAW,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,MAAM,cAAc,EAAE,CAAC;IACxB,CAAC;IAED,6CAA6C;IAC7C,MAAM,UAAU,GAAG,MAAO,CAAC;IAC3B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;IAChC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,8DAA8D;QAC9D,MAAM,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO;YACN;gBACC,QAAQ;gBACR,SAAS,EAAE,CAAC;gBACZ,OAAO,EAAE,KAAK,CAAC,MAAM;gBACrB,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO;gBACP,QAAQ,EAAE,QAAQ;aAClB;SACD,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;QAEvE,mCAAmC;QACnC,MAAM,MAAM,GAAY,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3C,QAAQ;YACR,SAAS,EAAE,CAAC,CAAC,SAAS;YACtB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,QAAQ,EAAE,QAAQ;SAClB,CAAC,CAAC,CAAC;QAEJ,iBAAiB;QACjB,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnE,+BAA+B;QAC/B,MAAM,GAAG,GAAG,CAAC,GAAG,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC;QACjC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAE9C,mEAAmE;QACnE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACN;oBACC,QAAQ;oBACR,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,WAAW,CAAC,MAAM;oBAC3B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO;oBACP,QAAQ,EAAE,QAAQ;iBAClB;aACD,CAAC;QACH,CAAC;QAED,OAAO,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACV,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,MAAM,CAAC,MAAM,EAAE,CAAC;IACjB,CAAC;AACF,CAAC","sourcesContent":["/**\n * AST-aware code chunking using tree-sitter (WASM).\n *\n * Parses source files into syntax trees and extracts meaningful code constructs\n * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between\n * extracted nodes are captured as file-level chunks when substantial.\n */\n\nimport { readFileSync } from \"fs\";\nimport { createRequire } from \"module\";\nimport type { Node as TSNode } from \"web-tree-sitter\";\nimport type { Chunk, ChunkKind, TreeSitterLanguage } from \"./types.js\";\n\n// Use createRequire for resolving WASM paths in ESM context\nconst require = createRequire(import.meta.url);\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/** Describes which AST node types to extract for a language and how to get names. */\ninterface NodeExtractor {\n\t/** The tree-sitter node type string. */\n\ttype: string;\n\t/** The ChunkKind to assign to extracted chunks. */\n\tkind: ChunkKind;\n\t/** How to extract the symbol name from the node. */\n\tgetName: (node: TSNode) => string | null;\n}\n\n/** Intermediate representation of an extracted AST region. */\ninterface ExtractedRegion {\n\tname: string | null;\n\tkind: ChunkKind;\n\tstartLine: number; // 1-indexed\n\tendLine: number; // 1-indexed, inclusive\n\tcontent: string;\n}\n\n// ============================================================================\n// Lazy Imports\n// ============================================================================\n\n// web-tree-sitter types imported dynamically to avoid top-level await\ntype ParserClass = typeof import(\"web-tree-sitter\").Parser;\ntype LanguageClass = typeof import(\"web-tree-sitter\").Language;\n\nlet Parser: ParserClass | null = null;\nlet Language: LanguageClass | null = null;\n\nlet initPromise: Promise<void> | null = null;\nlet initialized = false;\n\n// ============================================================================\n// Language Cache\n// ============================================================================\n\nconst languageCache = new Map<TreeSitterLanguage, import(\"web-tree-sitter\").Language>();\n\n/** Grammar WASM paths keyed by language. */\nconst GRAMMAR_PATHS: Record<TreeSitterLanguage, string> = {\n\ttypescript: \"tree-sitter-typescript/tree-sitter-typescript.wasm\",\n\ttsx: \"tree-sitter-typescript/tree-sitter-tsx.wasm\",\n\tjavascript: \"tree-sitter-javascript/tree-sitter-javascript.wasm\",\n\tpython: \"tree-sitter-python/tree-sitter-python.wasm\",\n\tgo: \"tree-sitter-go/tree-sitter-go.wasm\",\n\trust: \"tree-sitter-rust/tree-sitter-rust.wasm\",\n\tjava: \"tree-sitter-java/tree-sitter-java.wasm\",\n\tc: \"tree-sitter-c/tree-sitter-c.wasm\",\n\tcpp: \"tree-sitter-cpp/tree-sitter-cpp.wasm\",\n\tgdscript: \"../grammars/tree-sitter-gdscript.wasm\",\n};\n\n// ============================================================================\n// Name Extractors\n// ============================================================================\n\n/** Get name from a node's `name` field. */\nfunction nameField(node: TSNode): string | null {\n\treturn node.childForFieldName(\"name\")?.text ?? null;\n}\n\n/** Get name for an arrow function assigned to a variable. */\nfunction arrowFunctionName(node: TSNode): string | null {\n\tconst parent = node.parent;\n\tif (parent?.type === \"variable_declarator\") {\n\t\treturn parent.childForFieldName(\"name\")?.text ?? null;\n\t}\n\treturn null;\n}\n\n/** Get name for C function_definition: name is in the function_declarator child. */\nfunction cFunctionName(node: TSNode): string | null {\n\tconst declarator = node.childForFieldName(\"declarator\");\n\tif (!declarator) return null;\n\t// function_declarator has a `declarator` field for the actual name\n\tif (declarator.type === \"function_declarator\") {\n\t\treturn declarator.childForFieldName(\"declarator\")?.text ?? null;\n\t}\n\treturn declarator.text ?? null;\n}\n\n/** Get name from an export_statement's inner declaration. */\nfunction exportName(node: TSNode): string | null {\n\tconst decl = node.childForFieldName(\"declaration\");\n\tif (!decl) {\n\t\t// Named export like `export { foo }` — use the full text isn't useful,\n\t\t// just return null for anonymous exports\n\t\treturn null;\n\t}\n\treturn decl.childForFieldName(\"name\")?.text ?? null;\n}\n\n// ============================================================================\n// Per-Language Node Extractors\n// ============================================================================\n\nconst TS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n\t{ type: \"type_alias_declaration\", kind: \"type_alias\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst JS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst PYTHON_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: nameField },\n\t{ type: \"class_definition\", kind: \"class\", getName: nameField },\n];\n\nconst GO_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"type_spec\", kind: \"struct\", getName: nameField },\n];\n\nconst RUST_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_item\", kind: \"function\", getName: nameField },\n\t{ type: \"impl_item\", kind: \"impl\", getName: (n) => n.childForFieldName(\"type\")?.text ?? null },\n\t{ type: \"struct_item\", kind: \"struct\", getName: nameField },\n\t{ type: \"enum_item\", kind: \"enum\", getName: nameField },\n\t{ type: \"trait_item\", kind: \"interface\", getName: nameField },\n];\n\nconst JAVA_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n];\n\nconst C_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: cFunctionName },\n\t{ type: \"struct_specifier\", kind: \"struct\", getName: nameField },\n];\n\nconst CPP_EXTRACTORS: NodeExtractor[] = [\n\t...C_EXTRACTORS,\n\t{ type: \"class_specifier\", kind: \"class\", getName: nameField },\n];\n\nconst GDSCRIPT_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: nameField },\n\t{ type: \"class_definition\", kind: \"class\", getName: nameField },\n\t{ type: \"enum_definition\", kind: \"enum\", getName: nameField },\n];\n\nconst LANGUAGE_EXTRACTORS: Record<TreeSitterLanguage, NodeExtractor[]> = {\n\ttypescript: TS_EXTRACTORS,\n\ttsx: TS_EXTRACTORS,\n\tjavascript: JS_EXTRACTORS,\n\tpython: PYTHON_EXTRACTORS,\n\tgo: GO_EXTRACTORS,\n\trust: RUST_EXTRACTORS,\n\tjava: JAVA_EXTRACTORS,\n\tc: C_EXTRACTORS,\n\tcpp: CPP_EXTRACTORS,\n\tgdscript: GDSCRIPT_EXTRACTORS,\n};\n\n// ============================================================================\n// Initialization\n// ============================================================================\n\n/**\n * Initialize the tree-sitter WASM runtime. Must be called before parsing.\n * Safe to call multiple times — subsequent calls are no-ops.\n */\nexport async function initTreeSitter(): Promise<void> {\n\tif (initialized) return;\n\tif (initPromise) return initPromise;\n\n\tinitPromise = (async () => {\n\t\ttry {\n\t\t\tconst mod = await import(\"web-tree-sitter\");\n\t\t\tParser = mod.Parser;\n\t\t\tLanguage = mod.Language;\n\n\t\t\tconst wasmPath = require.resolve(\"web-tree-sitter/web-tree-sitter.wasm\");\n\t\t\tconst wasmBuf = readFileSync(wasmPath);\n\t\t\tawait Parser.init({ locateFile: () => wasmPath, wasmBinary: wasmBuf });\n\t\t\tinitialized = true;\n\t\t} catch (err) {\n\t\t\t// Reset so subsequent calls can retry instead of returning\n\t\t\t// the same rejected promise forever\n\t\t\tinitPromise = null;\n\t\t\tthrow err;\n\t\t}\n\t})();\n\n\treturn initPromise;\n}\n\n// ============================================================================\n// Language Loading\n// ============================================================================\n\n/** Load and cache a tree-sitter language grammar. */\nasync function loadLanguage(lang: TreeSitterLanguage): Promise<import(\"web-tree-sitter\").Language> {\n\tconst cached = languageCache.get(lang);\n\tif (cached) return cached;\n\n\tif (!Language) {\n\t\tthrow new Error(\"tree-sitter not initialized — call initTreeSitter() first\");\n\t}\n\n\tconst grammarPath = require.resolve(GRAMMAR_PATHS[lang]);\n\tconst loaded = await Language.load(grammarPath);\n\tlanguageCache.set(lang, loaded);\n\treturn loaded;\n}\n\n// ============================================================================\n// AST Extraction\n// ============================================================================\n\n/**\n * Walk the tree and collect nodes matching the target types.\n * Returns regions sorted by start position, with nested nodes skipped\n * (only outermost matches are kept).\n */\nfunction extractRegions(rootNode: TSNode, extractors: NodeExtractor[], _sourceLines: string[]): ExtractedRegion[] {\n\t// Gather all target node types\n\tconst typeToExtractors = new Map<string, NodeExtractor>();\n\tfor (const ext of extractors) {\n\t\ttypeToExtractors.set(ext.type, ext);\n\t}\n\n\tconst targetTypes = extractors.map((e) => e.type);\n\tconst candidates = rootNode.descendantsOfType(targetTypes);\n\n\t// Convert to regions\n\tconst raw: ExtractedRegion[] = [];\n\tfor (const node of candidates) {\n\t\tconst ext = typeToExtractors.get(node.type);\n\t\tif (!ext) continue;\n\n\t\t// For struct_specifier in C, only extract if it has a body (field_declaration_list)\n\t\tif (node.type === \"struct_specifier\") {\n\t\t\tconst hasBody = node.children.some((c) => c.type === \"field_declaration_list\");\n\t\t\tif (!hasBody) continue;\n\t\t}\n\n\t\t// For arrow_function, only extract if parent is variable_declarator (named assignment)\n\t\tif (node.type === \"arrow_function\") {\n\t\t\tif (node.parent?.type !== \"variable_declarator\") continue;\n\t\t}\n\n\t\tconst startLine = node.startPosition.row + 1; // 0→1 indexed\n\t\tconst endLine = node.endPosition.row + 1;\n\n\t\traw.push({\n\t\t\tname: ext.getName(node),\n\t\t\tkind: ext.kind,\n\t\t\tstartLine,\n\t\t\tendLine,\n\t\t\tcontent: node.text,\n\t\t});\n\t}\n\n\t// Sort by start line, then by end line descending (larger ranges first)\n\traw.sort((a, b) => a.startLine - b.startLine || b.endLine - a.endLine);\n\n\t// Remove nested regions — keep only outermost\n\tconst regions: ExtractedRegion[] = [];\n\tlet lastEndLine = -1;\n\n\tfor (const region of raw) {\n\t\tif (region.startLine > lastEndLine) {\n\t\t\tregions.push(region);\n\t\t\tlastEndLine = region.endLine;\n\t\t}\n\t\t// else: this region is nested inside the previous one — skip\n\t}\n\n\treturn regions;\n}\n\n// ============================================================================\n// Gap Collection\n// ============================================================================\n\n/** Minimum number of non-blank lines for a gap to become its own chunk. */\nconst MIN_GAP_LINES = 3;\n\n/**\n * Create file-level chunks for substantial code between extracted regions.\n */\nfunction collectGaps(\n\tregions: ExtractedRegion[],\n\tsourceLines: string[],\n\tfilePath: string,\n\tfileType: TreeSitterLanguage,\n): Chunk[] {\n\tconst gaps: Chunk[] = [];\n\tlet cursor = 1; // 1-indexed current line\n\n\tfor (const region of regions) {\n\t\tif (region.startLine > cursor) {\n\t\t\tconst gapLines = sourceLines.slice(cursor - 1, region.startLine - 1);\n\t\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\t\tgaps.push({\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: cursor,\n\t\t\t\t\tendLine: region.startLine - 1,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\t\tfileType,\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t\tcursor = region.endLine + 1;\n\t}\n\n\t// Trailing gap after last region\n\tif (cursor <= sourceLines.length) {\n\t\tconst gapLines = sourceLines.slice(cursor - 1);\n\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\tgaps.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: cursor,\n\t\t\t\tendLine: sourceLines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\tfileType,\n\t\t\t});\n\t\t}\n\t}\n\n\treturn gaps;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Parse a source file with tree-sitter and extract AST-aware chunks.\n *\n * Returns chunks for functions, classes, methods, and other language-specific\n * constructs, plus file-level chunks for substantial gaps between them.\n *\n * @param content - Raw source code text\n * @param filePath - Relative file path (stored in chunk metadata)\n * @param language - Tree-sitter language identifier\n */\nexport async function chunkWithTreeSitter(\n\tcontent: string,\n\tfilePath: string,\n\tlanguage: TreeSitterLanguage,\n): Promise<Chunk[]> {\n\tif (!initialized || !Parser) {\n\t\tawait initTreeSitter();\n\t}\n\n\t// After init, Parser is guaranteed to be set\n\tconst ParserCtor = Parser!;\n\tconst lang = await loadLanguage(language);\n\tconst parser = new ParserCtor();\n\tparser.setLanguage(lang);\n\n\tconst tree = parser.parse(content);\n\tif (!tree) {\n\t\t// Parse failed — free the parser WASM memory before returning\n\t\tparser.delete();\n\t\tconst lines = content.split(\"\\n\");\n\t\treturn [\n\t\t\t{\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: lines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent,\n\t\t\t\tfileType: language,\n\t\t\t},\n\t\t];\n\t}\n\n\ttry {\n\t\tconst sourceLines = content.split(\"\\n\");\n\t\tconst extractors = LANGUAGE_EXTRACTORS[language];\n\t\tconst regions = extractRegions(tree.rootNode, extractors, sourceLines);\n\n\t\t// Convert regions to Chunk objects\n\t\tconst chunks: Chunk[] = regions.map((r) => ({\n\t\t\tfilePath,\n\t\t\tstartLine: r.startLine,\n\t\t\tendLine: r.endLine,\n\t\t\tkind: r.kind,\n\t\t\tname: r.name,\n\t\t\tcontent: r.content,\n\t\t\tfileType: language,\n\t\t}));\n\n\t\t// Add gap chunks\n\t\tconst gaps = collectGaps(regions, sourceLines, filePath, language);\n\n\t\t// Merge and sort by start line\n\t\tconst all = [...chunks, ...gaps];\n\t\tall.sort((a, b) => a.startLine - b.startLine);\n\n\t\t// If no regions were extracted, return the whole file as one chunk\n\t\tif (chunks.length === 0) {\n\t\t\treturn [\n\t\t\t\t{\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: 1,\n\t\t\t\t\tendLine: sourceLines.length,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent,\n\t\t\t\t\tfileType: language,\n\t\t\t\t},\n\t\t\t];\n\t\t}\n\n\t\treturn all;\n\t} finally {\n\t\ttree.delete();\n\t\tparser.delete();\n\t}\n}\n"]}
|
package/dist/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Shared types for the semantic codebase search subsystem.
|
|
3
3
|
*/
|
|
4
4
|
/** Languages supported by tree-sitter AST chunking. */
|
|
5
|
-
export type TreeSitterLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust" | "java" | "c" | "cpp";
|
|
5
|
+
export type TreeSitterLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust" | "java" | "c" | "cpp" | "gdscript";
|
|
6
6
|
/** Non-code file types chunked by format-specific rules. */
|
|
7
7
|
export type TextFileType = "markdown" | "yaml" | "json" | "toml" | "plaintext";
|
|
8
8
|
/** Union of all recognized file types. */
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,uDAAuD;AACvD,MAAM,MAAM,kBAAkB,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,uDAAuD;AACvD,MAAM,MAAM,kBAAkB,GAC3B,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,QAAQ,GACR,IAAI,GACJ,MAAM,GACN,MAAM,GACN,GAAG,GACH,KAAK,GACL,UAAU,CAAC;AAEd,4DAA4D;AAC5D,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,WAAW,CAAC;AAE/E,0CAA0C;AAC1C,MAAM,MAAM,QAAQ,GAAG,kBAAkB,GAAG,YAAY,CAAC;AAMzD,qDAAqD;AACrD,MAAM,MAAM,SAAS,GAClB,UAAU,GACV,QAAQ,GACR,OAAO,GACP,WAAW,GACX,QAAQ,GACR,MAAM,GACN,MAAM,GACN,QAAQ,GACR,YAAY,GACZ,QAAQ,GACR,iBAAiB,GACjB,eAAe,GACf,WAAW,GACX,MAAM,CAAC;AAEV,qDAAqD;AACrD,MAAM,WAAW,KAAK;IACrB,4CAA4C;IAC5C,QAAQ,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,IAAI,EAAE,SAAS,CAAC;IAChB,8FAA8F;IAC9F,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,wCAAwC;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAMD,0CAA0C;AAC1C,MAAM,WAAW,WAAW;IAC3B,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,+CAA+C;AAC/C,MAAM,WAAW,WAAW;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAED,4CAA4C;AAC5C,MAAM,WAAW,WAAW;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAED,8BAA8B;AAC9B,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,YAAY,CAAC;CACrB;AAMD,uDAAuD;AACvD,MAAM,WAAW,YAAY;IAC5B,uCAAuC;IACvC,KAAK,EAAE,WAAW,CAAC;IACnB,wDAAwD;IACxD,MAAM,EAAE,YAAY,CAAC;IACrB,0EAA0E;IAC1E,IAAI,EAAE,MAAM,CAAC;CACb;AAED,uCAAuC;AACvC,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,sCAAsC;AACtC,MAAM,MAAM,UAAU,GAAG,MAAM,YAAY,CAAC;AAE5C,kDAAkD;AAClD,eAAO,MAAM,YAAY,EAAE,UAAU,EAAgF,CAAC;AAMtH,wDAAwD;AACxD,MAAM,MAAM,qBAAqB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAM5F,+DAA+D;AAC/D,MAAM,WAAW,UAAU;IAC1B,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,0CAA0C;IAC1C,MAAM,EAAE,MAAM,CAAC;CACf"}
|
package/dist/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAyIH,kDAAkD;AAClD,MAAM,CAAC,MAAM,YAAY,GAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,EAAE,YAAY,CAAC,CAAC","sourcesContent":["/**\n * Shared types for the semantic codebase search subsystem.\n */\n\n// ============================================================================\n// Languages\n// ============================================================================\n\n/** Languages supported by tree-sitter AST chunking. */\nexport type TreeSitterLanguage =\n\t| \"typescript\"\n\t| \"tsx\"\n\t| \"javascript\"\n\t| \"python\"\n\t| \"go\"\n\t| \"rust\"\n\t| \"java\"\n\t| \"c\"\n\t| \"cpp\"\n\t| \"gdscript\";\n\n/** Non-code file types chunked by format-specific rules. */\nexport type TextFileType = \"markdown\" | \"yaml\" | \"json\" | \"toml\" | \"plaintext\";\n\n/** Union of all recognized file types. */\nexport type FileType = TreeSitterLanguage | TextFileType;\n\n// ============================================================================\n// Chunks\n// ============================================================================\n\n/** The kind of code construct a chunk represents. */\nexport type ChunkKind =\n\t| \"function\"\n\t| \"method\"\n\t| \"class\"\n\t| \"interface\"\n\t| \"struct\"\n\t| \"enum\"\n\t| \"impl\"\n\t| \"export\"\n\t| \"type_alias\"\n\t| \"module\"\n\t| \"heading_section\"\n\t| \"top_level_key\"\n\t| \"paragraph\"\n\t| \"file\";\n\n/** A chunk of code or text extracted from a file. */\nexport interface Chunk {\n\t/** Relative file path from project root. */\n\tfilePath: string;\n\t/** 1-indexed start line in the source file. */\n\tstartLine: number;\n\t/** 1-indexed end line (inclusive) in the source file. */\n\tendLine: number;\n\t/** The kind of construct this chunk represents. */\n\tkind: ChunkKind;\n\t/** Symbol name (function name, class name, heading text, etc.). Null for anonymous chunks. */\n\tname: string | null;\n\t/** The raw source text of the chunk. */\n\tcontent: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n}\n\n// ============================================================================\n// Index\n// ============================================================================\n\n/** Configuration for the search index. */\nexport interface IndexConfig {\n\t/** Absolute path to the project root. */\n\tprojectRoot: string;\n\t/** Absolute path to the index database directory. */\n\tindexDir: string;\n\t/** Absolute path to the global memory directory (e.g. ~/.dreb/memory/). */\n\tglobalMemoryDir?: string;\n\t/** Additional directories to include in scans (bypasses gitignore). */\n\tvisibleDirs?: string[];\n\t/** Embedding model name (used to key the embeddings table). */\n\tmodelName: string;\n}\n\n/** Stored metadata for a file in the index. */\nexport interface IndexedFile {\n\tid: number;\n\tfilePath: string;\n\tmtime: number;\n\tfileType: FileType;\n}\n\n/** A stored chunk row from the database. */\nexport interface StoredChunk {\n\tid: number;\n\tfileId: number;\n\tfilePath: string;\n\tstartLine: number;\n\tendLine: number;\n\tkind: ChunkKind;\n\tname: string | null;\n\tcontent: string;\n\tfileType: FileType;\n}\n\n/** A stored embedding row. */\nexport interface StoredEmbedding {\n\tchunkId: number;\n\tmodelName: string;\n\tvector: Float32Array;\n}\n\n// ============================================================================\n// Search Results\n// ============================================================================\n\n/** A single search result with scores and metadata. */\nexport interface SearchResult {\n\t/** The chunk this result refers to. */\n\tchunk: StoredChunk;\n\t/** Individual metric scores (0–1, higher is better). */\n\tscores: MetricScores;\n\t/** Combined rank from POEM (lower is better, 0 = top of Pareto front). */\n\trank: number;\n}\n\n/** Scores from each ranking metric. */\nexport interface MetricScores {\n\tbm25: number;\n\tcosine: number;\n\tpathMatch: number;\n\tsymbolMatch: number;\n\timportGraph: number;\n\tgitRecency: number;\n}\n\n/** Names of the 6 ranking metrics. */\nexport type MetricName = keyof MetricScores;\n\n/** All metric names as an array for iteration. */\nexport const METRIC_NAMES: MetricName[] = [\"bm25\", \"cosine\", \"pathMatch\", \"symbolMatch\", \"importGraph\", \"gitRecency\"];\n\n// ============================================================================\n// Callbacks\n// ============================================================================\n\n/** Progress reporting callback for index operations. */\nexport type IndexProgressCallback = (phase: string, current: number, total: number) => void;\n\n// ============================================================================\n// Import Graph\n// ============================================================================\n\n/** A resolved import edge: source file imports target file. */\nexport interface ImportEdge {\n\t/** Relative path of the importing file. */\n\tsource: string;\n\t/** Relative path of the imported file. */\n\ttarget: string;\n}\n"]}
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dreb/semantic-search",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.5.0",
|
|
4
4
|
"description": "Semantic codebase search engine with embedding-based ranking and MCP server",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
"files": [
|
|
21
21
|
"dist",
|
|
22
22
|
"bin",
|
|
23
|
+
"grammars",
|
|
23
24
|
".claude-plugin",
|
|
24
25
|
".mcp.json",
|
|
25
26
|
"skills",
|
|
@@ -49,6 +50,7 @@
|
|
|
49
50
|
"devDependencies": {
|
|
50
51
|
"@types/node": "^24.3.0",
|
|
51
52
|
"shx": "^0.4.0",
|
|
53
|
+
"tree-sitter-gdscript": "^6.1.0",
|
|
52
54
|
"typescript": "^5.9.2",
|
|
53
55
|
"vitest": "^3.2.4"
|
|
54
56
|
},
|