@dreb/semantic-search 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/.claude-plugin/plugin.json +17 -0
  2. package/.mcp.json +8 -0
  3. package/README.md +97 -0
  4. package/bin/server.js +14 -0
  5. package/dist/chunker.d.ts +21 -0
  6. package/dist/chunker.d.ts.map +1 -0
  7. package/dist/chunker.js +51 -0
  8. package/dist/chunker.js.map +1 -0
  9. package/dist/db.d.ts +89 -0
  10. package/dist/db.d.ts.map +1 -0
  11. package/dist/db.js +406 -0
  12. package/dist/db.js.map +1 -0
  13. package/dist/embedder.d.ts +52 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +158 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/format.d.ts +4 -0
  18. package/dist/format.d.ts.map +1 -0
  19. package/dist/format.js +37 -0
  20. package/dist/format.js.map +1 -0
  21. package/dist/index-manager.d.ts +55 -0
  22. package/dist/index-manager.d.ts.map +1 -0
  23. package/dist/index-manager.js +311 -0
  24. package/dist/index-manager.js.map +1 -0
  25. package/dist/index.d.ts +18 -0
  26. package/dist/index.d.ts.map +1 -0
  27. package/dist/index.js +21 -0
  28. package/dist/index.js.map +1 -0
  29. package/dist/mcp-server.d.ts +25 -0
  30. package/dist/mcp-server.d.ts.map +1 -0
  31. package/dist/mcp-server.js +149 -0
  32. package/dist/mcp-server.js.map +1 -0
  33. package/dist/metrics/bm25.d.ts +10 -0
  34. package/dist/metrics/bm25.d.ts.map +1 -0
  35. package/dist/metrics/bm25.js +32 -0
  36. package/dist/metrics/bm25.js.map +1 -0
  37. package/dist/metrics/git-recency.d.ts +14 -0
  38. package/dist/metrics/git-recency.d.ts.map +1 -0
  39. package/dist/metrics/git-recency.js +123 -0
  40. package/dist/metrics/git-recency.js.map +1 -0
  41. package/dist/metrics/import-graph.d.ts +15 -0
  42. package/dist/metrics/import-graph.d.ts.map +1 -0
  43. package/dist/metrics/import-graph.js +115 -0
  44. package/dist/metrics/import-graph.js.map +1 -0
  45. package/dist/metrics/path-match.d.ts +13 -0
  46. package/dist/metrics/path-match.d.ts.map +1 -0
  47. package/dist/metrics/path-match.js +54 -0
  48. package/dist/metrics/path-match.js.map +1 -0
  49. package/dist/metrics/symbol-match.d.ts +12 -0
  50. package/dist/metrics/symbol-match.d.ts.map +1 -0
  51. package/dist/metrics/symbol-match.js +62 -0
  52. package/dist/metrics/symbol-match.js.map +1 -0
  53. package/dist/metrics/tokenize.d.ts +12 -0
  54. package/dist/metrics/tokenize.d.ts.map +1 -0
  55. package/dist/metrics/tokenize.js +29 -0
  56. package/dist/metrics/tokenize.js.map +1 -0
  57. package/dist/poem.d.ts +38 -0
  58. package/dist/poem.d.ts.map +1 -0
  59. package/dist/poem.js +214 -0
  60. package/dist/poem.js.map +1 -0
  61. package/dist/query-classifier.d.ts +17 -0
  62. package/dist/query-classifier.d.ts.map +1 -0
  63. package/dist/query-classifier.js +54 -0
  64. package/dist/query-classifier.js.map +1 -0
  65. package/dist/scanner.d.ts +30 -0
  66. package/dist/scanner.d.ts.map +1 -0
  67. package/dist/scanner.js +343 -0
  68. package/dist/scanner.js.map +1 -0
  69. package/dist/search.d.ts +63 -0
  70. package/dist/search.d.ts.map +1 -0
  71. package/dist/search.js +400 -0
  72. package/dist/search.js.map +1 -0
  73. package/dist/text-chunker.d.ts +15 -0
  74. package/dist/text-chunker.d.ts.map +1 -0
  75. package/dist/text-chunker.js +580 -0
  76. package/dist/text-chunker.js.map +1 -0
  77. package/dist/tree-sitter-chunker.d.ts +25 -0
  78. package/dist/tree-sitter-chunker.d.ts.map +1 -0
  79. package/dist/tree-sitter-chunker.js +357 -0
  80. package/dist/tree-sitter-chunker.js.map +1 -0
  81. package/dist/types.d.ts +98 -0
  82. package/dist/types.d.ts.map +1 -0
  83. package/dist/types.js +6 -0
  84. package/dist/types.js.map +1 -0
  85. package/dist/vector-store.d.ts +43 -0
  86. package/dist/vector-store.d.ts.map +1 -0
  87. package/dist/vector-store.js +73 -0
  88. package/dist/vector-store.js.map +1 -0
  89. package/package.json +71 -0
  90. package/skills/search/SKILL.md +56 -0
@@ -0,0 +1,25 @@
1
+ /**
2
+ * AST-aware code chunking using tree-sitter (WASM).
3
+ *
4
+ * Parses source files into syntax trees and extracts meaningful code constructs
5
+ * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between
6
+ * extracted nodes are captured as file-level chunks when substantial.
7
+ */
8
+ import type { Chunk, TreeSitterLanguage } from "./types.js";
9
/**
 * Initialize the tree-sitter WASM runtime. Must be called before parsing.
 * Safe to call multiple times — subsequent calls are no-ops.
 *
 * Calls made while an initialization is still in flight receive the same
 * pending promise. If initialization fails, the returned promise rejects and
 * the next call starts a fresh attempt.
 */
export declare function initTreeSitter(): Promise<void>;
14
/**
 * Parse a source file with tree-sitter and extract AST-aware chunks.
 *
 * Returns chunks for functions, classes, methods, and other language-specific
 * constructs, plus file-level chunks for substantial gaps between them.
 * Initializes the tree-sitter runtime on first use if it is not ready yet.
 *
 * @param content - Raw source code text
 * @param filePath - Relative file path (stored in chunk metadata)
 * @param language - Tree-sitter language identifier
 * @returns Chunks sorted by start line. When parsing fails or no construct is
 * extracted, a single `"file"` chunk covering the whole content is returned.
 */
export declare function chunkWithTreeSitter(content: string, filePath: string, language: TreeSitterLanguage): Promise<Chunk[]>;
25
+ //# sourceMappingURL=tree-sitter-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-sitter-chunker.d.ts","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,OAAO,KAAK,EAAE,KAAK,EAAa,kBAAkB,EAAE,MAAM,YAAY,CAAC;AA8KvE;;;GAGG;AACH,wBAAsB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC,CAuBpD;AAqJD;;;;;;;;;GASG;AACH,wBAAsB,mBAAmB,CACxC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,kBAAkB,GAC1B,OAAO,CAAC,KAAK,EAAE,CAAC,CAwElB","sourcesContent":["/**\n * AST-aware code chunking using tree-sitter (WASM).\n *\n * Parses source files into syntax trees and extracts meaningful code constructs\n * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between\n * extracted nodes are captured as file-level chunks when substantial.\n */\n\nimport { readFileSync } from \"fs\";\nimport { createRequire } from \"module\";\nimport type { Node as TSNode } from \"web-tree-sitter\";\nimport type { Chunk, ChunkKind, TreeSitterLanguage } from \"./types.js\";\n\n// Use createRequire for resolving WASM paths in ESM context\nconst require = createRequire(import.meta.url);\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/** Describes which AST node types to extract for a language and how to get names. */\ninterface NodeExtractor {\n\t/** The tree-sitter node type string. */\n\ttype: string;\n\t/** The ChunkKind to assign to extracted chunks. */\n\tkind: ChunkKind;\n\t/** How to extract the symbol name from the node. */\n\tgetName: (node: TSNode) => string | null;\n}\n\n/** Intermediate representation of an extracted AST region. 
*/\ninterface ExtractedRegion {\n\tname: string | null;\n\tkind: ChunkKind;\n\tstartLine: number; // 1-indexed\n\tendLine: number; // 1-indexed, inclusive\n\tcontent: string;\n}\n\n// ============================================================================\n// Lazy Imports\n// ============================================================================\n\n// web-tree-sitter types imported dynamically to avoid top-level await\ntype ParserClass = typeof import(\"web-tree-sitter\").Parser;\ntype LanguageClass = typeof import(\"web-tree-sitter\").Language;\n\nlet Parser: ParserClass | null = null;\nlet Language: LanguageClass | null = null;\n\nlet initPromise: Promise<void> | null = null;\nlet initialized = false;\n\n// ============================================================================\n// Language Cache\n// ============================================================================\n\nconst languageCache = new Map<TreeSitterLanguage, import(\"web-tree-sitter\").Language>();\n\n/** Grammar WASM paths keyed by language. */\nconst GRAMMAR_PATHS: Record<TreeSitterLanguage, string> = {\n\ttypescript: \"tree-sitter-typescript/tree-sitter-typescript.wasm\",\n\ttsx: \"tree-sitter-typescript/tree-sitter-tsx.wasm\",\n\tjavascript: \"tree-sitter-javascript/tree-sitter-javascript.wasm\",\n\tpython: \"tree-sitter-python/tree-sitter-python.wasm\",\n\tgo: \"tree-sitter-go/tree-sitter-go.wasm\",\n\trust: \"tree-sitter-rust/tree-sitter-rust.wasm\",\n\tjava: \"tree-sitter-java/tree-sitter-java.wasm\",\n\tc: \"tree-sitter-c/tree-sitter-c.wasm\",\n\tcpp: \"tree-sitter-cpp/tree-sitter-cpp.wasm\",\n};\n\n// ============================================================================\n// Name Extractors\n// ============================================================================\n\n/** Get name from a node's `name` field. */\nfunction nameField(node: TSNode): string | null {\n\treturn node.childForFieldName(\"name\")?.text ?? 
null;\n}\n\n/** Get name for an arrow function assigned to a variable. */\nfunction arrowFunctionName(node: TSNode): string | null {\n\tconst parent = node.parent;\n\tif (parent?.type === \"variable_declarator\") {\n\t\treturn parent.childForFieldName(\"name\")?.text ?? null;\n\t}\n\treturn null;\n}\n\n/** Get name for C function_definition: name is in the function_declarator child. */\nfunction cFunctionName(node: TSNode): string | null {\n\tconst declarator = node.childForFieldName(\"declarator\");\n\tif (!declarator) return null;\n\t// function_declarator has a `declarator` field for the actual name\n\tif (declarator.type === \"function_declarator\") {\n\t\treturn declarator.childForFieldName(\"declarator\")?.text ?? null;\n\t}\n\treturn declarator.text ?? null;\n}\n\n/** Get name from an export_statement's inner declaration. */\nfunction exportName(node: TSNode): string | null {\n\tconst decl = node.childForFieldName(\"declaration\");\n\tif (!decl) {\n\t\t// Named export like `export { foo }` — use the full text isn't useful,\n\t\t// just return null for anonymous exports\n\t\treturn null;\n\t}\n\treturn decl.childForFieldName(\"name\")?.text ?? 
null;\n}\n\n// ============================================================================\n// Per-Language Node Extractors\n// ============================================================================\n\nconst TS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n\t{ type: \"type_alias_declaration\", kind: \"type_alias\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst JS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst PYTHON_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: nameField },\n\t{ type: \"class_definition\", kind: \"class\", getName: nameField },\n];\n\nconst GO_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"type_spec\", kind: \"struct\", getName: nameField },\n];\n\nconst RUST_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_item\", kind: \"function\", getName: nameField },\n\t{ type: \"impl_item\", kind: \"impl\", getName: (n) => n.childForFieldName(\"type\")?.text ?? 
null },\n\t{ type: \"struct_item\", kind: \"struct\", getName: nameField },\n\t{ type: \"enum_item\", kind: \"enum\", getName: nameField },\n\t{ type: \"trait_item\", kind: \"interface\", getName: nameField },\n];\n\nconst JAVA_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n];\n\nconst C_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: cFunctionName },\n\t{ type: \"struct_specifier\", kind: \"struct\", getName: nameField },\n];\n\nconst CPP_EXTRACTORS: NodeExtractor[] = [\n\t...C_EXTRACTORS,\n\t{ type: \"class_specifier\", kind: \"class\", getName: nameField },\n];\n\nconst LANGUAGE_EXTRACTORS: Record<TreeSitterLanguage, NodeExtractor[]> = {\n\ttypescript: TS_EXTRACTORS,\n\ttsx: TS_EXTRACTORS,\n\tjavascript: JS_EXTRACTORS,\n\tpython: PYTHON_EXTRACTORS,\n\tgo: GO_EXTRACTORS,\n\trust: RUST_EXTRACTORS,\n\tjava: JAVA_EXTRACTORS,\n\tc: C_EXTRACTORS,\n\tcpp: CPP_EXTRACTORS,\n};\n\n// ============================================================================\n// Initialization\n// ============================================================================\n\n/**\n * Initialize the tree-sitter WASM runtime. 
Must be called before parsing.\n * Safe to call multiple times — subsequent calls are no-ops.\n */\nexport async function initTreeSitter(): Promise<void> {\n\tif (initialized) return;\n\tif (initPromise) return initPromise;\n\n\tinitPromise = (async () => {\n\t\ttry {\n\t\t\tconst mod = await import(\"web-tree-sitter\");\n\t\t\tParser = mod.Parser;\n\t\t\tLanguage = mod.Language;\n\n\t\t\tconst wasmPath = require.resolve(\"web-tree-sitter/web-tree-sitter.wasm\");\n\t\t\tconst wasmBuf = readFileSync(wasmPath);\n\t\t\tawait Parser.init({ locateFile: () => wasmPath, wasmBinary: wasmBuf });\n\t\t\tinitialized = true;\n\t\t} catch (err) {\n\t\t\t// Reset so subsequent calls can retry instead of returning\n\t\t\t// the same rejected promise forever\n\t\t\tinitPromise = null;\n\t\t\tthrow err;\n\t\t}\n\t})();\n\n\treturn initPromise;\n}\n\n// ============================================================================\n// Language Loading\n// ============================================================================\n\n/** Load and cache a tree-sitter language grammar. 
*/\nasync function loadLanguage(lang: TreeSitterLanguage): Promise<import(\"web-tree-sitter\").Language> {\n\tconst cached = languageCache.get(lang);\n\tif (cached) return cached;\n\n\tif (!Language) {\n\t\tthrow new Error(\"tree-sitter not initialized — call initTreeSitter() first\");\n\t}\n\n\tconst grammarPath = require.resolve(GRAMMAR_PATHS[lang]);\n\tconst loaded = await Language.load(grammarPath);\n\tlanguageCache.set(lang, loaded);\n\treturn loaded;\n}\n\n// ============================================================================\n// AST Extraction\n// ============================================================================\n\n/**\n * Walk the tree and collect nodes matching the target types.\n * Returns regions sorted by start position, with nested nodes skipped\n * (only outermost matches are kept).\n */\nfunction extractRegions(rootNode: TSNode, extractors: NodeExtractor[], _sourceLines: string[]): ExtractedRegion[] {\n\t// Gather all target node types\n\tconst typeToExtractors = new Map<string, NodeExtractor>();\n\tfor (const ext of extractors) {\n\t\ttypeToExtractors.set(ext.type, ext);\n\t}\n\n\tconst targetTypes = extractors.map((e) => e.type);\n\tconst candidates = rootNode.descendantsOfType(targetTypes);\n\n\t// Convert to regions\n\tconst raw: ExtractedRegion[] = [];\n\tfor (const node of candidates) {\n\t\tconst ext = typeToExtractors.get(node.type);\n\t\tif (!ext) continue;\n\n\t\t// For struct_specifier in C, only extract if it has a body (field_declaration_list)\n\t\tif (node.type === \"struct_specifier\") {\n\t\t\tconst hasBody = node.children.some((c) => c.type === \"field_declaration_list\");\n\t\t\tif (!hasBody) continue;\n\t\t}\n\n\t\t// For arrow_function, only extract if parent is variable_declarator (named assignment)\n\t\tif (node.type === \"arrow_function\") {\n\t\t\tif (node.parent?.type !== \"variable_declarator\") continue;\n\t\t}\n\n\t\tconst startLine = node.startPosition.row + 1; // 0→1 indexed\n\t\tconst endLine = 
node.endPosition.row + 1;\n\n\t\traw.push({\n\t\t\tname: ext.getName(node),\n\t\t\tkind: ext.kind,\n\t\t\tstartLine,\n\t\t\tendLine,\n\t\t\tcontent: node.text,\n\t\t});\n\t}\n\n\t// Sort by start line, then by end line descending (larger ranges first)\n\traw.sort((a, b) => a.startLine - b.startLine || b.endLine - a.endLine);\n\n\t// Remove nested regions — keep only outermost\n\tconst regions: ExtractedRegion[] = [];\n\tlet lastEndLine = -1;\n\n\tfor (const region of raw) {\n\t\tif (region.startLine > lastEndLine) {\n\t\t\tregions.push(region);\n\t\t\tlastEndLine = region.endLine;\n\t\t}\n\t\t// else: this region is nested inside the previous one — skip\n\t}\n\n\treturn regions;\n}\n\n// ============================================================================\n// Gap Collection\n// ============================================================================\n\n/** Minimum number of non-blank lines for a gap to become its own chunk. */\nconst MIN_GAP_LINES = 3;\n\n/**\n * Create file-level chunks for substantial code between extracted regions.\n */\nfunction collectGaps(\n\tregions: ExtractedRegion[],\n\tsourceLines: string[],\n\tfilePath: string,\n\tfileType: TreeSitterLanguage,\n): Chunk[] {\n\tconst gaps: Chunk[] = [];\n\tlet cursor = 1; // 1-indexed current line\n\n\tfor (const region of regions) {\n\t\tif (region.startLine > cursor) {\n\t\t\tconst gapLines = sourceLines.slice(cursor - 1, region.startLine - 1);\n\t\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\t\tgaps.push({\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: cursor,\n\t\t\t\t\tendLine: region.startLine - 1,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\t\tfileType,\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t\tcursor = region.endLine + 1;\n\t}\n\n\t// Trailing gap after last region\n\tif (cursor <= sourceLines.length) {\n\t\tconst gapLines = sourceLines.slice(cursor - 1);\n\t\tconst 
nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\tgaps.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: cursor,\n\t\t\t\tendLine: sourceLines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\tfileType,\n\t\t\t});\n\t\t}\n\t}\n\n\treturn gaps;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Parse a source file with tree-sitter and extract AST-aware chunks.\n *\n * Returns chunks for functions, classes, methods, and other language-specific\n * constructs, plus file-level chunks for substantial gaps between them.\n *\n * @param content - Raw source code text\n * @param filePath - Relative file path (stored in chunk metadata)\n * @param language - Tree-sitter language identifier\n */\nexport async function chunkWithTreeSitter(\n\tcontent: string,\n\tfilePath: string,\n\tlanguage: TreeSitterLanguage,\n): Promise<Chunk[]> {\n\tif (!initialized || !Parser) {\n\t\tawait initTreeSitter();\n\t}\n\n\t// After init, Parser is guaranteed to be set\n\tconst ParserCtor = Parser!;\n\tconst lang = await loadLanguage(language);\n\tconst parser = new ParserCtor();\n\tparser.setLanguage(lang);\n\n\tconst tree = parser.parse(content);\n\tif (!tree) {\n\t\t// Parse failed — free the parser WASM memory before returning\n\t\tparser.delete();\n\t\tconst lines = content.split(\"\\n\");\n\t\treturn [\n\t\t\t{\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: lines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent,\n\t\t\t\tfileType: language,\n\t\t\t},\n\t\t];\n\t}\n\n\ttry {\n\t\tconst sourceLines = content.split(\"\\n\");\n\t\tconst extractors = LANGUAGE_EXTRACTORS[language];\n\t\tconst regions = extractRegions(tree.rootNode, extractors, sourceLines);\n\n\t\t// Convert regions to Chunk objects\n\t\tconst 
chunks: Chunk[] = regions.map((r) => ({\n\t\t\tfilePath,\n\t\t\tstartLine: r.startLine,\n\t\t\tendLine: r.endLine,\n\t\t\tkind: r.kind,\n\t\t\tname: r.name,\n\t\t\tcontent: r.content,\n\t\t\tfileType: language,\n\t\t}));\n\n\t\t// Add gap chunks\n\t\tconst gaps = collectGaps(regions, sourceLines, filePath, language);\n\n\t\t// Merge and sort by start line\n\t\tconst all = [...chunks, ...gaps];\n\t\tall.sort((a, b) => a.startLine - b.startLine);\n\n\t\t// If no regions were extracted, return the whole file as one chunk\n\t\tif (chunks.length === 0) {\n\t\t\treturn [\n\t\t\t\t{\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: 1,\n\t\t\t\t\tendLine: sourceLines.length,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent,\n\t\t\t\t\tfileType: language,\n\t\t\t\t},\n\t\t\t];\n\t\t}\n\n\t\treturn all;\n\t} finally {\n\t\ttree.delete();\n\t\tparser.delete();\n\t}\n}\n"]}
@@ -0,0 +1,357 @@
1
+ /**
2
+ * AST-aware code chunking using tree-sitter (WASM).
3
+ *
4
+ * Parses source files into syntax trees and extracts meaningful code constructs
5
+ * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between
6
+ * extracted nodes are captured as file-level chunks when substantial.
7
+ */
8
+ import { readFileSync } from "fs";
9
+ import { createRequire } from "module";
10
// Use createRequire for resolving WASM paths in ESM context
const require = createRequire(import.meta.url);
// web-tree-sitter's Parser and Language classes. Both stay null until
// initTreeSitter() has dynamically imported the module.
let Parser = null;
let Language = null;
// Promise of the initialization currently in flight (or already completed).
// initTreeSitter() resets it to null on failure so a later call can retry.
let initPromise = null;
// Flipped to true only after Parser.init() has resolved.
let initialized = false;
16
+ // ============================================================================
17
+ // Language Cache
18
+ // ============================================================================
19
// Grammars already loaded by loadLanguage(), keyed by language identifier,
// so each grammar WASM file is loaded at most once per cached entry.
const languageCache = new Map();
/**
 * Grammar WASM paths keyed by language.
 * Each value is a "<package>/<file>.wasm" specifier that loadLanguage()
 * passes to require.resolve() to obtain the path handed to Language.load().
 */
const GRAMMAR_PATHS = {
    typescript: "tree-sitter-typescript/tree-sitter-typescript.wasm",
    tsx: "tree-sitter-typescript/tree-sitter-tsx.wasm",
    javascript: "tree-sitter-javascript/tree-sitter-javascript.wasm",
    python: "tree-sitter-python/tree-sitter-python.wasm",
    go: "tree-sitter-go/tree-sitter-go.wasm",
    rust: "tree-sitter-rust/tree-sitter-rust.wasm",
    java: "tree-sitter-java/tree-sitter-java.wasm",
    c: "tree-sitter-c/tree-sitter-c.wasm",
    cpp: "tree-sitter-cpp/tree-sitter-cpp.wasm",
};
32
+ // ============================================================================
33
+ // Name Extractors
34
+ // ============================================================================
35
+ /** Get name from a node's `name` field. */
36
+ function nameField(node) {
37
+ return node.childForFieldName("name")?.text ?? null;
38
+ }
39
+ /** Get name for an arrow function assigned to a variable. */
40
+ function arrowFunctionName(node) {
41
+ const parent = node.parent;
42
+ if (parent?.type === "variable_declarator") {
43
+ return parent.childForFieldName("name")?.text ?? null;
44
+ }
45
+ return null;
46
+ }
47
+ /** Get name for C function_definition: name is in the function_declarator child. */
48
+ function cFunctionName(node) {
49
+ const declarator = node.childForFieldName("declarator");
50
+ if (!declarator)
51
+ return null;
52
+ // function_declarator has a `declarator` field for the actual name
53
+ if (declarator.type === "function_declarator") {
54
+ return declarator.childForFieldName("declarator")?.text ?? null;
55
+ }
56
+ return declarator.text ?? null;
57
+ }
58
+ /** Get name from an export_statement's inner declaration. */
59
+ function exportName(node) {
60
+ const decl = node.childForFieldName("declaration");
61
+ if (!decl) {
62
+ // Named export like `export { foo }` — use the full text isn't useful,
63
+ // just return null for anonymous exports
64
+ return null;
65
+ }
66
+ return decl.childForFieldName("name")?.text ?? null;
67
+ }
68
+ // ============================================================================
69
+ // Per-Language Node Extractors
70
+ // ============================================================================
71
// Each extractor entry maps a tree-sitter node type to the chunk kind it
// produces and the function used to derive the chunk's symbol name.
// TypeScript table; LANGUAGE_EXTRACTORS reuses it for TSX as well.
const TS_EXTRACTORS = [
    { type: "function_declaration", kind: "function", getName: nameField },
    { type: "method_definition", kind: "method", getName: nameField },
    { type: "class_declaration", kind: "class", getName: nameField },
    { type: "interface_declaration", kind: "interface", getName: nameField },
    { type: "type_alias_declaration", kind: "type_alias", getName: nameField },
    { type: "export_statement", kind: "export", getName: exportName },
    // extractRegions() only keeps arrow functions whose parent is a variable_declarator.
    { type: "arrow_function", kind: "function", getName: arrowFunctionName },
];
// JavaScript table: the TypeScript table without the interface and type-alias entries.
const JS_EXTRACTORS = [
    { type: "function_declaration", kind: "function", getName: nameField },
    { type: "method_definition", kind: "method", getName: nameField },
    { type: "class_declaration", kind: "class", getName: nameField },
    { type: "export_statement", kind: "export", getName: exportName },
    // extractRegions() only keeps arrow functions whose parent is a variable_declarator.
    { type: "arrow_function", kind: "function", getName: arrowFunctionName },
];
87
// Python: function and class definitions, named via their `name` field.
const PYTHON_EXTRACTORS = [
    { type: "function_definition", kind: "function", getName: nameField },
    { type: "class_definition", kind: "class", getName: nameField },
];
// Go: functions, methods, and type specs.
// NOTE(review): every type_spec is labelled "struct"; presumably this also
// covers non-struct type declarations — confirm against the Go grammar.
const GO_EXTRACTORS = [
    { type: "function_declaration", kind: "function", getName: nameField },
    { type: "method_declaration", kind: "method", getName: nameField },
    { type: "type_spec", kind: "struct", getName: nameField },
];
// Rust: impl blocks are named after their `type` field rather than a `name`
// field; traits are reported with the "interface" chunk kind.
const RUST_EXTRACTORS = [
    { type: "function_item", kind: "function", getName: nameField },
    { type: "impl_item", kind: "impl", getName: (n) => n.childForFieldName("type")?.text ?? null },
    { type: "struct_item", kind: "struct", getName: nameField },
    { type: "enum_item", kind: "enum", getName: nameField },
    { type: "trait_item", kind: "interface", getName: nameField },
];
// Java: classes, methods, and interfaces, named via their `name` field.
const JAVA_EXTRACTORS = [
    { type: "class_declaration", kind: "class", getName: nameField },
    { type: "method_declaration", kind: "method", getName: nameField },
    { type: "interface_declaration", kind: "interface", getName: nameField },
];
108
// C: function definitions (named through their declarator, see cFunctionName)
// and structs. extractRegions() skips struct_specifier nodes that have no
// field_declaration_list child.
const C_EXTRACTORS = [
    { type: "function_definition", kind: "function", getName: cFunctionName },
    { type: "struct_specifier", kind: "struct", getName: nameField },
];
// C++: everything from the C table plus class specifiers.
const CPP_EXTRACTORS = [
    ...C_EXTRACTORS,
    { type: "class_specifier", kind: "class", getName: nameField },
];
// Extractor table used for each supported language identifier.
// TSX shares the TypeScript table.
const LANGUAGE_EXTRACTORS = {
    typescript: TS_EXTRACTORS,
    tsx: TS_EXTRACTORS,
    javascript: JS_EXTRACTORS,
    python: PYTHON_EXTRACTORS,
    go: GO_EXTRACTORS,
    rust: RUST_EXTRACTORS,
    java: JAVA_EXTRACTORS,
    c: C_EXTRACTORS,
    cpp: CPP_EXTRACTORS,
};
127
+ // ============================================================================
128
+ // Initialization
129
+ // ============================================================================
130
/**
 * Boot the tree-sitter WASM runtime. Parsing requires this to have completed.
 *
 * Repeated calls are cheap: once initialization has succeeded the function
 * returns immediately, and calls made while a boot is still in flight all
 * await the same promise. A failed boot clears the stored promise so the next
 * call starts over instead of replaying the old rejection.
 *
 * @returns A promise that resolves once `Parser.init()` has finished.
 */
export async function initTreeSitter() {
    if (initialized) {
        return;
    }
    if (!initPromise) {
        const bootRuntime = async () => {
            try {
                const treeSitter = await import("web-tree-sitter");
                Parser = treeSitter.Parser;
                Language = treeSitter.Language;
                // Resolve the runtime WASM on disk and hand its bytes to the
                // runtime directly.
                const runtimeWasmPath = require.resolve("web-tree-sitter/web-tree-sitter.wasm");
                const runtimeWasm = readFileSync(runtimeWasmPath);
                await Parser.init({ locateFile: () => runtimeWasmPath, wasmBinary: runtimeWasm });
                initialized = true;
            }
            catch (failure) {
                // Forget the rejected promise so a later call can retry.
                initPromise = null;
                throw failure;
            }
        };
        initPromise = bootRuntime();
    }
    return initPromise;
}
158
+ // ============================================================================
159
+ // Language Loading
160
+ // ============================================================================
161
/**
 * Return the tree-sitter grammar for a language, loading it on first use.
 *
 * Cached grammars are returned straight from `languageCache`. Otherwise the
 * grammar's WASM file is resolved via `GRAMMAR_PATHS`, loaded, and cached.
 *
 * @param lang - Language identifier (a key of `GRAMMAR_PATHS`).
 * @returns The loaded grammar.
 * @throws Error when the grammar is not cached and the runtime has not been
 * initialized yet.
 */
async function loadLanguage(lang) {
    let grammar = languageCache.get(lang);
    if (!grammar) {
        if (!Language) {
            throw new Error("tree-sitter not initialized — call initTreeSitter() first");
        }
        const wasmLocation = require.resolve(GRAMMAR_PATHS[lang]);
        grammar = await Language.load(wasmLocation);
        languageCache.set(lang, grammar);
    }
    return grammar;
}
174
+ // ============================================================================
175
+ // AST Extraction
176
+ // ============================================================================
177
+ /**
178
+ * Walk the tree and collect nodes matching the target types.
179
+ * Returns regions sorted by start position, with nested nodes skipped
180
+ * (only outermost matches are kept).
181
+ */
182
+ function extractRegions(rootNode, extractors, _sourceLines) {
183
+ // Gather all target node types
184
+ const typeToExtractors = new Map();
185
+ for (const ext of extractors) {
186
+ typeToExtractors.set(ext.type, ext);
187
+ }
188
+ const targetTypes = extractors.map((e) => e.type);
189
+ const candidates = rootNode.descendantsOfType(targetTypes);
190
+ // Convert to regions
191
+ const raw = [];
192
+ for (const node of candidates) {
193
+ const ext = typeToExtractors.get(node.type);
194
+ if (!ext)
195
+ continue;
196
+ // For struct_specifier in C, only extract if it has a body (field_declaration_list)
197
+ if (node.type === "struct_specifier") {
198
+ const hasBody = node.children.some((c) => c.type === "field_declaration_list");
199
+ if (!hasBody)
200
+ continue;
201
+ }
202
+ // For arrow_function, only extract if parent is variable_declarator (named assignment)
203
+ if (node.type === "arrow_function") {
204
+ if (node.parent?.type !== "variable_declarator")
205
+ continue;
206
+ }
207
+ const startLine = node.startPosition.row + 1; // 0→1 indexed
208
+ const endLine = node.endPosition.row + 1;
209
+ raw.push({
210
+ name: ext.getName(node),
211
+ kind: ext.kind,
212
+ startLine,
213
+ endLine,
214
+ content: node.text,
215
+ });
216
+ }
217
+ // Sort by start line, then by end line descending (larger ranges first)
218
+ raw.sort((a, b) => a.startLine - b.startLine || b.endLine - a.endLine);
219
+ // Remove nested regions — keep only outermost
220
+ const regions = [];
221
+ let lastEndLine = -1;
222
+ for (const region of raw) {
223
+ if (region.startLine > lastEndLine) {
224
+ regions.push(region);
225
+ lastEndLine = region.endLine;
226
+ }
227
+ // else: this region is nested inside the previous one — skip
228
+ }
229
+ return regions;
230
+ }
231
+ // ============================================================================
232
+ // Gap Collection
233
+ // ============================================================================
234
/**
 * Minimum number of non-blank lines for a gap to become its own chunk.
 * The threshold is inclusive: a gap with exactly this many non-blank lines
 * qualifies.
 */
const MIN_GAP_LINES = 3;
/**
 * Build a file-level chunk for the 1-indexed, inclusive line range
 * [firstLine, lastLine].
 *
 * @returns The chunk, or null when the range is empty or contains fewer than
 *          MIN_GAP_LINES non-blank lines.
 */
function buildGapChunk(sourceLines, firstLine, lastLine, filePath, fileType) {
    if (lastLine < firstLine)
        return null;
    const gapLines = sourceLines.slice(firstLine - 1, lastLine);
    const nonBlank = gapLines.filter((l) => l.trim().length > 0).length;
    if (nonBlank < MIN_GAP_LINES)
        return null;
    return {
        filePath,
        startLine: firstLine,
        endLine: lastLine,
        kind: "file",
        name: null,
        content: gapLines.join("\n"),
        fileType,
    };
}
/**
 * Create file-level chunks for substantial code between extracted regions.
 *
 * Emits a `kind: "file"` chunk for every run of lines not covered by a region
 * (before the first region, between consecutive regions, and after the last
 * one) that has at least MIN_GAP_LINES non-blank lines.
 *
 * @param regions - Extracted regions with 1-indexed, inclusive
 *                  `startLine`/`endLine`, expected in ascending order
 * @param sourceLines - The file content split into lines
 * @param filePath - File path copied onto every gap chunk
 * @param fileType - File type copied onto every gap chunk
 * @returns Gap chunks in ascending line order (empty when nothing qualifies)
 */
function collectGaps(regions, sourceLines, filePath, fileType) {
    const gaps = [];
    let cursor = 1; // 1-indexed first line not yet covered by a region
    for (const region of regions) {
        const gap = buildGapChunk(sourceLines, cursor, region.startLine - 1, filePath, fileType);
        if (gap)
            gaps.push(gap);
        // Never move backwards, so an overlapping region cannot cause
        // already-covered lines to be re-emitted as a gap.
        cursor = Math.max(cursor, region.endLine + 1);
    }
    // Trailing gap after last region
    const trailing = buildGapChunk(sourceLines, cursor, sourceLines.length, filePath, fileType);
    if (trailing)
        gaps.push(trailing);
    return gaps;
}
278
+ // ============================================================================
279
+ // Public API
280
+ // ============================================================================
281
/**
 * Parse a source file with tree-sitter and extract AST-aware chunks.
 *
 * Returns chunks for functions, classes, methods, and other language-specific
 * constructs, plus file-level chunks for substantial gaps between them.
 * When parsing yields no tree, or no constructs are extracted, the whole file
 * is returned as a single `kind: "file"` chunk.
 *
 * Errors thrown while initializing tree-sitter, loading the grammar, or
 * parsing propagate to the caller; the parser and tree are released in every
 * case.
 *
 * @param content - Raw source code text
 * @param filePath - Relative file path (stored in chunk metadata)
 * @param language - Tree-sitter language identifier
 * @returns Chunks sorted by start line
 */
export async function chunkWithTreeSitter(content, filePath, language) {
    if (!initialized || !Parser) {
        await initTreeSitter();
    }
    // After init, Parser is guaranteed to be set
    const ParserCtor = Parser;
    const lang = await loadLanguage(language);
    const sourceLines = content.split("\n");
    // Fallback result: the entire file as one file-level chunk.
    const wholeFile = () => [
        {
            filePath,
            startLine: 1,
            endLine: sourceLines.length,
            kind: "file",
            name: null,
            content,
            fileType: language,
        },
    ];
    const parser = new ParserCtor();
    let tree = null;
    try {
        // setLanguage() and parse() are inside the try so that a throw from
        // either one still releases the parser below.
        parser.setLanguage(lang);
        tree = parser.parse(content);
        if (!tree) {
            // Parse failed — fall back to a single whole-file chunk
            return wholeFile();
        }
        const extractors = LANGUAGE_EXTRACTORS[language];
        const regions = extractRegions(tree.rootNode, extractors, sourceLines);
        // If no regions were extracted, return the whole file as one chunk
        if (regions.length === 0) {
            return wholeFile();
        }
        // Convert regions to Chunk objects
        const chunks = regions.map((r) => ({
            filePath,
            startLine: r.startLine,
            endLine: r.endLine,
            kind: r.kind,
            name: r.name,
            content: r.content,
            fileType: language,
        }));
        // Add gap chunks
        const gaps = collectGaps(regions, sourceLines, filePath, language);
        // Merge and sort by start line
        const all = [...chunks, ...gaps];
        all.sort((a, b) => a.startLine - b.startLine);
        return all;
    }
    finally {
        // Free the tree (if one was produced) and the parser WASM memory.
        tree?.delete();
        parser.delete();
    }
}
357
+ //# sourceMappingURL=tree-sitter-chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-sitter-chunker.js","sourceRoot":"","sources":["../src/tree-sitter-chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAIvC,4DAA4D;AAC5D,MAAM,OAAO,GAAG,aAAa,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC;AAiC/C,IAAI,MAAM,GAAuB,IAAI,CAAC;AACtC,IAAI,QAAQ,GAAyB,IAAI,CAAC;AAE1C,IAAI,WAAW,GAAyB,IAAI,CAAC;AAC7C,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,MAAM,aAAa,GAAG,IAAI,GAAG,EAA0D,CAAC;AAExF,4CAA4C;AAC5C,MAAM,aAAa,GAAuC;IACzD,UAAU,EAAE,oDAAoD;IAChE,GAAG,EAAE,6CAA6C;IAClD,UAAU,EAAE,oDAAoD;IAChE,MAAM,EAAE,4CAA4C;IACpD,EAAE,EAAE,oCAAoC;IACxC,IAAI,EAAE,wCAAwC;IAC9C,IAAI,EAAE,wCAAwC;IAC9C,CAAC,EAAE,kCAAkC;IACrC,GAAG,EAAE,sCAAsC;CAC3C,CAAC;AAEF,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E,2CAA2C;AAC3C,SAAS,SAAS,CAAC,IAAY,EAAiB;IAC/C,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AAAA,CACpD;AAED,6DAA6D;AAC7D,SAAS,iBAAiB,CAAC,IAAY,EAAiB;IACvD,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;IAC3B,IAAI,MAAM,EAAE,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC5C,OAAO,MAAM,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACvD,CAAC;IACD,OAAO,IAAI,CAAC;AAAA,CACZ;AAED,oFAAoF;AACpF,SAAS,aAAa,CAAC,IAAY,EAAiB;IACnD,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,mEAAmE;IACnE,IAAI,UAAU,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;QAC/C,OAAO,UAAU,CAAC,iBAAiB,CAAC,YAAY,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;IACjE,CAAC;IACD,OAAO,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC;AAAA,CAC/B;AAED,6DAA6D;AAC7D,SAAS,UAAU,CAAC,IAAY,EAAiB;IAChD,MAAM,IAAI,GAAG,IAAI,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,yEAAuE;QACvE,yCAAyC;QACzC,OAAO,IAAI,CAAC;IACb,CAAC;IACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC;AAAA,CACpD;AAED,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAA
mB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;IACxE,EAAE,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE;IAC1E,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IACjE,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE;IACjE,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF,MAAM,iBAAiB,GAAoB;IAC1C,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACrE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC/D,CAAC;AAEF,MAAM,aAAa,GAAoB;IACtC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IACtE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CACzD,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE;IAC/D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE;IAC9F,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAC3D,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE;IACvD,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CAC7D,CAAC;AAEF,MAAM,eAAe,GAAoB;IACxC,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;IAChE,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;IAClE,EAAE,IAAI,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE;CACxE,CAAC;AAEF,MAAM,YAAY,GAAoB;IACrC,EAAE,IAAI,EAAE,qBAAqB,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,aAAa,EAAE;IACzE,EAAE,IAAI
,EAAE,kBAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE;CAChE,CAAC;AAEF,MAAM,cAAc,GAAoB;IACvC,GAAG,YAAY;IACf,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE;CAC9D,CAAC;AAEF,MAAM,mBAAmB,GAAgD;IACxE,UAAU,EAAE,aAAa;IACzB,GAAG,EAAE,aAAa;IAClB,UAAU,EAAE,aAAa;IACzB,MAAM,EAAE,iBAAiB;IACzB,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,eAAe;IACrB,IAAI,EAAE,eAAe;IACrB,CAAC,EAAE,YAAY;IACf,GAAG,EAAE,cAAc;CACnB,CAAC;AAEF,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,GAAkB;IACrD,IAAI,WAAW;QAAE,OAAO;IACxB,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1B,IAAI,CAAC;YACJ,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YAC5C,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;YACpB,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;YAExB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC;YACzE,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YACvC,MAAM,MAAM,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YACvE,WAAW,GAAG,IAAI,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,2DAA2D;YAC3D,oCAAoC;YACpC,WAAW,GAAG,IAAI,CAAC;YACnB,MAAM,GAAG,CAAC;QACX,CAAC;IAAA,CACD,CAAC,EAAE,CAAC;IAEL,OAAO,WAAW,CAAC;AAAA,CACnB;AAED,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,qDAAqD;AACrD,KAAK,UAAU,YAAY,CAAC,IAAwB,EAA+C;IAClG,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6DAA2D,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAChD,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAChC,OAAO,MAAM,CAAC;AAAA,CACd;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;GAIG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,UAA2B,EAAE,YAAsB,EAAqB;IACjH,+BAA+B;IAC/B,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC1D,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClD,
MAAM,UAAU,GAAG,QAAQ,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE3D,qBAAqB;IACrB,MAAM,GAAG,GAAsB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,oFAAoF;QACpF,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;YACtC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,wBAAwB,CAAC,CAAC;YAC/E,IAAI,CAAC,OAAO;gBAAE,SAAS;QACxB,CAAC;QAED,uFAAuF;QACvF,IAAI,IAAI,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;YACpC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,KAAK,qBAAqB;gBAAE,SAAS;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,gBAAc;QAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC;QAEzC,GAAG,CAAC,IAAI,CAAC;YACR,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC;YACvB,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,SAAS;YACT,OAAO;YACP,OAAO,EAAE,IAAI,CAAC,IAAI;SAClB,CAAC,CAAC;IACJ,CAAC;IAED,wEAAwE;IACxE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAEvE,gDAA8C;IAC9C,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,IAAI,WAAW,GAAG,CAAC,CAAC,CAAC;IAErB,KAAK,MAAM,MAAM,IAAI,GAAG,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,SAAS,GAAG,WAAW,EAAE,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,CAAC;QACD,+DAA6D;IAC9D,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,2EAA2E;AAC3E,MAAM,aAAa,GAAG,CAAC,CAAC;AAExB;;GAEG;AACH,SAAS,WAAW,CACnB,OAA0B,EAC1B,WAAqB,EACrB,QAAgB,EAChB,QAA4B,EAClB;IACV,MAAM,IAAI,GAAY,EAAE,CAAC;IACzB,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,yBAAyB;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,MAAM,CAAC,SAAS,GAAG,MAAM,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YACrE,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;gBAC9B,IAAI,CAAC,IAAI,CAAC;oBACT,QAAQ;oBACR,SAAS,EAAE,MAAM
;oBACjB,OAAO,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC;oBAC7B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;oBAC5B,QAAQ;iBACR,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;QACD,MAAM,GAAG,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,iCAAiC;IACjC,IAAI,MAAM,IAAI,WAAW,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QACpE,IAAI,QAAQ,GAAG,aAAa,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,CAAC;gBACT,QAAQ;gBACR,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,WAAW,CAAC,MAAM;gBAC3B,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC5B,QAAQ;aACR,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAC;AAAA,CACZ;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACxC,OAAe,EACf,QAAgB,EAChB,QAA4B,EACT;IACnB,IAAI,CAAC,WAAW,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,MAAM,cAAc,EAAE,CAAC;IACxB,CAAC;IAED,6CAA6C;IAC7C,MAAM,UAAU,GAAG,MAAO,CAAC;IAC3B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;IAChC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,gEAA8D;QAC9D,MAAM,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO;YACN;gBACC,QAAQ;gBACR,SAAS,EAAE,CAAC;gBACZ,OAAO,EAAE,KAAK,CAAC,MAAM;gBACrB,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;gBACV,OAAO;gBACP,QAAQ,EAAE,QAAQ;aAClB;SACD,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;QAEvE,mCAAmC;QACnC,MAAM,MAAM,GAAY,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3C,QAAQ;YACR,SAAS,EAAE,CAAC,CAAC,SAAS;YACtB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,QAAQ,EAAE,QAAQ;SAClB,CAAC
,CAAC,CAAC;QAEJ,iBAAiB;QACjB,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAEnE,+BAA+B;QAC/B,MAAM,GAAG,GAAG,CAAC,GAAG,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC;QACjC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAE9C,mEAAmE;QACnE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACN;oBACC,QAAQ;oBACR,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,WAAW,CAAC,MAAM;oBAC3B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI;oBACV,OAAO;oBACP,QAAQ,EAAE,QAAQ;iBAClB;aACD,CAAC;QACH,CAAC;QAED,OAAO,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACV,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,MAAM,CAAC,MAAM,EAAE,CAAC;IACjB,CAAC;AAAA,CACD","sourcesContent":["/**\n * AST-aware code chunking using tree-sitter (WASM).\n *\n * Parses source files into syntax trees and extracts meaningful code constructs\n * (functions, classes, methods, structs, etc.) as individual chunks. Gaps between\n * extracted nodes are captured as file-level chunks when substantial.\n */\n\nimport { readFileSync } from \"fs\";\nimport { createRequire } from \"module\";\nimport type { Node as TSNode } from \"web-tree-sitter\";\nimport type { Chunk, ChunkKind, TreeSitterLanguage } from \"./types.js\";\n\n// Use createRequire for resolving WASM paths in ESM context\nconst require = createRequire(import.meta.url);\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/** Describes which AST node types to extract for a language and how to get names. */\ninterface NodeExtractor {\n\t/** The tree-sitter node type string. */\n\ttype: string;\n\t/** The ChunkKind to assign to extracted chunks. */\n\tkind: ChunkKind;\n\t/** How to extract the symbol name from the node. */\n\tgetName: (node: TSNode) => string | null;\n}\n\n/** Intermediate representation of an extracted AST region. 
*/\ninterface ExtractedRegion {\n\tname: string | null;\n\tkind: ChunkKind;\n\tstartLine: number; // 1-indexed\n\tendLine: number; // 1-indexed, inclusive\n\tcontent: string;\n}\n\n// ============================================================================\n// Lazy Imports\n// ============================================================================\n\n// web-tree-sitter types imported dynamically to avoid top-level await\ntype ParserClass = typeof import(\"web-tree-sitter\").Parser;\ntype LanguageClass = typeof import(\"web-tree-sitter\").Language;\n\nlet Parser: ParserClass | null = null;\nlet Language: LanguageClass | null = null;\n\nlet initPromise: Promise<void> | null = null;\nlet initialized = false;\n\n// ============================================================================\n// Language Cache\n// ============================================================================\n\nconst languageCache = new Map<TreeSitterLanguage, import(\"web-tree-sitter\").Language>();\n\n/** Grammar WASM paths keyed by language. */\nconst GRAMMAR_PATHS: Record<TreeSitterLanguage, string> = {\n\ttypescript: \"tree-sitter-typescript/tree-sitter-typescript.wasm\",\n\ttsx: \"tree-sitter-typescript/tree-sitter-tsx.wasm\",\n\tjavascript: \"tree-sitter-javascript/tree-sitter-javascript.wasm\",\n\tpython: \"tree-sitter-python/tree-sitter-python.wasm\",\n\tgo: \"tree-sitter-go/tree-sitter-go.wasm\",\n\trust: \"tree-sitter-rust/tree-sitter-rust.wasm\",\n\tjava: \"tree-sitter-java/tree-sitter-java.wasm\",\n\tc: \"tree-sitter-c/tree-sitter-c.wasm\",\n\tcpp: \"tree-sitter-cpp/tree-sitter-cpp.wasm\",\n};\n\n// ============================================================================\n// Name Extractors\n// ============================================================================\n\n/** Get name from a node's `name` field. */\nfunction nameField(node: TSNode): string | null {\n\treturn node.childForFieldName(\"name\")?.text ?? 
null;\n}\n\n/** Get name for an arrow function assigned to a variable. */\nfunction arrowFunctionName(node: TSNode): string | null {\n\tconst parent = node.parent;\n\tif (parent?.type === \"variable_declarator\") {\n\t\treturn parent.childForFieldName(\"name\")?.text ?? null;\n\t}\n\treturn null;\n}\n\n/** Get name for C function_definition: name is in the function_declarator child. */\nfunction cFunctionName(node: TSNode): string | null {\n\tconst declarator = node.childForFieldName(\"declarator\");\n\tif (!declarator) return null;\n\t// function_declarator has a `declarator` field for the actual name\n\tif (declarator.type === \"function_declarator\") {\n\t\treturn declarator.childForFieldName(\"declarator\")?.text ?? null;\n\t}\n\treturn declarator.text ?? null;\n}\n\n/** Get name from an export_statement's inner declaration. */\nfunction exportName(node: TSNode): string | null {\n\tconst decl = node.childForFieldName(\"declaration\");\n\tif (!decl) {\n\t\t// Named export like `export { foo }` — use the full text isn't useful,\n\t\t// just return null for anonymous exports\n\t\treturn null;\n\t}\n\treturn decl.childForFieldName(\"name\")?.text ?? 
null;\n}\n\n// ============================================================================\n// Per-Language Node Extractors\n// ============================================================================\n\nconst TS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n\t{ type: \"type_alias_declaration\", kind: \"type_alias\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst JS_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_definition\", kind: \"method\", getName: nameField },\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"export_statement\", kind: \"export\", getName: exportName },\n\t{ type: \"arrow_function\", kind: \"function\", getName: arrowFunctionName },\n];\n\nconst PYTHON_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: nameField },\n\t{ type: \"class_definition\", kind: \"class\", getName: nameField },\n];\n\nconst GO_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_declaration\", kind: \"function\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"type_spec\", kind: \"struct\", getName: nameField },\n];\n\nconst RUST_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_item\", kind: \"function\", getName: nameField },\n\t{ type: \"impl_item\", kind: \"impl\", getName: (n) => n.childForFieldName(\"type\")?.text ?? 
null },\n\t{ type: \"struct_item\", kind: \"struct\", getName: nameField },\n\t{ type: \"enum_item\", kind: \"enum\", getName: nameField },\n\t{ type: \"trait_item\", kind: \"interface\", getName: nameField },\n];\n\nconst JAVA_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"class_declaration\", kind: \"class\", getName: nameField },\n\t{ type: \"method_declaration\", kind: \"method\", getName: nameField },\n\t{ type: \"interface_declaration\", kind: \"interface\", getName: nameField },\n];\n\nconst C_EXTRACTORS: NodeExtractor[] = [\n\t{ type: \"function_definition\", kind: \"function\", getName: cFunctionName },\n\t{ type: \"struct_specifier\", kind: \"struct\", getName: nameField },\n];\n\nconst CPP_EXTRACTORS: NodeExtractor[] = [\n\t...C_EXTRACTORS,\n\t{ type: \"class_specifier\", kind: \"class\", getName: nameField },\n];\n\nconst LANGUAGE_EXTRACTORS: Record<TreeSitterLanguage, NodeExtractor[]> = {\n\ttypescript: TS_EXTRACTORS,\n\ttsx: TS_EXTRACTORS,\n\tjavascript: JS_EXTRACTORS,\n\tpython: PYTHON_EXTRACTORS,\n\tgo: GO_EXTRACTORS,\n\trust: RUST_EXTRACTORS,\n\tjava: JAVA_EXTRACTORS,\n\tc: C_EXTRACTORS,\n\tcpp: CPP_EXTRACTORS,\n};\n\n// ============================================================================\n// Initialization\n// ============================================================================\n\n/**\n * Initialize the tree-sitter WASM runtime. 
Must be called before parsing.\n * Safe to call multiple times — subsequent calls are no-ops.\n */\nexport async function initTreeSitter(): Promise<void> {\n\tif (initialized) return;\n\tif (initPromise) return initPromise;\n\n\tinitPromise = (async () => {\n\t\ttry {\n\t\t\tconst mod = await import(\"web-tree-sitter\");\n\t\t\tParser = mod.Parser;\n\t\t\tLanguage = mod.Language;\n\n\t\t\tconst wasmPath = require.resolve(\"web-tree-sitter/web-tree-sitter.wasm\");\n\t\t\tconst wasmBuf = readFileSync(wasmPath);\n\t\t\tawait Parser.init({ locateFile: () => wasmPath, wasmBinary: wasmBuf });\n\t\t\tinitialized = true;\n\t\t} catch (err) {\n\t\t\t// Reset so subsequent calls can retry instead of returning\n\t\t\t// the same rejected promise forever\n\t\t\tinitPromise = null;\n\t\t\tthrow err;\n\t\t}\n\t})();\n\n\treturn initPromise;\n}\n\n// ============================================================================\n// Language Loading\n// ============================================================================\n\n/** Load and cache a tree-sitter language grammar. 
*/\nasync function loadLanguage(lang: TreeSitterLanguage): Promise<import(\"web-tree-sitter\").Language> {\n\tconst cached = languageCache.get(lang);\n\tif (cached) return cached;\n\n\tif (!Language) {\n\t\tthrow new Error(\"tree-sitter not initialized — call initTreeSitter() first\");\n\t}\n\n\tconst grammarPath = require.resolve(GRAMMAR_PATHS[lang]);\n\tconst loaded = await Language.load(grammarPath);\n\tlanguageCache.set(lang, loaded);\n\treturn loaded;\n}\n\n// ============================================================================\n// AST Extraction\n// ============================================================================\n\n/**\n * Walk the tree and collect nodes matching the target types.\n * Returns regions sorted by start position, with nested nodes skipped\n * (only outermost matches are kept).\n */\nfunction extractRegions(rootNode: TSNode, extractors: NodeExtractor[], _sourceLines: string[]): ExtractedRegion[] {\n\t// Gather all target node types\n\tconst typeToExtractors = new Map<string, NodeExtractor>();\n\tfor (const ext of extractors) {\n\t\ttypeToExtractors.set(ext.type, ext);\n\t}\n\n\tconst targetTypes = extractors.map((e) => e.type);\n\tconst candidates = rootNode.descendantsOfType(targetTypes);\n\n\t// Convert to regions\n\tconst raw: ExtractedRegion[] = [];\n\tfor (const node of candidates) {\n\t\tconst ext = typeToExtractors.get(node.type);\n\t\tif (!ext) continue;\n\n\t\t// For struct_specifier in C, only extract if it has a body (field_declaration_list)\n\t\tif (node.type === \"struct_specifier\") {\n\t\t\tconst hasBody = node.children.some((c) => c.type === \"field_declaration_list\");\n\t\t\tif (!hasBody) continue;\n\t\t}\n\n\t\t// For arrow_function, only extract if parent is variable_declarator (named assignment)\n\t\tif (node.type === \"arrow_function\") {\n\t\t\tif (node.parent?.type !== \"variable_declarator\") continue;\n\t\t}\n\n\t\tconst startLine = node.startPosition.row + 1; // 0→1 indexed\n\t\tconst endLine = 
node.endPosition.row + 1;\n\n\t\traw.push({\n\t\t\tname: ext.getName(node),\n\t\t\tkind: ext.kind,\n\t\t\tstartLine,\n\t\t\tendLine,\n\t\t\tcontent: node.text,\n\t\t});\n\t}\n\n\t// Sort by start line, then by end line descending (larger ranges first)\n\traw.sort((a, b) => a.startLine - b.startLine || b.endLine - a.endLine);\n\n\t// Remove nested regions — keep only outermost\n\tconst regions: ExtractedRegion[] = [];\n\tlet lastEndLine = -1;\n\n\tfor (const region of raw) {\n\t\tif (region.startLine > lastEndLine) {\n\t\t\tregions.push(region);\n\t\t\tlastEndLine = region.endLine;\n\t\t}\n\t\t// else: this region is nested inside the previous one — skip\n\t}\n\n\treturn regions;\n}\n\n// ============================================================================\n// Gap Collection\n// ============================================================================\n\n/** Minimum number of non-blank lines for a gap to become its own chunk. */\nconst MIN_GAP_LINES = 3;\n\n/**\n * Create file-level chunks for substantial code between extracted regions.\n */\nfunction collectGaps(\n\tregions: ExtractedRegion[],\n\tsourceLines: string[],\n\tfilePath: string,\n\tfileType: TreeSitterLanguage,\n): Chunk[] {\n\tconst gaps: Chunk[] = [];\n\tlet cursor = 1; // 1-indexed current line\n\n\tfor (const region of regions) {\n\t\tif (region.startLine > cursor) {\n\t\t\tconst gapLines = sourceLines.slice(cursor - 1, region.startLine - 1);\n\t\t\tconst nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\t\tgaps.push({\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: cursor,\n\t\t\t\t\tendLine: region.startLine - 1,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\t\tfileType,\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t\tcursor = region.endLine + 1;\n\t}\n\n\t// Trailing gap after last region\n\tif (cursor <= sourceLines.length) {\n\t\tconst gapLines = sourceLines.slice(cursor - 1);\n\t\tconst 
nonBlank = gapLines.filter((l) => l.trim().length > 0).length;\n\t\tif (nonBlank > MIN_GAP_LINES) {\n\t\t\tgaps.push({\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: cursor,\n\t\t\t\tendLine: sourceLines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent: gapLines.join(\"\\n\"),\n\t\t\t\tfileType,\n\t\t\t});\n\t\t}\n\t}\n\n\treturn gaps;\n}\n\n// ============================================================================\n// Public API\n// ============================================================================\n\n/**\n * Parse a source file with tree-sitter and extract AST-aware chunks.\n *\n * Returns chunks for functions, classes, methods, and other language-specific\n * constructs, plus file-level chunks for substantial gaps between them.\n *\n * @param content - Raw source code text\n * @param filePath - Relative file path (stored in chunk metadata)\n * @param language - Tree-sitter language identifier\n */\nexport async function chunkWithTreeSitter(\n\tcontent: string,\n\tfilePath: string,\n\tlanguage: TreeSitterLanguage,\n): Promise<Chunk[]> {\n\tif (!initialized || !Parser) {\n\t\tawait initTreeSitter();\n\t}\n\n\t// After init, Parser is guaranteed to be set\n\tconst ParserCtor = Parser!;\n\tconst lang = await loadLanguage(language);\n\tconst parser = new ParserCtor();\n\tparser.setLanguage(lang);\n\n\tconst tree = parser.parse(content);\n\tif (!tree) {\n\t\t// Parse failed — free the parser WASM memory before returning\n\t\tparser.delete();\n\t\tconst lines = content.split(\"\\n\");\n\t\treturn [\n\t\t\t{\n\t\t\t\tfilePath,\n\t\t\t\tstartLine: 1,\n\t\t\t\tendLine: lines.length,\n\t\t\t\tkind: \"file\",\n\t\t\t\tname: null,\n\t\t\t\tcontent,\n\t\t\t\tfileType: language,\n\t\t\t},\n\t\t];\n\t}\n\n\ttry {\n\t\tconst sourceLines = content.split(\"\\n\");\n\t\tconst extractors = LANGUAGE_EXTRACTORS[language];\n\t\tconst regions = extractRegions(tree.rootNode, extractors, sourceLines);\n\n\t\t// Convert regions to Chunk objects\n\t\tconst 
chunks: Chunk[] = regions.map((r) => ({\n\t\t\tfilePath,\n\t\t\tstartLine: r.startLine,\n\t\t\tendLine: r.endLine,\n\t\t\tkind: r.kind,\n\t\t\tname: r.name,\n\t\t\tcontent: r.content,\n\t\t\tfileType: language,\n\t\t}));\n\n\t\t// Add gap chunks\n\t\tconst gaps = collectGaps(regions, sourceLines, filePath, language);\n\n\t\t// Merge and sort by start line\n\t\tconst all = [...chunks, ...gaps];\n\t\tall.sort((a, b) => a.startLine - b.startLine);\n\n\t\t// If no regions were extracted, return the whole file as one chunk\n\t\tif (chunks.length === 0) {\n\t\t\treturn [\n\t\t\t\t{\n\t\t\t\t\tfilePath,\n\t\t\t\t\tstartLine: 1,\n\t\t\t\t\tendLine: sourceLines.length,\n\t\t\t\t\tkind: \"file\",\n\t\t\t\t\tname: null,\n\t\t\t\t\tcontent,\n\t\t\t\t\tfileType: language,\n\t\t\t\t},\n\t\t\t];\n\t\t}\n\n\t\treturn all;\n\t} finally {\n\t\ttree.delete();\n\t\tparser.delete();\n\t}\n}\n"]}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Shared types for the semantic codebase search subsystem.
3
+ */
4
+ /** String-literal union of the language identifiers supported by tree-sitter AST chunking. */
5
+ export type TreeSitterLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust" | "java" | "c" | "cpp";
6
+ /** String-literal union of the non-code file types, which are chunked by format-specific rules rather than by AST. */
7
+ export type TextFileType = "markdown" | "yaml" | "json" | "toml" | "plaintext";
8
+ /** Union of all recognized file types: every TreeSitterLanguage plus every TextFileType. */
9
+ export type FileType = TreeSitterLanguage | TextFileType;
10
+ /** The kind of construct a chunk represents: code constructs (e.g. "function", "class"), non-code constructs (e.g. "heading_section", "paragraph"), or "file" for file-level chunks. */
11
+ export type ChunkKind = "function" | "method" | "class" | "interface" | "struct" | "enum" | "impl" | "export" | "type_alias" | "module" | "heading_section" | "top_level_key" | "paragraph" | "file";
12
+ /** A chunk of code or text extracted from a file. Line numbers are 1-indexed and `endLine` is inclusive. */
13
+ export interface Chunk {
14
+ /** Relative file path from project root. */
15
+ filePath: string;
16
+ /** 1-indexed start line in the source file. */
17
+ startLine: number;
18
+ /** 1-indexed end line (inclusive) in the source file. */
19
+ endLine: number;
20
+ /** The kind of construct this chunk represents. */
21
+ kind: ChunkKind;
22
+ /** Symbol name (function name, class name, heading text, etc.). Null for anonymous chunks. */
23
+ name: string | null;
24
+ /** The raw source text of the chunk. */
25
+ content: string;
26
+ /** Detected file type. */
27
+ fileType: FileType;
28
+ }
29
+ /** Configuration for the search index. `globalMemoryDir` and `visibleDirs` are optional; all other fields are required. */
30
+ export interface IndexConfig {
31
+ /** Absolute path to the project root. */
32
+ projectRoot: string;
33
+ /** Absolute path to the index database directory. */
34
+ indexDir: string;
35
+ /** Optional. Absolute path to the global memory directory (e.g. ~/.dreb/memory/). */
36
+ globalMemoryDir?: string;
37
+ /** Optional. Additional directories to include in scans (bypasses gitignore). */
38
+ visibleDirs?: string[];
39
+ /** Embedding model name (used to key the embeddings table). */
40
+ modelName: string;
41
+ }
42
+ /** Stored metadata for a file in the index. */
43
+ export interface IndexedFile {
44
+ /** Numeric identifier of this file record. */ id: number;
45
+ /** Path of the indexed file. NOTE(review): presumably relative to the project root like Chunk.filePath — confirm against the DB layer. */ filePath: string;
46
+ /** Modification time recorded for the file. NOTE(review): units (ms vs. s) are not visible here — confirm. */ mtime: number;
47
+ /** File type recorded for the file. */ fileType: FileType;
48
+ }
49
+ /** A stored chunk row from the database. Carries the same seven fields as Chunk plus the numeric id and fileId. */
50
+ export interface StoredChunk {
51
+ id: number; // Numeric identifier of this chunk row.
52
+ fileId: number; // Presumably the id of the IndexedFile this chunk belongs to — confirm against the schema.
53
+ filePath: string; // Path of the file containing the chunk.
54
+ startLine: number; // Start line; presumably 1-indexed as documented on Chunk — confirm.
55
+ endLine: number; // End line; presumably 1-indexed and inclusive as documented on Chunk — confirm.
56
+ kind: ChunkKind; // Kind of construct the chunk represents.
57
+ name: string | null; // Symbol name, or null when the chunk has none.
58
+ content: string; // Text of the chunk.
59
+ fileType: FileType; // File type of the containing file.
60
+ }
61
+ /** A stored embedding row: one Float32Array vector for a chunk, tagged with a model name. */
62
+ export interface StoredEmbedding {
63
+ chunkId: number; // Presumably the id of the StoredChunk this vector was computed for — confirm.
64
+ modelName: string; // Model name for this vector; IndexConfig.modelName is documented as keying the embeddings table.
65
+ vector: Float32Array; // The embedding values; dimensionality is not specified here.
66
+ }
67
+ /** A single search result: the matched chunk, its per-metric scores and its combined rank. */
68
+ export interface SearchResult {
69
+ /** The chunk this result refers to. */
70
+ chunk: StoredChunk;
71
+ /** Individual metric scores (0–1, higher is better), one per field of MetricScores. */
72
+ scores: MetricScores;
73
+ /** Combined rank from POEM (lower is better, 0 = top of Pareto front). */
74
+ rank: number;
75
+ }
76
+ /** Scores from each ranking metric, one numeric field per metric. SearchResult.scores documents these values as 0–1 with higher being better. The keys of this interface define MetricName. */
77
+ export interface MetricScores {
78
+ bm25: number;
79
+ cosine: number;
80
+ pathMatch: number;
81
+ symbolMatch: number;
82
+ importGraph: number;
83
+ gitRecency: number;
84
+ }
85
+ /** Names of the 6 ranking metrics, derived as the keys of MetricScores. */
86
+ export type MetricName = keyof MetricScores;
87
+ /** All metric names as an array for iteration. Typed as a mutable MetricName[]; the value itself is defined in the emitted types.js. */
88
+ export declare const METRIC_NAMES: MetricName[];
89
+ /** Progress reporting callback for index operations. Receives a phase label plus current and total counters and returns nothing. NOTE(review): the set of phase strings and the unit of the counters are not specified here — confirm against the index manager. */
90
+ export type IndexProgressCallback = (phase: string, current: number, total: number) => void;
91
+ /** A resolved import edge: source file imports target file. The edge is directed from the importing file to the imported file. */
92
+ export interface ImportEdge {
93
+ /** Relative path of the importing file. */
94
+ source: string;
95
+ /** Relative path of the imported file. */
96
+ target: string;
97
+ }
98
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,uDAAuD;AACvD,MAAM,MAAM,kBAAkB,GAAG,YAAY,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC;AAEvH,4DAA4D;AAC5D,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,WAAW,CAAC;AAE/E,0CAA0C;AAC1C,MAAM,MAAM,QAAQ,GAAG,kBAAkB,GAAG,YAAY,CAAC;AAMzD,qDAAqD;AACrD,MAAM,MAAM,SAAS,GAClB,UAAU,GACV,QAAQ,GACR,OAAO,GACP,WAAW,GACX,QAAQ,GACR,MAAM,GACN,MAAM,GACN,QAAQ,GACR,YAAY,GACZ,QAAQ,GACR,iBAAiB,GACjB,eAAe,GACf,WAAW,GACX,MAAM,CAAC;AAEV,qDAAqD;AACrD,MAAM,WAAW,KAAK;IACrB,4CAA4C;IAC5C,QAAQ,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,IAAI,EAAE,SAAS,CAAC;IAChB,8FAA8F;IAC9F,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,wCAAwC;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAMD,0CAA0C;AAC1C,MAAM,WAAW,WAAW;IAC3B,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,+CAA+C;AAC/C,MAAM,WAAW,WAAW;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAED,4CAA4C;AAC5C,MAAM,WAAW,WAAW;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,QAAQ,CAAC;CACnB;AAED,8BAA8B;AAC9B,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,YAAY,CAAC;CACrB;AAMD,uDAAuD;AACvD,MAAM,WAAW,YAAY;IAC5B,uCAAuC;IACvC,KAAK,EAAE,WAAW,CAAC;IACnB,0DAAwD;IACxD,MAAM,EAAE,YAAY,CAAC;IACrB,0EAA0E;IAC1E,IAAI,EAAE,MAAM,CAAC;CACb;AAED,uCAAuC;AACvC,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,C
AAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,sCAAsC;AACtC,MAAM,MAAM,UAAU,GAAG,MAAM,YAAY,CAAC;AAE5C,kDAAkD;AAClD,eAAO,MAAM,YAAY,EAAE,UAAU,EAAgF,CAAC;AAMtH,wDAAwD;AACxD,MAAM,MAAM,qBAAqB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAM5F,+DAA+D;AAC/D,MAAM,WAAW,UAAU;IAC1B,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,0CAA0C;IAC1C,MAAM,EAAE,MAAM,CAAC;CACf","sourcesContent":["/**\n * Shared types for the semantic codebase search subsystem.\n */\n\n// ============================================================================\n// Languages\n// ============================================================================\n\n/** Languages supported by tree-sitter AST chunking. */\nexport type TreeSitterLanguage = \"typescript\" | \"tsx\" | \"javascript\" | \"python\" | \"go\" | \"rust\" | \"java\" | \"c\" | \"cpp\";\n\n/** Non-code file types chunked by format-specific rules. */\nexport type TextFileType = \"markdown\" | \"yaml\" | \"json\" | \"toml\" | \"plaintext\";\n\n/** Union of all recognized file types. */\nexport type FileType = TreeSitterLanguage | TextFileType;\n\n// ============================================================================\n// Chunks\n// ============================================================================\n\n/** The kind of code construct a chunk represents. */\nexport type ChunkKind =\n\t| \"function\"\n\t| \"method\"\n\t| \"class\"\n\t| \"interface\"\n\t| \"struct\"\n\t| \"enum\"\n\t| \"impl\"\n\t| \"export\"\n\t| \"type_alias\"\n\t| \"module\"\n\t| \"heading_section\"\n\t| \"top_level_key\"\n\t| \"paragraph\"\n\t| \"file\";\n\n/** A chunk of code or text extracted from a file. */\nexport interface Chunk {\n\t/** Relative file path from project root. */\n\tfilePath: string;\n\t/** 1-indexed start line in the source file. */\n\tstartLine: number;\n\t/** 1-indexed end line (inclusive) in the source file. */\n\tendLine: number;\n\t/** The kind of construct this chunk represents. 
*/\n\tkind: ChunkKind;\n\t/** Symbol name (function name, class name, heading text, etc.). Null for anonymous chunks. */\n\tname: string | null;\n\t/** The raw source text of the chunk. */\n\tcontent: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n}\n\n// ============================================================================\n// Index\n// ============================================================================\n\n/** Configuration for the search index. */\nexport interface IndexConfig {\n\t/** Absolute path to the project root. */\n\tprojectRoot: string;\n\t/** Absolute path to the index database directory. */\n\tindexDir: string;\n\t/** Absolute path to the global memory directory (e.g. ~/.dreb/memory/). */\n\tglobalMemoryDir?: string;\n\t/** Additional directories to include in scans (bypasses gitignore). */\n\tvisibleDirs?: string[];\n\t/** Embedding model name (used to key the embeddings table). */\n\tmodelName: string;\n}\n\n/** Stored metadata for a file in the index. */\nexport interface IndexedFile {\n\tid: number;\n\tfilePath: string;\n\tmtime: number;\n\tfileType: FileType;\n}\n\n/** A stored chunk row from the database. */\nexport interface StoredChunk {\n\tid: number;\n\tfileId: number;\n\tfilePath: string;\n\tstartLine: number;\n\tendLine: number;\n\tkind: ChunkKind;\n\tname: string | null;\n\tcontent: string;\n\tfileType: FileType;\n}\n\n/** A stored embedding row. */\nexport interface StoredEmbedding {\n\tchunkId: number;\n\tmodelName: string;\n\tvector: Float32Array;\n}\n\n// ============================================================================\n// Search Results\n// ============================================================================\n\n/** A single search result with scores and metadata. */\nexport interface SearchResult {\n\t/** The chunk this result refers to. */\n\tchunk: StoredChunk;\n\t/** Individual metric scores (0–1, higher is better). 
*/\n\tscores: MetricScores;\n\t/** Combined rank from POEM (lower is better, 0 = top of Pareto front). */\n\trank: number;\n}\n\n/** Scores from each ranking metric. */\nexport interface MetricScores {\n\tbm25: number;\n\tcosine: number;\n\tpathMatch: number;\n\tsymbolMatch: number;\n\timportGraph: number;\n\tgitRecency: number;\n}\n\n/** Names of the 6 ranking metrics. */\nexport type MetricName = keyof MetricScores;\n\n/** All metric names as an array for iteration. */\nexport const METRIC_NAMES: MetricName[] = [\"bm25\", \"cosine\", \"pathMatch\", \"symbolMatch\", \"importGraph\", \"gitRecency\"];\n\n// ============================================================================\n// Callbacks\n// ============================================================================\n\n/** Progress reporting callback for index operations. */\nexport type IndexProgressCallback = (phase: string, current: number, total: number) => void;\n\n// ============================================================================\n// Import Graph\n// ============================================================================\n\n/** A resolved import edge: source file imports target file. */\nexport interface ImportEdge {\n\t/** Relative path of the importing file. */\n\tsource: string;\n\t/** Relative path of the imported file. */\n\ttarget: string;\n}\n"]}
package/dist/types.js ADDED
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Shared types for the semantic codebase search subsystem.
3
+ */
4
+ /** All metric names as an array for iteration. Emitted as a plain array literal (not frozen) holding the six metric name strings. */
5
+ export const METRIC_NAMES = ["bm25", "cosine", "pathMatch", "symbolMatch", "importGraph", "gitRecency"];
6
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AA+HH,kDAAkD;AAClD,MAAM,CAAC,MAAM,YAAY,GAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,EAAE,YAAY,CAAC,CAAC","sourcesContent":["/**\n * Shared types for the semantic codebase search subsystem.\n */\n\n// ============================================================================\n// Languages\n// ============================================================================\n\n/** Languages supported by tree-sitter AST chunking. */\nexport type TreeSitterLanguage = \"typescript\" | \"tsx\" | \"javascript\" | \"python\" | \"go\" | \"rust\" | \"java\" | \"c\" | \"cpp\";\n\n/** Non-code file types chunked by format-specific rules. */\nexport type TextFileType = \"markdown\" | \"yaml\" | \"json\" | \"toml\" | \"plaintext\";\n\n/** Union of all recognized file types. */\nexport type FileType = TreeSitterLanguage | TextFileType;\n\n// ============================================================================\n// Chunks\n// ============================================================================\n\n/** The kind of code construct a chunk represents. */\nexport type ChunkKind =\n\t| \"function\"\n\t| \"method\"\n\t| \"class\"\n\t| \"interface\"\n\t| \"struct\"\n\t| \"enum\"\n\t| \"impl\"\n\t| \"export\"\n\t| \"type_alias\"\n\t| \"module\"\n\t| \"heading_section\"\n\t| \"top_level_key\"\n\t| \"paragraph\"\n\t| \"file\";\n\n/** A chunk of code or text extracted from a file. */\nexport interface Chunk {\n\t/** Relative file path from project root. */\n\tfilePath: string;\n\t/** 1-indexed start line in the source file. */\n\tstartLine: number;\n\t/** 1-indexed end line (inclusive) in the source file. */\n\tendLine: number;\n\t/** The kind of construct this chunk represents. */\n\tkind: ChunkKind;\n\t/** Symbol name (function name, class name, heading text, etc.). Null for anonymous chunks. 
*/\n\tname: string | null;\n\t/** The raw source text of the chunk. */\n\tcontent: string;\n\t/** Detected file type. */\n\tfileType: FileType;\n}\n\n// ============================================================================\n// Index\n// ============================================================================\n\n/** Configuration for the search index. */\nexport interface IndexConfig {\n\t/** Absolute path to the project root. */\n\tprojectRoot: string;\n\t/** Absolute path to the index database directory. */\n\tindexDir: string;\n\t/** Absolute path to the global memory directory (e.g. ~/.dreb/memory/). */\n\tglobalMemoryDir?: string;\n\t/** Additional directories to include in scans (bypasses gitignore). */\n\tvisibleDirs?: string[];\n\t/** Embedding model name (used to key the embeddings table). */\n\tmodelName: string;\n}\n\n/** Stored metadata for a file in the index. */\nexport interface IndexedFile {\n\tid: number;\n\tfilePath: string;\n\tmtime: number;\n\tfileType: FileType;\n}\n\n/** A stored chunk row from the database. */\nexport interface StoredChunk {\n\tid: number;\n\tfileId: number;\n\tfilePath: string;\n\tstartLine: number;\n\tendLine: number;\n\tkind: ChunkKind;\n\tname: string | null;\n\tcontent: string;\n\tfileType: FileType;\n}\n\n/** A stored embedding row. */\nexport interface StoredEmbedding {\n\tchunkId: number;\n\tmodelName: string;\n\tvector: Float32Array;\n}\n\n// ============================================================================\n// Search Results\n// ============================================================================\n\n/** A single search result with scores and metadata. */\nexport interface SearchResult {\n\t/** The chunk this result refers to. */\n\tchunk: StoredChunk;\n\t/** Individual metric scores (0–1, higher is better). */\n\tscores: MetricScores;\n\t/** Combined rank from POEM (lower is better, 0 = top of Pareto front). */\n\trank: number;\n}\n\n/** Scores from each ranking metric. 
*/\nexport interface MetricScores {\n\tbm25: number;\n\tcosine: number;\n\tpathMatch: number;\n\tsymbolMatch: number;\n\timportGraph: number;\n\tgitRecency: number;\n}\n\n/** Names of the 6 ranking metrics. */\nexport type MetricName = keyof MetricScores;\n\n/** All metric names as an array for iteration. */\nexport const METRIC_NAMES: MetricName[] = [\"bm25\", \"cosine\", \"pathMatch\", \"symbolMatch\", \"importGraph\", \"gitRecency\"];\n\n// ============================================================================\n// Callbacks\n// ============================================================================\n\n/** Progress reporting callback for index operations. */\nexport type IndexProgressCallback = (phase: string, current: number, total: number) => void;\n\n// ============================================================================\n// Import Graph\n// ============================================================================\n\n/** A resolved import edge: source file imports target file. */\nexport interface ImportEdge {\n\t/** Relative path of the importing file. */\n\tsource: string;\n\t/** Relative path of the imported file. */\n\ttarget: string;\n}\n"]}