viberag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +219 -0
  3. package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
  4. package/dist/cli/__tests__/mcp-setup.test.js +597 -0
  5. package/dist/cli/app.d.ts +2 -0
  6. package/dist/cli/app.js +238 -0
  7. package/dist/cli/commands/handlers.d.ts +57 -0
  8. package/dist/cli/commands/handlers.js +231 -0
  9. package/dist/cli/commands/index.d.ts +2 -0
  10. package/dist/cli/commands/index.js +2 -0
  11. package/dist/cli/commands/mcp-setup.d.ts +107 -0
  12. package/dist/cli/commands/mcp-setup.js +509 -0
  13. package/dist/cli/commands/useRagCommands.d.ts +23 -0
  14. package/dist/cli/commands/useRagCommands.js +180 -0
  15. package/dist/cli/components/CleanWizard.d.ts +17 -0
  16. package/dist/cli/components/CleanWizard.js +169 -0
  17. package/dist/cli/components/InitWizard.d.ts +20 -0
  18. package/dist/cli/components/InitWizard.js +370 -0
  19. package/dist/cli/components/McpSetupWizard.d.ts +37 -0
  20. package/dist/cli/components/McpSetupWizard.js +387 -0
  21. package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
  22. package/dist/cli/components/SearchResultsDisplay.js +130 -0
  23. package/dist/cli/components/WelcomeBanner.d.ts +10 -0
  24. package/dist/cli/components/WelcomeBanner.js +26 -0
  25. package/dist/cli/components/index.d.ts +1 -0
  26. package/dist/cli/components/index.js +1 -0
  27. package/dist/cli/data/mcp-editors.d.ts +80 -0
  28. package/dist/cli/data/mcp-editors.js +270 -0
  29. package/dist/cli/index.d.ts +2 -0
  30. package/dist/cli/index.js +26 -0
  31. package/dist/cli-bundle.cjs +5269 -0
  32. package/dist/common/commands/terminalSetup.d.ts +2 -0
  33. package/dist/common/commands/terminalSetup.js +144 -0
  34. package/dist/common/components/CommandSuggestions.d.ts +9 -0
  35. package/dist/common/components/CommandSuggestions.js +20 -0
  36. package/dist/common/components/StaticWithResize.d.ts +23 -0
  37. package/dist/common/components/StaticWithResize.js +62 -0
  38. package/dist/common/components/StatusBar.d.ts +8 -0
  39. package/dist/common/components/StatusBar.js +64 -0
  40. package/dist/common/components/TextInput.d.ts +12 -0
  41. package/dist/common/components/TextInput.js +239 -0
  42. package/dist/common/components/index.d.ts +3 -0
  43. package/dist/common/components/index.js +3 -0
  44. package/dist/common/hooks/index.d.ts +4 -0
  45. package/dist/common/hooks/index.js +4 -0
  46. package/dist/common/hooks/useCommandHistory.d.ts +7 -0
  47. package/dist/common/hooks/useCommandHistory.js +51 -0
  48. package/dist/common/hooks/useCtrlC.d.ts +9 -0
  49. package/dist/common/hooks/useCtrlC.js +40 -0
  50. package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
  51. package/dist/common/hooks/useKittyKeyboard.js +26 -0
  52. package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
  53. package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
  54. package/dist/common/hooks/useTerminalResize.d.ts +28 -0
  55. package/dist/common/hooks/useTerminalResize.js +51 -0
  56. package/dist/common/hooks/useTextBuffer.d.ts +13 -0
  57. package/dist/common/hooks/useTextBuffer.js +165 -0
  58. package/dist/common/index.d.ts +13 -0
  59. package/dist/common/index.js +17 -0
  60. package/dist/common/types.d.ts +162 -0
  61. package/dist/common/types.js +1 -0
  62. package/dist/mcp/index.d.ts +12 -0
  63. package/dist/mcp/index.js +66 -0
  64. package/dist/mcp/server.d.ts +25 -0
  65. package/dist/mcp/server.js +837 -0
  66. package/dist/mcp/watcher.d.ts +86 -0
  67. package/dist/mcp/watcher.js +334 -0
  68. package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
  69. package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
  70. package/dist/rag/__tests__/helpers.d.ts +30 -0
  71. package/dist/rag/__tests__/helpers.js +67 -0
  72. package/dist/rag/__tests__/merkle.test.d.ts +5 -0
  73. package/dist/rag/__tests__/merkle.test.js +161 -0
  74. package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
  75. package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
  76. package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
  77. package/dist/rag/__tests__/multi-language.test.js +535 -0
  78. package/dist/rag/__tests__/rag.test.d.ts +10 -0
  79. package/dist/rag/__tests__/rag.test.js +311 -0
  80. package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
  81. package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
  82. package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
  83. package/dist/rag/__tests__/search-filters.test.js +250 -0
  84. package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
  85. package/dist/rag/__tests__/search-modes.test.js +133 -0
  86. package/dist/rag/config/index.d.ts +61 -0
  87. package/dist/rag/config/index.js +111 -0
  88. package/dist/rag/constants.d.ts +41 -0
  89. package/dist/rag/constants.js +57 -0
  90. package/dist/rag/embeddings/fastembed.d.ts +62 -0
  91. package/dist/rag/embeddings/fastembed.js +124 -0
  92. package/dist/rag/embeddings/gemini.d.ts +26 -0
  93. package/dist/rag/embeddings/gemini.js +116 -0
  94. package/dist/rag/embeddings/index.d.ts +10 -0
  95. package/dist/rag/embeddings/index.js +9 -0
  96. package/dist/rag/embeddings/local-4b.d.ts +28 -0
  97. package/dist/rag/embeddings/local-4b.js +51 -0
  98. package/dist/rag/embeddings/local.d.ts +29 -0
  99. package/dist/rag/embeddings/local.js +119 -0
  100. package/dist/rag/embeddings/mistral.d.ts +22 -0
  101. package/dist/rag/embeddings/mistral.js +85 -0
  102. package/dist/rag/embeddings/openai.d.ts +22 -0
  103. package/dist/rag/embeddings/openai.js +85 -0
  104. package/dist/rag/embeddings/types.d.ts +37 -0
  105. package/dist/rag/embeddings/types.js +1 -0
  106. package/dist/rag/gitignore/index.d.ts +57 -0
  107. package/dist/rag/gitignore/index.js +178 -0
  108. package/dist/rag/index.d.ts +15 -0
  109. package/dist/rag/index.js +25 -0
  110. package/dist/rag/indexer/chunker.d.ts +129 -0
  111. package/dist/rag/indexer/chunker.js +1352 -0
  112. package/dist/rag/indexer/index.d.ts +6 -0
  113. package/dist/rag/indexer/index.js +6 -0
  114. package/dist/rag/indexer/indexer.d.ts +73 -0
  115. package/dist/rag/indexer/indexer.js +356 -0
  116. package/dist/rag/indexer/types.d.ts +68 -0
  117. package/dist/rag/indexer/types.js +47 -0
  118. package/dist/rag/logger/index.d.ts +20 -0
  119. package/dist/rag/logger/index.js +75 -0
  120. package/dist/rag/manifest/index.d.ts +50 -0
  121. package/dist/rag/manifest/index.js +97 -0
  122. package/dist/rag/merkle/diff.d.ts +26 -0
  123. package/dist/rag/merkle/diff.js +95 -0
  124. package/dist/rag/merkle/hash.d.ts +34 -0
  125. package/dist/rag/merkle/hash.js +165 -0
  126. package/dist/rag/merkle/index.d.ts +68 -0
  127. package/dist/rag/merkle/index.js +298 -0
  128. package/dist/rag/merkle/node.d.ts +51 -0
  129. package/dist/rag/merkle/node.js +69 -0
  130. package/dist/rag/search/filters.d.ts +21 -0
  131. package/dist/rag/search/filters.js +100 -0
  132. package/dist/rag/search/fts.d.ts +32 -0
  133. package/dist/rag/search/fts.js +61 -0
  134. package/dist/rag/search/hybrid.d.ts +17 -0
  135. package/dist/rag/search/hybrid.js +58 -0
  136. package/dist/rag/search/index.d.ts +89 -0
  137. package/dist/rag/search/index.js +367 -0
  138. package/dist/rag/search/types.d.ts +130 -0
  139. package/dist/rag/search/types.js +4 -0
  140. package/dist/rag/search/vector.d.ts +25 -0
  141. package/dist/rag/search/vector.js +44 -0
  142. package/dist/rag/storage/index.d.ts +92 -0
  143. package/dist/rag/storage/index.js +287 -0
  144. package/dist/rag/storage/lancedb-native.d.ts +7 -0
  145. package/dist/rag/storage/lancedb-native.js +10 -0
  146. package/dist/rag/storage/schema.d.ts +23 -0
  147. package/dist/rag/storage/schema.js +50 -0
  148. package/dist/rag/storage/types.d.ts +100 -0
  149. package/dist/rag/storage/types.js +68 -0
  150. package/package.json +67 -0
  151. package/scripts/check-node-version.js +37 -0
@@ -0,0 +1,1352 @@
1
+ import path from 'node:path';
2
+ import { createRequire } from 'node:module';
3
+ import Parser from 'web-tree-sitter';
4
+ import { computeStringHash } from '../merkle/hash.js';
5
+ import { EXTENSION_TO_LANGUAGE, } from './types.js';
6
+ // Use createRequire to resolve WASM file paths from tree-sitter-wasms
7
+ const require = createRequire(import.meta.url);
8
/**
 * Grammar file name (inside node_modules/tree-sitter-wasms/out/) for every
 * language name used by the chunker. A null value marks a language whose
 * grammar is not loaded.
 *
 * Dart is null for now: the Dart WASM shipped by tree-sitter-wasms is
 * version 15, while web-tree-sitter 0.24.7 supports versions 13-14.
 */
const LANGUAGE_WASM_FILES = Object.fromEntries([
    ['javascript', 'javascript'],
    ['typescript', 'typescript'],
    ['tsx', 'tsx'],
    ['python', 'python'],
    ['go', 'go'],
    ['rust', 'rust'],
    ['java', 'java'],
    ['csharp', 'c_sharp'],
    ['kotlin', 'kotlin'],
    ['swift', 'swift'],
    ['dart', null], // Disabled: grammar version mismatch (see above)
    ['php', 'php'],
].map(([lang, grammarId]) => [
    lang,
    grammarId === null ? null : `tree-sitter-${grammarId}.wasm`,
]));
28
/**
 * Per-language tree-sitter node types that are treated as functions.
 * Each language gets its own array instance.
 */
const FUNCTION_NODE_TYPES = (() => {
    // JavaScript, TypeScript and TSX share the same function node types.
    const ecmaScriptFunctions = () => [
        'function_declaration',
        'function_expression',
        'arrow_function',
    ];
    // Java and C# share the same method/constructor node types.
    const methodsAndConstructors = () => [
        'method_declaration',
        'constructor_declaration',
    ];
    return {
        javascript: ecmaScriptFunctions(),
        typescript: ecmaScriptFunctions(),
        tsx: ecmaScriptFunctions(),
        python: ['function_definition'],
        go: ['function_declaration'],
        rust: ['function_item'],
        java: methodsAndConstructors(),
        csharp: methodsAndConstructors(),
        dart: ['function_signature', 'method_signature'],
        swift: ['function_declaration'],
        kotlin: ['function_declaration'],
        php: ['function_definition', 'method_declaration'],
    };
})();
55
/**
 * Per-language tree-sitter node types that are treated as classes
 * (structs, interfaces, enums, traits, ... depending on the language).
 */
const CLASS_NODE_TYPES = Object.fromEntries([
    [['javascript', 'typescript', 'tsx'], ['class_declaration']],
    [['python'], ['class_definition']],
    // Go: structs arrive via type declarations
    [['go'], ['type_declaration']],
    [['rust'], ['struct_item', 'impl_item', 'enum_item', 'trait_item']],
    [['java'], ['class_declaration', 'interface_declaration', 'enum_declaration']],
    [['csharp'], [
        'class_declaration',
        'interface_declaration',
        'struct_declaration',
        'enum_declaration',
    ]],
    [['dart'], ['class_definition']],
    [['swift'], ['class_declaration', 'struct_declaration', 'protocol_declaration']],
    [['kotlin'], ['class_declaration', 'object_declaration', 'interface_declaration']],
    [['php'], ['class_declaration', 'interface_declaration', 'trait_declaration']],
].flatMap(([languages, nodeTypes]) =>
    // Copy the list so every language owns a separate array.
    languages.map((language) => [language, [...nodeTypes]])));
87
/**
 * Per-language tree-sitter node types that are treated as methods when they
 * are found inside a class-like node. Every language currently has exactly
 * one such node type; the values are still arrays so they can be queried
 * with `.includes()` like the other node-type tables.
 */
const METHOD_NODE_TYPES = Object.fromEntries(Object.entries({
    javascript: 'method_definition',
    typescript: 'method_definition',
    tsx: 'method_definition',
    python: 'function_definition', // function_definition inside class
    go: 'method_declaration',
    rust: 'function_item', // function_item inside impl
    java: 'method_declaration',
    csharp: 'method_declaration',
    dart: 'method_signature',
    swift: 'function_declaration',
    kotlin: 'function_declaration',
    php: 'method_declaration',
}).map(([language, nodeType]) => [language, [nodeType]]));
114
/**
 * JS/TS node types that can wrap a declaration and indicate an export.
 * `lexical_declaration` is listed because it may carry an export modifier.
 */
const EXPORT_WRAPPER_TYPES = Array.of('export_statement', 'export_specifier', 'lexical_declaration');
122
// --- Chunk sizing (characters) ---------------------------------------------

/** Upper bound for a chunk, in characters, unless the caller overrides it. */
const DEFAULT_MAX_CHUNK_SIZE = 2000;

/** Chunks smaller than this many characters are merged with their siblings. */
const MIN_CHUNK_SIZE = 100;

// --- Overlap / sizing (lines) ----------------------------------------------

/**
 * Lines of overlap between consecutive non-AST chunks (unsupported
 * languages), so embeddings keep some context across chunk boundaries.
 */
const DEFAULT_OVERLAP_LINES = 5;

/**
 * Lines of overlap between consecutive markdown chunks. A little higher than
 * for code, to give natural-language text more shared context.
 */
const MARKDOWN_OVERLAP_LINES = 7;

/** Target size of a markdown chunk, in lines. */
const MARKDOWN_TARGET_LINES = 60;

/** File extensions (lowercase, with leading dot) treated as markdown. */
const MARKDOWN_EXTENSIONS = new Set()
    .add('.md')
    .add('.mdx')
    .add('.markdown');
148
+ /**
149
+ * Chunker that uses web-tree-sitter (WASM) to extract semantic code chunks.
150
+ * Provides 100% platform compatibility - no native compilation required.
151
+ */
152
+ export class Chunker {
153
+ constructor() {
154
+ Object.defineProperty(this, "parser", {
155
+ enumerable: true,
156
+ configurable: true,
157
+ writable: true,
158
+ value: null
159
+ });
160
+ Object.defineProperty(this, "languages", {
161
+ enumerable: true,
162
+ configurable: true,
163
+ writable: true,
164
+ value: new Map()
165
+ });
166
+ Object.defineProperty(this, "initialized", {
167
+ enumerable: true,
168
+ configurable: true,
169
+ writable: true,
170
+ value: false
171
+ });
172
+ Object.defineProperty(this, "wasmBasePath", {
173
+ enumerable: true,
174
+ configurable: true,
175
+ writable: true,
176
+ value: null
177
+ });
178
+ // Parser instance created in initialize()
179
+ }
180
+ /**
181
+ * Initialize web-tree-sitter and load language grammars.
182
+ * Must be called before using chunkFile().
183
+ */
184
+ async initialize() {
185
+ if (this.initialized)
186
+ return;
187
+ // Initialize the web-tree-sitter WASM module
188
+ await Parser.init();
189
+ // Create parser instance after init
190
+ this.parser = new Parser();
191
+ // Resolve the path to tree-sitter-wasms/out/
192
+ const wasmPackagePath = require.resolve('tree-sitter-wasms/package.json');
193
+ this.wasmBasePath = path.join(path.dirname(wasmPackagePath), 'out');
194
+ // Load all language grammars sequentially (skip null entries like Dart)
195
+ // IMPORTANT: Must be sequential - web-tree-sitter has global state that
196
+ // gets corrupted when loading multiple WASM modules in parallel.
197
+ for (const [lang, wasmFile] of Object.entries(LANGUAGE_WASM_FILES)) {
198
+ if (!wasmFile) {
199
+ // Language temporarily disabled (e.g., Dart due to version mismatch)
200
+ continue;
201
+ }
202
+ try {
203
+ const wasmPath = path.join(this.wasmBasePath, wasmFile);
204
+ const language = await Parser.Language.load(wasmPath);
205
+ this.languages.set(lang, language);
206
+ }
207
+ catch (error) {
208
+ // Log but don't fail - we can still work with other languages
209
+ console.error(`Failed to load ${lang} grammar:`, error);
210
+ }
211
+ }
212
+ this.initialized = true;
213
+ }
214
+ /**
215
+ * Get the language for a file extension.
216
+ */
217
+ getLanguageForExtension(ext) {
218
+ return EXTENSION_TO_LANGUAGE[ext] ?? null;
219
+ }
220
+ /**
221
+ * Check if a language is supported.
222
+ */
223
+ isLanguageSupported(lang) {
224
+ return this.languages.has(lang);
225
+ }
226
+ /**
227
+ * Check if a file is a markdown file.
228
+ */
229
+ isMarkdownFile(filepath) {
230
+ const ext = path.extname(filepath).toLowerCase();
231
+ return MARKDOWN_EXTENSIONS.has(ext);
232
+ }
233
+ /**
234
+ * Extract chunks from a file.
235
+ *
236
+ * @param filepath - Path to the file (used for extension detection and context headers)
237
+ * @param content - File content to parse
238
+ * @param maxChunkSize - Maximum chunk size in characters (default: 2000)
239
+ * @returns Array of extracted chunks
240
+ */
241
+ chunkFile(filepath, content, maxChunkSize = DEFAULT_MAX_CHUNK_SIZE) {
242
+ if (!this.initialized || !this.parser) {
243
+ throw new Error('Chunker not initialized. Call initialize() before chunkFile().');
244
+ }
245
+ // Determine language from extension
246
+ const ext = path.extname(filepath);
247
+ const lang = this.getLanguageForExtension(ext);
248
+ // Handle markdown files with heading-aware chunking
249
+ if (this.isMarkdownFile(filepath)) {
250
+ return this.chunkMarkdown(filepath, content, maxChunkSize);
251
+ }
252
+ if (!lang || !this.languages.has(lang)) {
253
+ // Unsupported language - return module-level chunk (with size enforcement + overlap)
254
+ const moduleChunk = this.createModuleChunk(filepath, content);
255
+ return this.enforceSizeLimits([moduleChunk], maxChunkSize, content, lang ?? 'javascript', // Use any lang for splitting (line-based)
256
+ filepath, DEFAULT_OVERLAP_LINES);
257
+ }
258
+ // Set parser language
259
+ const language = this.languages.get(lang);
260
+ this.parser.setLanguage(language);
261
+ // Parse the content
262
+ const tree = this.parser.parse(content);
263
+ // If parsing failed, fall back to module chunk (with size enforcement + overlap)
264
+ if (!tree) {
265
+ const moduleChunk = this.createModuleChunk(filepath, content);
266
+ return this.enforceSizeLimits([moduleChunk], maxChunkSize, content, lang, filepath, DEFAULT_OVERLAP_LINES);
267
+ }
268
+ // Extract chunks based on language with context tracking
269
+ const chunks = this.extractChunks(tree.rootNode, content, lang, filepath);
270
+ // If no chunks found, fall back to module chunk (with size enforcement + overlap)
271
+ if (chunks.length === 0) {
272
+ const moduleChunk = this.createModuleChunk(filepath, content);
273
+ return this.enforceSizeLimits([moduleChunk], maxChunkSize, content, lang, filepath, DEFAULT_OVERLAP_LINES);
274
+ }
275
+ // Split oversized chunks and merge tiny ones
276
+ const sizedChunks = this.enforceSizeLimits(chunks, maxChunkSize, content, lang, filepath);
277
+ return sizedChunks;
278
+ }
279
+ /**
280
+ * Extract chunks from a syntax tree.
281
+ */
282
+ extractChunks(root, content, lang, filepath) {
283
+ const chunks = [];
284
+ const lines = content.split('\n');
285
+ // Traverse the tree with context tracking
286
+ this.traverseNode(root, lang, lines, chunks, filepath, null);
287
+ return chunks;
288
+ }
289
+ /**
290
+ * Recursively traverse nodes to extract chunks.
291
+ * Tracks parent context (class name) for context headers.
292
+ */
293
+ traverseNode(node, lang, lines, chunks, filepath, parentClassName) {
294
+ const nodeType = node.type;
295
+ // Check for class
296
+ if (CLASS_NODE_TYPES[lang].includes(nodeType)) {
297
+ const className = this.extractName(node, lang);
298
+ const chunk = this.nodeToChunk(node, lines, 'class', lang, filepath, null);
299
+ if (chunk) {
300
+ chunks.push(chunk);
301
+ }
302
+ // Also extract methods from inside the class
303
+ for (let i = 0; i < node.childCount; i++) {
304
+ const child = node.child(i);
305
+ if (child) {
306
+ this.traverseNode(child, lang, lines, chunks, filepath, className);
307
+ }
308
+ }
309
+ return;
310
+ }
311
+ // Check for function/method
312
+ const functionTypes = FUNCTION_NODE_TYPES[lang];
313
+ const methodTypes = METHOD_NODE_TYPES[lang];
314
+ if (parentClassName && methodTypes.includes(nodeType)) {
315
+ // This is a method inside a class
316
+ const chunk = this.nodeToChunk(node, lines, 'method', lang, filepath, parentClassName);
317
+ if (chunk) {
318
+ chunks.push(chunk);
319
+ }
320
+ return;
321
+ }
322
+ if (!parentClassName && functionTypes.includes(nodeType)) {
323
+ // This is a top-level function
324
+ const chunk = this.nodeToChunk(node, lines, 'function', lang, filepath, null);
325
+ if (chunk) {
326
+ chunks.push(chunk);
327
+ }
328
+ return;
329
+ }
330
+ // Recurse into children
331
+ for (let i = 0; i < node.childCount; i++) {
332
+ const child = node.child(i);
333
+ if (child) {
334
+ this.traverseNode(child, lang, lines, chunks, filepath, parentClassName);
335
+ }
336
+ }
337
+ }
338
+ /**
339
+ * Convert a syntax node to a chunk.
340
+ */
341
+ nodeToChunk(node, lines, type, lang, filepath, parentClassName) {
342
+ // Get name from the node
343
+ const name = this.extractName(node, lang);
344
+ // Get start and end lines (1-indexed)
345
+ const startLine = node.startPosition.row + 1;
346
+ const endLine = node.endPosition.row + 1;
347
+ // Extract text
348
+ const text = lines.slice(startLine - 1, endLine).join('\n');
349
+ if (!text.trim()) {
350
+ return null;
351
+ }
352
+ // Build context header
353
+ const contextHeader = this.buildContextHeader(filepath, parentClassName, type === 'method' ? null : name, // Don't include function name for methods (class provides context)
354
+ false);
355
+ // Hash includes context header for unique embedding per context
356
+ const fullText = contextHeader ? `${contextHeader}\n${text}` : text;
357
+ // Extract new metadata fields
358
+ const signature = this.extractSignature(node, lines, lang);
359
+ const docstring = this.extractDocstring(node, lang);
360
+ const isExported = this.extractIsExported(node, lang);
361
+ const decoratorNames = this.extractDecoratorNames(node, lang);
362
+ return {
363
+ text,
364
+ contextHeader,
365
+ type,
366
+ name,
367
+ startLine,
368
+ endLine,
369
+ contentHash: computeStringHash(fullText),
370
+ signature,
371
+ docstring,
372
+ isExported,
373
+ decoratorNames,
374
+ };
375
+ }
376
+ /**
377
+ * Extract the signature line (first line of function/class declaration).
378
+ */
379
+ extractSignature(node, lines, lang) {
380
+ const startLine = node.startPosition.row;
381
+ // Python: Signature ends with colon
382
+ if (lang === 'python') {
383
+ let signatureEnd = startLine;
384
+ for (let i = startLine; i < lines.length && i < startLine + 10; i++) {
385
+ const line = lines[i];
386
+ if (line?.includes(':')) {
387
+ signatureEnd = i;
388
+ break;
389
+ }
390
+ }
391
+ return lines
392
+ .slice(startLine, signatureEnd + 1)
393
+ .join('\n')
394
+ .trim();
395
+ }
396
+ // C-style languages (Go, Rust, Java, C#, Swift, Kotlin, Dart, PHP):
397
+ // Signature ends at opening brace, may span multiple lines
398
+ if (lang === 'go' ||
399
+ lang === 'rust' ||
400
+ lang === 'java' ||
401
+ lang === 'csharp' ||
402
+ lang === 'swift' ||
403
+ lang === 'kotlin' ||
404
+ lang === 'dart' ||
405
+ lang === 'php') {
406
+ const signatureLines = [];
407
+ for (let i = startLine; i < lines.length && i < startLine + 10; i++) {
408
+ const line = lines[i];
409
+ if (!line)
410
+ continue;
411
+ signatureLines.push(line);
412
+ // Check for opening brace to end signature
413
+ if (line.includes('{')) {
414
+ const lastLine = signatureLines[signatureLines.length - 1];
415
+ if (lastLine) {
416
+ const braceIndex = lastLine.indexOf('{');
417
+ signatureLines[signatureLines.length - 1] = lastLine
418
+ .slice(0, braceIndex)
419
+ .trim();
420
+ }
421
+ break;
422
+ }
423
+ }
424
+ const result = signatureLines.join('\n').trim();
425
+ return result || null;
426
+ }
427
+ // JS/TS: First line up to opening brace or arrow
428
+ const firstLine = lines[startLine];
429
+ if (!firstLine)
430
+ return null;
431
+ // Remove opening brace and body
432
+ const braceIndex = firstLine.indexOf('{');
433
+ if (braceIndex !== -1) {
434
+ return firstLine.slice(0, braceIndex).trim();
435
+ }
436
+ // Arrow function might not have brace on same line
437
+ const arrowIndex = firstLine.indexOf('=>');
438
+ if (arrowIndex !== -1) {
439
+ return firstLine.slice(0, arrowIndex + 2).trim();
440
+ }
441
+ return firstLine.trim();
442
+ }
443
+ /**
444
+ * Extract docstring from a function/class node.
445
+ */
446
+ extractDocstring(node, lang) {
447
+ // Python: Docstring as first string in body
448
+ if (lang === 'python') {
449
+ const body = node.childForFieldName('body');
450
+ if (!body)
451
+ return null;
452
+ const firstStatement = body.child(0);
453
+ if (firstStatement?.type === 'expression_statement') {
454
+ const stringNode = firstStatement.child(0);
455
+ if (stringNode?.type === 'string') {
456
+ let text = stringNode.text;
457
+ if (text.startsWith('"""') || text.startsWith("'''")) {
458
+ text = text.slice(3, -3);
459
+ }
460
+ else if (text.startsWith('"') || text.startsWith("'")) {
461
+ text = text.slice(1, -1);
462
+ }
463
+ return text.trim() || null;
464
+ }
465
+ }
466
+ return null;
467
+ }
468
+ // Go: // comment(s) immediately before
469
+ if (lang === 'go') {
470
+ const comments = [];
471
+ let sibling = node.previousSibling;
472
+ while (sibling) {
473
+ if (sibling.type === 'comment') {
474
+ const text = sibling.text.replace(/^\/\/\s*/, '');
475
+ comments.unshift(text);
476
+ }
477
+ else {
478
+ break;
479
+ }
480
+ sibling = sibling.previousSibling;
481
+ }
482
+ return comments.length > 0 ? comments.join('\n').trim() : null;
483
+ }
484
+ // Rust: /// or //! doc comments
485
+ if (lang === 'rust') {
486
+ const comments = [];
487
+ let sibling = node.previousSibling;
488
+ while (sibling) {
489
+ if (sibling.type === 'line_comment') {
490
+ const text = sibling.text;
491
+ if (text.startsWith('///') || text.startsWith('//!')) {
492
+ comments.unshift(text.replace(/^\/\/[/!]\s*/, ''));
493
+ }
494
+ else {
495
+ break;
496
+ }
497
+ }
498
+ else if (sibling.type === 'block_comment') {
499
+ break;
500
+ }
501
+ else {
502
+ break;
503
+ }
504
+ sibling = sibling.previousSibling;
505
+ }
506
+ return comments.length > 0 ? comments.join('\n').trim() : null;
507
+ }
508
+ // C#: /// XML doc comments
509
+ if (lang === 'csharp') {
510
+ const comments = [];
511
+ let sibling = node.previousSibling;
512
+ while (sibling) {
513
+ if (sibling.type === 'comment') {
514
+ const text = sibling.text;
515
+ if (text.startsWith('///')) {
516
+ // Strip /// and XML tags
517
+ comments.unshift(text
518
+ .replace(/^\/\/\/\s*/, '')
519
+ .replace(/<\/?[^>]+>/g, '')
520
+ .trim());
521
+ }
522
+ else {
523
+ break;
524
+ }
525
+ }
526
+ else {
527
+ break;
528
+ }
529
+ sibling = sibling.previousSibling;
530
+ }
531
+ return comments.length > 0 ? comments.join(' ').trim() : null;
532
+ }
533
+ // Java, Kotlin, PHP: /** Javadoc */ style
534
+ if (lang === 'java' || lang === 'kotlin' || lang === 'php') {
535
+ let sibling = node.previousSibling;
536
+ while (sibling) {
537
+ if (sibling.type === 'comment' ||
538
+ sibling.type === 'multiline_comment' ||
539
+ sibling.type === 'block_comment') {
540
+ const text = sibling.text;
541
+ if (text.startsWith('/**')) {
542
+ return (text
543
+ .replace(/^\/\*\*/, '')
544
+ .replace(/\*\/$/, '')
545
+ .replace(/^\s*\* ?/gm, '')
546
+ .trim() || null);
547
+ }
548
+ }
549
+ // Skip modifiers/annotations to find doc comment
550
+ if (sibling.type === 'modifiers' ||
551
+ sibling.type === 'annotation' ||
552
+ sibling.type === 'marker_annotation') {
553
+ sibling = sibling.previousSibling;
554
+ continue;
555
+ }
556
+ break;
557
+ }
558
+ return null;
559
+ }
560
+ // Swift, Dart: /// or /** */ style
561
+ if (lang === 'swift' || lang === 'dart') {
562
+ const comments = [];
563
+ let sibling = node.previousSibling;
564
+ while (sibling) {
565
+ if (sibling.type === 'comment' ||
566
+ sibling.type === 'multiline_comment') {
567
+ const text = sibling.text;
568
+ if (text.startsWith('/**')) {
569
+ return (text
570
+ .replace(/^\/\*\*/, '')
571
+ .replace(/\*\/$/, '')
572
+ .replace(/^\s*\* ?/gm, '')
573
+ .trim() || null);
574
+ }
575
+ else if (text.startsWith('///')) {
576
+ comments.unshift(text.replace(/^\/\/\/\s*/, ''));
577
+ }
578
+ else {
579
+ break;
580
+ }
581
+ }
582
+ else {
583
+ break;
584
+ }
585
+ sibling = sibling.previousSibling;
586
+ }
587
+ return comments.length > 0 ? comments.join('\n').trim() : null;
588
+ }
589
+ // JS/TS: JSDoc /** */ style
590
+ let checkNode = node;
591
+ while (checkNode) {
592
+ const prev = checkNode.previousSibling;
593
+ if (prev?.type === 'comment') {
594
+ const text = prev.text;
595
+ if (text.startsWith('/**')) {
596
+ return (text
597
+ .replace(/^\/\*\*/, '')
598
+ .replace(/\*\/$/, '')
599
+ .replace(/^\s*\* ?/gm, '')
600
+ .trim() || null);
601
+ }
602
+ }
603
+ checkNode = checkNode.parent;
604
+ if (checkNode &&
605
+ !EXPORT_WRAPPER_TYPES.includes(checkNode.type) &&
606
+ checkNode.type !== 'variable_declarator' &&
607
+ checkNode.type !== 'variable_declaration') {
608
+ break;
609
+ }
610
+ }
611
+ return null;
612
+ }
613
+ /**
614
+ * Check if a node is exported/public.
615
+ */
616
+ extractIsExported(node, lang) {
617
+ // Python: Public if name doesn't start with underscore
618
+ if (lang === 'python') {
619
+ const name = this.extractName(node, lang);
620
+ return !name.startsWith('_');
621
+ }
622
+ // Dart: Same as Python - underscore prefix means private
623
+ if (lang === 'dart') {
624
+ const name = this.extractName(node, lang);
625
+ return !name.startsWith('_');
626
+ }
627
+ // Go: Exported if name starts with uppercase letter
628
+ if (lang === 'go') {
629
+ const name = this.extractName(node, lang);
630
+ return name.length > 0 && name[0] === name[0]?.toUpperCase();
631
+ }
632
+ // Rust: Look for 'pub' visibility modifier
633
+ if (lang === 'rust') {
634
+ return this.hasVisibilityModifier(node, 'pub');
635
+ }
636
+ // Java, C#, Swift: Look for 'public' keyword
637
+ if (lang === 'java' || lang === 'csharp' || lang === 'swift') {
638
+ return this.hasVisibilityModifier(node, 'public');
639
+ }
640
+ // Kotlin: Default is public unless marked private/internal/protected
641
+ if (lang === 'kotlin') {
642
+ return (!this.hasVisibilityModifier(node, 'private') &&
643
+ !this.hasVisibilityModifier(node, 'internal') &&
644
+ !this.hasVisibilityModifier(node, 'protected'));
645
+ }
646
+ // PHP: Public if has 'public' modifier or no visibility modifier (for functions)
647
+ if (lang === 'php') {
648
+ if (this.hasVisibilityModifier(node, 'public'))
649
+ return true;
650
+ // Top-level functions are always public
651
+ if (node.type === 'function_definition' &&
652
+ node.parent?.type === 'program') {
653
+ return true;
654
+ }
655
+ // Methods: check for visibility - no modifier means package-private
656
+ return (!this.hasVisibilityModifier(node, 'private') &&
657
+ !this.hasVisibilityModifier(node, 'protected'));
658
+ }
659
+ // JS/TS: Check for export keyword in node or parent
660
+ let checkNode = node;
661
+ while (checkNode) {
662
+ // Check if this node has export modifier
663
+ if (checkNode.type === 'export_statement') {
664
+ return true;
665
+ }
666
+ // Check for 'export' child (for class/function declarations)
667
+ for (let i = 0; i < checkNode.childCount; i++) {
668
+ const child = checkNode.child(i);
669
+ if (child && (child.type === 'export' || child.text === 'export')) {
670
+ return true;
671
+ }
672
+ }
673
+ // Walk up through wrappers
674
+ const parent = checkNode.parent;
675
+ if (parent &&
676
+ (parent.type === 'export_statement' ||
677
+ parent.type === 'variable_declaration' ||
678
+ parent.type === 'lexical_declaration')) {
679
+ checkNode = parent;
680
+ }
681
+ else {
682
+ break;
683
+ }
684
+ }
685
+ return false;
686
+ }
687
+ /**
688
+ * Helper to check for visibility modifiers in a node.
689
+ */
690
+ hasVisibilityModifier(node, modifier) {
691
+ // Check direct children for visibility modifiers
692
+ for (let i = 0; i < node.childCount; i++) {
693
+ const child = node.child(i);
694
+ if (!child)
695
+ continue;
696
+ // Check common modifier node types
697
+ if (child.type === 'visibility_modifier' ||
698
+ child.type === 'modifier' ||
699
+ child.type === 'modifiers' ||
700
+ child.type === modifier) {
701
+ if (child.text === modifier || child.text?.includes(modifier)) {
702
+ return true;
703
+ }
704
+ // Check nested modifiers
705
+ for (let j = 0; j < child.childCount; j++) {
706
+ const grandchild = child.child(j);
707
+ if (grandchild &&
708
+ (grandchild.text === modifier || grandchild.type === modifier)) {
709
+ return true;
710
+ }
711
+ }
712
+ }
713
+ // Direct text match
714
+ if (child.text === modifier) {
715
+ return true;
716
+ }
717
+ }
718
+ // Check parent for modifiers (for wrapped declarations)
719
+ if (node.parent) {
720
+ for (let i = 0; i < node.parent.childCount; i++) {
721
+ const sibling = node.parent.child(i);
722
+ if (!sibling || sibling.equals(node))
723
+ continue;
724
+ if (sibling.type === 'visibility_modifier' ||
725
+ sibling.type === 'modifier' ||
726
+ sibling.type === 'modifiers') {
727
+ if (sibling.text === modifier || sibling.text?.includes(modifier)) {
728
+ return true;
729
+ }
730
+ }
731
+ if (sibling.text === modifier) {
732
+ return true;
733
+ }
734
+ }
735
+ }
736
+ return false;
737
+ }
738
+ /**
739
+ * Extract decorator names from a node.
740
+ */
741
+ extractDecoratorNames(node, lang) {
742
+ const decorators = [];
743
+ // Python: @decorator syntax
744
+ if (lang === 'python') {
745
+ let sibling = node.previousSibling;
746
+ while (sibling) {
747
+ if (sibling.type === 'decorator') {
748
+ const nameNode = this.findChildOfType(sibling, [
749
+ 'identifier',
750
+ 'call',
751
+ ]);
752
+ if (nameNode) {
753
+ if (nameNode.type === 'call') {
754
+ const funcNode = nameNode.childForFieldName('function');
755
+ if (funcNode) {
756
+ decorators.unshift(funcNode.text);
757
+ }
758
+ }
759
+ else {
760
+ decorators.unshift(nameNode.text);
761
+ }
762
+ }
763
+ }
764
+ else if (sibling.type !== 'comment') {
765
+ break;
766
+ }
767
+ sibling = sibling.previousSibling;
768
+ }
769
+ }
770
+ // Rust: #[attribute] syntax
771
+ else if (lang === 'rust') {
772
+ let sibling = node.previousSibling;
773
+ while (sibling) {
774
+ if (sibling.type === 'attribute_item' || sibling.type === 'attribute') {
775
+ // Extract attribute name from #[name] or #[name(...)]
776
+ const attrNode = this.findChildOfType(sibling, [
777
+ 'attribute',
778
+ 'meta_item',
779
+ ]);
780
+ const target = attrNode || sibling;
781
+ const pathNode = this.findChildOfType(target, [
782
+ 'path',
783
+ 'identifier',
784
+ 'scoped_identifier',
785
+ ]);
786
+ if (pathNode) {
787
+ decorators.unshift(pathNode.text);
788
+ }
789
+ }
790
+ else if (sibling.type !== 'line_comment' &&
791
+ sibling.type !== 'block_comment') {
792
+ break;
793
+ }
794
+ sibling = sibling.previousSibling;
795
+ }
796
+ }
797
+ // Java, Kotlin: @Annotation syntax
798
+ else if (lang === 'java' || lang === 'kotlin') {
799
+ let sibling = node.previousSibling;
800
+ while (sibling) {
801
+ if (sibling.type === 'annotation' ||
802
+ sibling.type === 'marker_annotation') {
803
+ const nameNode = this.findChildOfType(sibling, [
804
+ 'identifier',
805
+ 'scoped_identifier',
806
+ ]);
807
+ if (nameNode) {
808
+ decorators.unshift(nameNode.text);
809
+ }
810
+ }
811
+ else if (sibling.type === 'modifiers') {
812
+ // Annotations may be inside modifiers node
813
+ for (let i = 0; i < sibling.childCount; i++) {
814
+ const child = sibling.child(i);
815
+ if (child &&
816
+ (child.type === 'annotation' ||
817
+ child.type === 'marker_annotation')) {
818
+ const nameNode = this.findChildOfType(child, ['identifier']);
819
+ if (nameNode) {
820
+ decorators.unshift(nameNode.text);
821
+ }
822
+ }
823
+ }
824
+ }
825
+ else if (sibling.type !== 'comment' &&
826
+ sibling.type !== 'multiline_comment') {
827
+ break;
828
+ }
829
+ sibling = sibling.previousSibling;
830
+ }
831
+ }
832
+ // C#: [Attribute] syntax
833
+ else if (lang === 'csharp') {
834
+ let sibling = node.previousSibling;
835
+ while (sibling) {
836
+ if (sibling.type === 'attribute_list') {
837
+ for (let i = 0; i < sibling.childCount; i++) {
838
+ const attrNode = sibling.child(i);
839
+ if (attrNode?.type === 'attribute') {
840
+ const nameNode = this.findChildOfType(attrNode, [
841
+ 'identifier',
842
+ 'qualified_name',
843
+ 'name',
844
+ ]);
845
+ if (nameNode) {
846
+ decorators.unshift(nameNode.text);
847
+ }
848
+ }
849
+ }
850
+ }
851
+ else if (sibling.type !== 'comment') {
852
+ break;
853
+ }
854
+ sibling = sibling.previousSibling;
855
+ }
856
+ }
857
+ // Swift: @attribute syntax
858
+ else if (lang === 'swift') {
859
+ let sibling = node.previousSibling;
860
+ while (sibling) {
861
+ if (sibling.type === 'attribute') {
862
+ const nameNode = this.findChildOfType(sibling, [
863
+ 'user_type',
864
+ 'simple_identifier',
865
+ 'identifier',
866
+ ]);
867
+ if (nameNode) {
868
+ decorators.unshift(nameNode.text);
869
+ }
870
+ }
871
+ else if (sibling.type !== 'comment' &&
872
+ sibling.type !== 'multiline_comment') {
873
+ break;
874
+ }
875
+ sibling = sibling.previousSibling;
876
+ }
877
+ }
878
+ // Dart: @annotation syntax
879
+ else if (lang === 'dart') {
880
+ let sibling = node.previousSibling;
881
+ while (sibling) {
882
+ if (sibling.type === 'annotation') {
883
+ const nameNode = this.findChildOfType(sibling, [
884
+ 'identifier',
885
+ 'qualified',
886
+ ]);
887
+ if (nameNode) {
888
+ decorators.unshift(nameNode.text);
889
+ }
890
+ }
891
+ else if (sibling.type !== 'comment') {
892
+ break;
893
+ }
894
+ sibling = sibling.previousSibling;
895
+ }
896
+ }
897
+ // PHP: #[Attribute] syntax (PHP 8+)
898
+ else if (lang === 'php') {
899
+ let sibling = node.previousSibling;
900
+ while (sibling) {
901
+ if (sibling.type === 'attribute_group' ||
902
+ sibling.type === 'attribute_list') {
903
+ for (let i = 0; i < sibling.childCount; i++) {
904
+ const attrNode = sibling.child(i);
905
+ if (attrNode?.type === 'attribute') {
906
+ const nameNode = this.findChildOfType(attrNode, [
907
+ 'name',
908
+ 'qualified_name',
909
+ 'identifier',
910
+ ]);
911
+ if (nameNode) {
912
+ decorators.unshift(nameNode.text);
913
+ }
914
+ }
915
+ }
916
+ }
917
+ else if (sibling.type !== 'comment') {
918
+ break;
919
+ }
920
+ sibling = sibling.previousSibling;
921
+ }
922
+ }
923
+ // Go: No decorators (uses comments like //go:embed but not proper decorators)
924
+ // JS/TS: @decorator syntax
925
+ else if (lang === 'javascript' || lang === 'typescript' || lang === 'tsx') {
926
+ let checkNode = node;
927
+ while (checkNode) {
928
+ for (let i = 0; i < checkNode.childCount; i++) {
929
+ const child = checkNode.child(i);
930
+ if (child?.type === 'decorator') {
931
+ const expr = this.findChildOfType(child, [
932
+ 'call_expression',
933
+ 'identifier',
934
+ ]);
935
+ if (expr) {
936
+ if (expr.type === 'call_expression') {
937
+ const func = expr.childForFieldName('function');
938
+ if (func) {
939
+ decorators.push(func.text);
940
+ }
941
+ }
942
+ else {
943
+ decorators.push(expr.text);
944
+ }
945
+ }
946
+ }
947
+ }
948
+ let sibling = checkNode.previousSibling;
949
+ while (sibling) {
950
+ if (sibling.type === 'decorator') {
951
+ const expr = this.findChildOfType(sibling, [
952
+ 'call_expression',
953
+ 'identifier',
954
+ ]);
955
+ if (expr) {
956
+ if (expr.type === 'call_expression') {
957
+ const func = expr.childForFieldName('function');
958
+ if (func) {
959
+ decorators.unshift(func.text);
960
+ }
961
+ }
962
+ else {
963
+ decorators.unshift(expr.text);
964
+ }
965
+ }
966
+ }
967
+ else if (sibling.type !== 'comment') {
968
+ break;
969
+ }
970
+ sibling = sibling.previousSibling;
971
+ }
972
+ const parent = checkNode.parent;
973
+ if (parent && EXPORT_WRAPPER_TYPES.includes(parent.type)) {
974
+ checkNode = parent;
975
+ }
976
+ else {
977
+ break;
978
+ }
979
+ }
980
+ }
981
+ return decorators.length > 0 ? decorators.join(',') : null;
982
+ }
983
+ /**
984
+ * Helper to find a child node of specific types.
985
+ */
986
+ findChildOfType(node, types) {
987
+ for (let i = 0; i < node.childCount; i++) {
988
+ const child = node.child(i);
989
+ if (child && types.includes(child.type)) {
990
+ return child;
991
+ }
992
+ }
993
+ return null;
994
+ }
995
+ /**
996
+ * Build a context header for a chunk.
997
+ */
998
+ buildContextHeader(filepath, parentClassName, functionName, isContinuation) {
999
+ const parts = [`// File: ${filepath}`];
1000
+ if (parentClassName) {
1001
+ parts.push(`Class: ${parentClassName}`);
1002
+ }
1003
+ if (functionName) {
1004
+ parts.push(`Function: ${functionName}`);
1005
+ }
1006
+ if (isContinuation) {
1007
+ parts.push('(continued)');
1008
+ }
1009
+ return parts.join(', ');
1010
+ }
1011
+ /**
1012
+ * Extract the name of a function/class/method from its node.
1013
+ */
1014
+ extractName(node, _lang) {
1015
+ // Try to get name via field first (works for many languages)
1016
+ const nameField = node.childForFieldName('name');
1017
+ if (nameField) {
1018
+ return nameField.text;
1019
+ }
1020
+ // Look for common identifier node types
1021
+ for (let i = 0; i < node.childCount; i++) {
1022
+ const child = node.child(i);
1023
+ if (!child)
1024
+ continue;
1025
+ // Common identifier types across languages
1026
+ if (child.type === 'identifier' ||
1027
+ child.type === 'name' ||
1028
+ child.type === 'simple_identifier' || // Kotlin, Swift
1029
+ child.type === 'type_identifier' // Rust, Go struct types
1030
+ ) {
1031
+ return child.text;
1032
+ }
1033
+ // JS/TS method names
1034
+ if (child.type === 'property_identifier') {
1035
+ return child.text;
1036
+ }
1037
+ // Go type declarations (type Foo struct { })
1038
+ if (child.type === 'type_spec') {
1039
+ const specName = child.childForFieldName('name');
1040
+ if (specName) {
1041
+ return specName.text;
1042
+ }
1043
+ }
1044
+ }
1045
+ // For JS/TS variable declarations with arrow functions
1046
+ if (node.parent?.type === 'variable_declarator') {
1047
+ const varName = node.parent.childForFieldName('name');
1048
+ if (varName) {
1049
+ return varName.text;
1050
+ }
1051
+ }
1052
+ // For Rust impl blocks, try to get the type name
1053
+ if (node.type === 'impl_item') {
1054
+ const typeNode = node.childForFieldName('type');
1055
+ if (typeNode) {
1056
+ const typeId = this.findChildOfType(typeNode, [
1057
+ 'type_identifier',
1058
+ 'identifier',
1059
+ ]);
1060
+ if (typeId) {
1061
+ return `impl ${typeId.text}`;
1062
+ }
1063
+ }
1064
+ }
1065
+ return '';
1066
+ }
1067
+ /**
1068
+ * Create a module-level chunk for the entire file.
1069
+ */
1070
+ createModuleChunk(filepath, content) {
1071
+ const lines = content.split('\n');
1072
+ const contextHeader = this.buildContextHeader(filepath, null, null, false);
1073
+ const fullText = `${contextHeader}\n${content}`;
1074
+ return {
1075
+ text: content,
1076
+ contextHeader,
1077
+ type: 'module',
1078
+ name: '',
1079
+ startLine: 1,
1080
+ endLine: lines.length,
1081
+ contentHash: computeStringHash(fullText),
1082
+ // Module chunks don't have these metadata fields
1083
+ signature: null,
1084
+ docstring: null,
1085
+ isExported: true, // Entire module is implicitly "exported"
1086
+ decoratorNames: null,
1087
+ };
1088
+ }
1089
+ /**
1090
+ * Chunk markdown files with heading-aware splitting and overlap.
1091
+ *
1092
+ * Strategy:
1093
+ * 1. Try to split at heading boundaries (# lines)
1094
+ * 2. Use sliding window with overlap between chunks
1095
+ * 3. Merge small final chunks to avoid orphans
1096
+ */
1097
+ chunkMarkdown(filepath, content, maxChunkSize) {
1098
+ const lines = content.split('\n');
1099
+ // If file is small enough, return as single module chunk
1100
+ if (content.length <= maxChunkSize &&
1101
+ lines.length <= MARKDOWN_TARGET_LINES * 1.5) {
1102
+ return [this.createModuleChunk(filepath, content)];
1103
+ }
1104
+ const chunks = [];
1105
+ let currentStartLine = 0; // 0-indexed for array access
1106
+ let chunkIndex = 0;
1107
+ while (currentStartLine < lines.length) {
1108
+ // Calculate target end (before overlap)
1109
+ const targetEnd = Math.min(currentStartLine + MARKDOWN_TARGET_LINES, lines.length);
1110
+ // Look for a heading boundary near the target to split cleanly
1111
+ let actualEnd = targetEnd;
1112
+ const searchStart = Math.max(targetEnd - 15, currentStartLine + Math.floor(MARKDOWN_TARGET_LINES / 3));
1113
+ // Search backwards from target for a heading
1114
+ for (let i = targetEnd; i >= searchStart && i > currentStartLine; i--) {
1115
+ const line = lines[i];
1116
+ if (line && /^#{1,6}\s/.test(line)) {
1117
+ // Found a heading - split before it
1118
+ actualEnd = i;
1119
+ break;
1120
+ }
1121
+ }
1122
+ // If no heading found and we're at the end, take remaining lines
1123
+ if (actualEnd >= lines.length) {
1124
+ actualEnd = lines.length;
1125
+ }
1126
+ // Extract chunk lines
1127
+ const chunkLines = lines.slice(currentStartLine, actualEnd);
1128
+ const chunkText = chunkLines.join('\n');
1129
+ // Skip if chunk is too small and not at the end (will merge later)
1130
+ if (chunkText.trim().length < MIN_CHUNK_SIZE &&
1131
+ chunks.length > 0 &&
1132
+ actualEnd < lines.length) {
1133
+ // Move forward and let the next iteration include these lines
1134
+ currentStartLine = actualEnd;
1135
+ continue;
1136
+ }
1137
+ const chunk = this.createMarkdownChunk(filepath, chunkText, currentStartLine + 1, // 1-indexed
1138
+ currentStartLine + chunkLines.length, // 1-indexed
1139
+ chunkIndex > 0);
1140
+ chunks.push(chunk);
1141
+ chunkIndex++;
1142
+ // Calculate next start with overlap
1143
+ const nextStart = actualEnd - MARKDOWN_OVERLAP_LINES;
1144
+ // Ensure we make progress
1145
+ if (nextStart <= currentStartLine) {
1146
+ currentStartLine = actualEnd;
1147
+ }
1148
+ else {
1149
+ currentStartLine = nextStart;
1150
+ }
1151
+ // Check if we've reached the end
1152
+ if (actualEnd >= lines.length) {
1153
+ break;
1154
+ }
1155
+ }
1156
+ // If final chunk is too small, merge with previous
1157
+ if (chunks.length >= 2) {
1158
+ const lastChunk = chunks[chunks.length - 1];
1159
+ if (lastChunk.text.length < MIN_CHUNK_SIZE) {
1160
+ const prevChunk = chunks[chunks.length - 2];
1161
+ // Merge last into previous
1162
+ const mergedText = prevChunk.text + '\n' + lastChunk.text;
1163
+ if (mergedText.length <= maxChunkSize * 1.5) {
1164
+ // Allow some overflow for merging
1165
+ const contextHeader = this.buildContextHeader(filepath, null, null, false);
1166
+ const fullText = `${contextHeader}\n${mergedText}`;
1167
+ chunks[chunks.length - 2] = {
1168
+ ...prevChunk,
1169
+ text: mergedText,
1170
+ endLine: lastChunk.endLine,
1171
+ contentHash: computeStringHash(fullText),
1172
+ };
1173
+ chunks.pop();
1174
+ }
1175
+ }
1176
+ }
1177
+ return chunks;
1178
+ }
1179
+ /**
1180
+ * Create a chunk from markdown content.
1181
+ */
1182
+ createMarkdownChunk(filepath, text, startLine, endLine, isContinuation) {
1183
+ const contextHeader = this.buildContextHeader(filepath, null, null, isContinuation);
1184
+ const fullText = `${contextHeader}\n${text}`;
1185
+ return {
1186
+ text,
1187
+ contextHeader,
1188
+ type: 'module',
1189
+ name: '',
1190
+ startLine,
1191
+ endLine,
1192
+ contentHash: computeStringHash(fullText),
1193
+ signature: null,
1194
+ docstring: null,
1195
+ isExported: true,
1196
+ decoratorNames: null,
1197
+ };
1198
+ }
1199
+ /**
1200
+ * Enforce size limits: split oversized chunks and merge tiny ones.
1201
+ *
1202
+ * @param overlapLines - Number of lines to overlap between chunks (for context continuity)
1203
+ */
1204
+ enforceSizeLimits(chunks, maxSize, content, _lang, // Reserved for future AST-based splitting
1205
+ filepath, overlapLines = 0) {
1206
+ const lines = content.split('\n');
1207
+ const result = [];
1208
+ for (const chunk of chunks) {
1209
+ if (chunk.text.length <= maxSize) {
1210
+ result.push(chunk);
1211
+ }
1212
+ else {
1213
+ // Split oversized chunk by lines
1214
+ const splitChunks = this.splitChunkByLines(chunk, maxSize, lines, filepath, overlapLines);
1215
+ result.push(...splitChunks);
1216
+ }
1217
+ }
1218
+ // Merge small adjacent chunks of the same type
1219
+ return this.mergeSmallChunks(result, maxSize);
1220
+ }
1221
+ /**
1222
+ * Split an oversized chunk by lines.
1223
+ * Tries to split at natural boundaries (empty lines, statement ends).
1224
+ *
1225
+ * @param overlapLines - Number of lines from previous chunk to include for context
1226
+ */
1227
+ splitChunkByLines(chunk, maxSize, allLines, filepath, overlapLines = 0) {
1228
+ const chunkLines = allLines.slice(chunk.startLine - 1, chunk.endLine);
1229
+ const result = [];
1230
+ let currentLines = [];
1231
+ let currentStartLine = chunk.startLine;
1232
+ let currentSize = 0;
1233
+ let partIndex = 0;
1234
+ for (let i = 0; i < chunkLines.length; i++) {
1235
+ const line = chunkLines[i];
1236
+ const lineSize = line.length + 1; // +1 for newline
1237
+ // Check if adding this line would exceed max size
1238
+ if (currentSize + lineSize > maxSize && currentLines.length > 0) {
1239
+ // Flush current chunk
1240
+ const chunkEndLine = currentStartLine + currentLines.length - 1;
1241
+ result.push(this.createSplitChunk(chunk, currentLines, currentStartLine, chunkEndLine, filepath, partIndex > 0));
1242
+ partIndex++;
1243
+ // Start next chunk with overlap from the end of previous chunk
1244
+ if (overlapLines > 0 && currentLines.length > overlapLines) {
1245
+ // Include last N lines from previous chunk as overlap
1246
+ const overlapStart = currentLines.length - overlapLines;
1247
+ currentLines = currentLines.slice(overlapStart);
1248
+ currentStartLine = chunkEndLine - overlapLines + 1;
1249
+ currentSize = currentLines.reduce((sum, l) => sum + l.length + 1, 0);
1250
+ }
1251
+ else {
1252
+ // No overlap or previous chunk too small
1253
+ currentLines = [];
1254
+ currentStartLine = chunk.startLine + i;
1255
+ currentSize = 0;
1256
+ }
1257
+ }
1258
+ currentLines.push(line);
1259
+ currentSize += lineSize;
1260
+ }
1261
+ // Flush remaining lines
1262
+ if (currentLines.length > 0) {
1263
+ result.push(this.createSplitChunk(chunk, currentLines, currentStartLine, currentStartLine + currentLines.length - 1, filepath, partIndex > 0));
1264
+ }
1265
+ return result;
1266
+ }
1267
+ /**
1268
+ * Create a chunk from a split portion.
1269
+ */
1270
+ createSplitChunk(original, lines, startLine, endLine, filepath, isContinuation) {
1271
+ const text = lines.join('\n');
1272
+ // Build context header with continuation marker if needed
1273
+ const parentClass = original.type === 'method'
1274
+ ? this.extractClassFromContext(original.contextHeader)
1275
+ : null;
1276
+ const functionName = original.type === 'function' || original.type === 'method'
1277
+ ? original.name
1278
+ : null;
1279
+ const contextHeader = this.buildContextHeader(filepath, parentClass, functionName, isContinuation);
1280
+ const fullText = `${contextHeader}\n${text}`;
1281
+ return {
1282
+ text,
1283
+ contextHeader,
1284
+ type: original.type,
1285
+ name: original.name,
1286
+ startLine,
1287
+ endLine,
1288
+ contentHash: computeStringHash(fullText),
1289
+ // Inherit metadata from original chunk
1290
+ // Only first part gets the signature; continuations get null
1291
+ signature: isContinuation ? null : original.signature,
1292
+ docstring: isContinuation ? null : original.docstring,
1293
+ isExported: original.isExported,
1294
+ decoratorNames: isContinuation ? null : original.decoratorNames,
1295
+ };
1296
+ }
1297
+ /**
1298
+ * Extract class name from a context header string.
1299
+ */
1300
+ extractClassFromContext(contextHeader) {
1301
+ const match = contextHeader.match(/Class: ([^,)]+)/);
1302
+ return match ? match[1] : null;
1303
+ }
1304
+ /**
1305
+ * Merge small adjacent chunks of the same type to avoid fragment explosion.
1306
+ */
1307
+ mergeSmallChunks(chunks, maxSize) {
1308
+ if (chunks.length <= 1)
1309
+ return chunks;
1310
+ const result = [];
1311
+ let current = chunks[0];
1312
+ for (let i = 1; i < chunks.length; i++) {
1313
+ const next = chunks[i];
1314
+ // Check if we should merge: both small, same type, adjacent
1315
+ const canMerge = current.text.length < MIN_CHUNK_SIZE &&
1316
+ next.text.length < MIN_CHUNK_SIZE &&
1317
+ current.type === next.type &&
1318
+ current.endLine + 1 >= next.startLine &&
1319
+ current.text.length + next.text.length + 1 <= maxSize;
1320
+ if (canMerge) {
1321
+ // Merge chunks
1322
+ const mergedText = current.text + '\n' + next.text;
1323
+ const fullText = `${current.contextHeader}\n${mergedText}`;
1324
+ current = {
1325
+ ...current,
1326
+ text: mergedText,
1327
+ endLine: next.endLine,
1328
+ contentHash: computeStringHash(fullText),
1329
+ };
1330
+ }
1331
+ else {
1332
+ result.push(current);
1333
+ current = next;
1334
+ }
1335
+ }
1336
+ result.push(current);
1337
+ return result;
1338
+ }
1339
+ /**
1340
+ * Close the parser and free resources.
1341
+ */
1342
+ close() {
1343
+ // Delete the parser to free WASM memory
1344
+ if (this.parser) {
1345
+ this.parser.delete();
1346
+ this.parser = null;
1347
+ }
1348
+ // Clear the language cache
1349
+ this.languages.clear();
1350
+ this.initialized = false;
1351
+ }
1352
+ }