@cue-dev/retrieval-core 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1187 @@
1
+ import Parser from "tree-sitter";
2
+ import Go from "tree-sitter-go";
3
+ import JavaScriptV023 from "tree-sitter-javascript-v023";
4
+ import Java from "tree-sitter-java";
5
+ import PythonV023 from "tree-sitter-python-v023";
6
+ import Rust from "tree-sitter-rust";
7
+ import TypeScript from "tree-sitter-typescript";
8
+
9
/** Chunking algorithm selector: syntax-tree driven ("language_aware") or token-window ("sliding"). */
export type ChunkingStrategy = "language_aware" | "sliding";
/** Why a language-aware chunking attempt fell back to the sliding strategy. */
export type ChunkingFallbackReason =
  | "unsupported_language"
  | "parser_unavailable"
  | "parse_error"
  | "parse_timeout_exceeded"
  | "empty_language_boundaries";
16
+
17
/** Tunables controlling how raw file content is split into retrieval chunks. */
export interface ChunkingConfig {
  strategy: ChunkingStrategy;
  /** Strategy used when the language-aware path cannot run; only "sliding" is supported. */
  fallback_strategy: "sliding";
  /** Desired token budget per chunk. */
  target_chunk_tokens: number;
  /** Tokens repeated between consecutive sliding-window chunks. */
  chunk_overlap_tokens: number;
  /** Which tokenizer measures chunk budgets. */
  budget_tokenizer: "ranking" | "lightweight";
  /** Boundary node-type set used for JS/TS grammars. */
  boundary_strictness: "legacy" | "semantic_js_ts";
  /** Hard cap on emitted chunks per file. */
  max_chunks_per_file: number;
  /** Parse budget in milliseconds before falling back to sliding. */
  parse_timeout_ms: number;
  /** Language names allowed to take the language-aware path. */
  enabled_languages: string[];
  /** Opt-in: recursively descend the syntax tree to build semantic windows. */
  recursive_semantic_chunking_enabled?: boolean;
  /** Max gap (in lines) between semantic windows that may still be merged (default 6). */
  semantic_merge_gap_lines?: number;
  /** Max total line span a merged semantic window may cover (default 220). */
  semantic_merge_max_span_lines?: number;
  /** Unless explicitly false, comment-only windows are absorbed into the following window. */
  comment_forward_absorb_enabled?: boolean;
}
32
+
33
/** Input file handed to the chunker. */
export interface ChunkingRawFile {
  path: string;
  content: string;
  /** Optional explicit language label; when absent the file extension decides. */
  language?: string;
}

/** One emitted chunk; line numbers are 1-based and inclusive. */
export interface ChunkingOutput {
  start_line: number;
  end_line: number;
  snippet: string;
}
44
+
45
/** Outcome of chunking one file, including which strategy actually ran and timing data. */
export interface ChunkingResult {
  chunks: ChunkingOutput[];
  strategy: ChunkingStrategy;
  /** Present only when the sliding fallback was taken. */
  fallback_reason?: ChunkingFallbackReason;
  parse_latency_ms?: number;
  language_aware_attempt_latency_ms?: number;
  fallback_path_latency_ms?: number;
  /** Canonical language name (e.g. tsx is reported as "typescript"). */
  language?: string;
  recursive_semantic_chunking_used?: boolean;
}
55
+
56
/** Grammar identifiers the chunker can load; tsx/jsx are distinct grammars from ts/js. */
export type ParserLanguage = "typescript" | "tsx" | "javascript" | "jsx" | "python" | "go" | "rust" | "java";

type ParserAvailabilityStatus = "available" | "unavailable";

/** Cached result of attempting to initialize a tree-sitter parser for one grammar. */
interface ParserAvailability {
  status: ParserAvailabilityStatus;
  /** Set only when status is "available". */
  parser?: Parser;
  /** Initialization failure message; set only when status is "unavailable". */
  error?: string;
}

/** Diagnostic entry describing parser readiness for one canonical language. */
export interface ChunkingParserAvailabilitySnapshotEntry {
  language: string;
  status: ParserAvailabilityStatus;
  error?: string;
}
71
+
72
/** Per-grammar node types treated as chunk boundaries (original, conservative set). */
const DEFAULT_BOUNDARY_NODE_TYPES_LEGACY: Record<ParserLanguage, Set<string>> = {
  typescript: new Set([
    "function_declaration",
    "generator_function_declaration",
    "class_declaration",
    "interface_declaration",
    "enum_declaration",
    "type_alias_declaration"
  ]),
  tsx: new Set([
    "function_declaration",
    "generator_function_declaration",
    "class_declaration",
    "interface_declaration",
    "enum_declaration",
    "type_alias_declaration"
  ]),
  javascript: new Set([
    "function_declaration",
    "generator_function_declaration",
    "class_declaration",
    "function_expression",
    "arrow_function"
  ]),
  jsx: new Set([
    "function_declaration",
    "generator_function_declaration",
    "class_declaration",
    "function_expression",
    "arrow_function"
  ]),
  python: new Set(["function_definition", "class_definition"]),
  go: new Set(["function_declaration", "method_declaration", "type_declaration"]),
  rust: new Set(["function_item", "impl_item", "trait_item", "struct_item", "enum_item"]),
  java: new Set(["class_declaration", "interface_declaration", "enum_declaration", "method_declaration"])
};
108
/**
 * Extends the legacy boundary sets for the JS/TS grammars with expression-level
 * constructs (function/arrow expressions, method definitions, class expressions).
 * Non-JS/TS languages reuse the legacy sets unchanged via the spread.
 */
const DEFAULT_BOUNDARY_NODE_TYPES_SEMANTIC_JS_TS: Record<ParserLanguage, Set<string>> = {
  ...DEFAULT_BOUNDARY_NODE_TYPES_LEGACY,
  typescript: new Set([
    ...DEFAULT_BOUNDARY_NODE_TYPES_LEGACY.typescript,
    "function_expression",
    "arrow_function",
    "method_definition",
    "class"
  ]),
  tsx: new Set([
    ...DEFAULT_BOUNDARY_NODE_TYPES_LEGACY.tsx,
    "function_expression",
    "arrow_function",
    "method_definition",
    "class"
  ]),
  javascript: new Set([...DEFAULT_BOUNDARY_NODE_TYPES_LEGACY.javascript, "method_definition", "class"]),
  jsx: new Set([...DEFAULT_BOUNDARY_NODE_TYPES_LEGACY.jsx, "method_definition", "class"])
};
127
+
128
// Process-wide lazy parser state; cleared via the __reset test hook below.
const parserAvailabilityCache = new Map<ParserLanguage, ParserAvailability>();
// Counts initialization attempts per grammar (exposed for test observability).
const parserInitAttempts = new Map<ParserLanguage, number>();
// Test-injected grammar loaders that take precedence over the bundled grammars.
const parserLanguageLoaderOverrides = new Map<ParserLanguage, () => Parser.Language>();
/** Canonical language name -> grammar identifier (tsx/jsx are resolved separately by extension). */
const CANONICAL_TO_PARSER_LANGUAGE: Record<string, ParserLanguage> = {
  typescript: "typescript",
  javascript: "javascript",
  python: "python",
  go: "go",
  rust: "rust",
  java: "java"
};
// Owning node types under which a function/class expression counts as a boundary.
const JAVASCRIPT_EXPRESSION_BOUNDARY_PARENT_TYPES = new Set([
  "assignment_expression",
  "variable_declarator",
  "pair",
  "export_statement",
  "public_field_definition",
  "property_definition"
]);
// Expression node types that are only boundaries when owned by one of the parents above.
const JAVASCRIPT_EXPRESSION_BOUNDARY_NODE_TYPES = new Set(["function_expression", "arrow_function", "class"]);
// Transparent wrapper nodes skipped when walking from an expression up to its owner.
const JAVASCRIPT_EXPRESSION_BOUNDARY_WRAPPER_TYPES = new Set([
  "parenthesized_expression",
  "as_expression",
  "satisfies_expression",
  "type_assertion",
  "non_null_expression"
]);
// Soft ceiling: a chunk may exceed target_chunk_tokens by up to 35% to land on a safe split line.
const SEMANTIC_JS_TS_SOFT_MAX_MULTIPLIER = 1.35;
156
+
157
+ function parserLanguageToCanonical(language: ParserLanguage): string {
158
+ if (language === "tsx") {
159
+ return "typescript";
160
+ }
161
+ if (language === "jsx") {
162
+ return "javascript";
163
+ }
164
+ return language;
165
+ }
166
+
167
+ function normalizeLanguageAlias(language: string): ParserLanguage | undefined {
168
+ const normalized = language.trim().toLowerCase();
169
+ if (normalized === "typescript" || normalized === "ts") {
170
+ return "typescript";
171
+ }
172
+ if (normalized === "tsx") {
173
+ return "tsx";
174
+ }
175
+ if (normalized === "javascript" || normalized === "js") {
176
+ return "javascript";
177
+ }
178
+ if (normalized === "jsx") {
179
+ return "jsx";
180
+ }
181
+ if (normalized === "python" || normalized === "py") {
182
+ return "python";
183
+ }
184
+ if (normalized === "go" || normalized === "golang") {
185
+ return "go";
186
+ }
187
+ if (normalized === "rust" || normalized === "rs") {
188
+ return "rust";
189
+ }
190
+ if (normalized === "java") {
191
+ return "java";
192
+ }
193
+ return undefined;
194
+ }
195
+
196
+ function parserLanguageFromPath(path: string): ParserLanguage | undefined {
197
+ const normalized = path.toLowerCase();
198
+ if (normalized.endsWith(".tsx")) {
199
+ return "tsx";
200
+ }
201
+ if (normalized.endsWith(".ts") || normalized.endsWith(".mts") || normalized.endsWith(".cts")) {
202
+ return "typescript";
203
+ }
204
+ if (normalized.endsWith(".jsx")) {
205
+ return "jsx";
206
+ }
207
+ if (normalized.endsWith(".js") || normalized.endsWith(".mjs") || normalized.endsWith(".cjs")) {
208
+ return "javascript";
209
+ }
210
+ if (normalized.endsWith(".py")) {
211
+ return "python";
212
+ }
213
+ if (normalized.endsWith(".go")) {
214
+ return "go";
215
+ }
216
+ if (normalized.endsWith(".rs")) {
217
+ return "rust";
218
+ }
219
+ if (normalized.endsWith(".java")) {
220
+ return "java";
221
+ }
222
+ return undefined;
223
+ }
224
+
225
+ function resolveParserLanguage(file: ChunkingRawFile): ParserLanguage | undefined {
226
+ const explicit = file.language ? normalizeLanguageAlias(file.language) : undefined;
227
+ if (explicit) {
228
+ return explicit;
229
+ }
230
+ return parserLanguageFromPath(file.path);
231
+ }
232
+
233
+ function formatErrorMessage(error: unknown): string {
234
+ if (error instanceof Error && error.message) {
235
+ return error.message;
236
+ }
237
+ return String(error);
238
+ }
239
+
240
+ function resolveTreeSitterLanguageHandle(moduleValue: unknown): Parser.Language {
241
+ if (moduleValue && typeof moduleValue === "object" && "language" in moduleValue) {
242
+ return (moduleValue as { language: unknown }).language as Parser.Language;
243
+ }
244
+ return moduleValue as Parser.Language;
245
+ }
246
+
247
+ function loadParserLanguage(language: ParserLanguage): Parser.Language {
248
+ const overrideLoader = parserLanguageLoaderOverrides.get(language);
249
+ if (overrideLoader) {
250
+ return overrideLoader();
251
+ }
252
+ if (language === "typescript") {
253
+ return (TypeScript as unknown as { typescript: unknown }).typescript as Parser.Language;
254
+ }
255
+ if (language === "tsx") {
256
+ return (TypeScript as unknown as { tsx: unknown }).tsx as Parser.Language;
257
+ }
258
+ if (language === "javascript" || language === "jsx") {
259
+ // Bun is currently most reliable with tree-sitter 0.23-compatible JS/Python grammars.
260
+ return resolveTreeSitterLanguageHandle(JavaScriptV023);
261
+ }
262
+ if (language === "python") {
263
+ return resolveTreeSitterLanguageHandle(PythonV023);
264
+ }
265
+ if (language === "rust") {
266
+ return resolveTreeSitterLanguageHandle(Rust);
267
+ }
268
+ if (language === "java") {
269
+ return resolveTreeSitterLanguageHandle(Java);
270
+ }
271
+ return resolveTreeSitterLanguageHandle(Go);
272
+ }
273
+
274
+ function getParserAvailability(language: ParserLanguage): ParserAvailability {
275
+ const cached = parserAvailabilityCache.get(language);
276
+ if (cached) {
277
+ return cached;
278
+ }
279
+ parserInitAttempts.set(language, (parserInitAttempts.get(language) ?? 0) + 1);
280
+ try {
281
+ const parser = new Parser();
282
+ parser.setLanguage(loadParserLanguage(language));
283
+ const availability: ParserAvailability = {
284
+ status: "available",
285
+ parser
286
+ };
287
+ parserAvailabilityCache.set(language, availability);
288
+ return availability;
289
+ } catch (error) {
290
+ const availability: ParserAvailability = {
291
+ status: "unavailable",
292
+ error: formatErrorMessage(error)
293
+ };
294
+ parserAvailabilityCache.set(language, availability);
295
+ return availability;
296
+ }
297
+ }
298
+
299
+ function getParser(language: ParserLanguage): Parser | undefined {
300
+ const availability = getParserAvailability(language);
301
+ if (availability.status !== "available") {
302
+ return undefined;
303
+ }
304
+ return availability.parser;
305
+ }
306
+
307
+ function toChunkingParserLanguage(canonicalLanguage: string): ParserLanguage | undefined {
308
+ return CANONICAL_TO_PARSER_LANGUAGE[canonicalLanguage];
309
+ }
310
+
311
+ export function getChunkingParserAvailabilitySnapshot(input?: {
312
+ enabled_languages?: string[];
313
+ }): ChunkingParserAvailabilitySnapshotEntry[] {
314
+ const canonicalLanguages = new Set<string>();
315
+ if (input?.enabled_languages && input.enabled_languages.length > 0) {
316
+ for (const language of input.enabled_languages) {
317
+ const parsed = normalizeLanguageAlias(language);
318
+ if (!parsed) {
319
+ continue;
320
+ }
321
+ const canonical = parserLanguageToCanonical(parsed);
322
+ if (toChunkingParserLanguage(canonical)) {
323
+ canonicalLanguages.add(canonical);
324
+ }
325
+ }
326
+ }
327
+ if (canonicalLanguages.size === 0) {
328
+ for (const canonical of Object.keys(CANONICAL_TO_PARSER_LANGUAGE)) {
329
+ canonicalLanguages.add(canonical);
330
+ }
331
+ }
332
+
333
+ return [...canonicalLanguages]
334
+ .sort((a, b) => a.localeCompare(b))
335
+ .map((canonicalLanguage) => {
336
+ const parserLanguage = toChunkingParserLanguage(canonicalLanguage);
337
+ if (!parserLanguage) {
338
+ return {
339
+ language: canonicalLanguage,
340
+ status: "unavailable" as const,
341
+ error: "no parser mapping for language"
342
+ };
343
+ }
344
+ const availability = getParserAvailability(parserLanguage);
345
+ return {
346
+ language: canonicalLanguage,
347
+ status: availability.status,
348
+ ...(availability.error ? { error: availability.error } : {})
349
+ };
350
+ });
351
+ }
352
+
353
/** Test-only hook: drop all cached parsers, init-attempt counters, and loader overrides. */
export function __resetChunkingParserStateForTests(): void {
  parserAvailabilityCache.clear();
  parserInitAttempts.clear();
  parserLanguageLoaderOverrides.clear();
}
358
+
359
+ export function __setChunkingParserLanguageLoaderForTests(
360
+ language: ParserLanguage,
361
+ loader: (() => Parser.Language) | undefined
362
+ ): void {
363
+ if (loader) {
364
+ parserLanguageLoaderOverrides.set(language, loader);
365
+ } else {
366
+ parserLanguageLoaderOverrides.delete(language);
367
+ }
368
+ parserAvailabilityCache.delete(language);
369
+ }
370
+
371
+ export function __getChunkingParserInitAttemptsForTests(): Record<string, number> {
372
+ return Object.fromEntries(parserInitAttempts.entries());
373
+ }
374
+
375
+ function toInclusiveEndRow(node: Parser.SyntaxNode): number {
376
+ const end = node.endPosition;
377
+ if (end.column === 0 && end.row > node.startPosition.row) {
378
+ return end.row - 1;
379
+ }
380
+ return end.row;
381
+ }
382
+
383
+ function trimLineRange(lines: string[], startRow: number, endRow: number): { start: number; end: number } | undefined {
384
+ let start = startRow;
385
+ let end = endRow;
386
+ while (start <= end && (lines[start] ?? "").trim().length === 0) {
387
+ start += 1;
388
+ }
389
+ while (end >= start && (lines[end] ?? "").trim().length === 0) {
390
+ end -= 1;
391
+ }
392
+ if (end < start) {
393
+ return undefined;
394
+ }
395
+ return { start, end };
396
+ }
397
+
398
/**
 * Split the row range [startRow, endRow] into segments of roughly
 * targetChunkTokens each, honoring maxChunks and optional token overlap.
 * With preferSafeBoundarySplit, segment ends are nudged backward (or forward,
 * up to softMaxChunkTokens) to a line ending in ";", "}", "{" or blank.
 */
function splitRangeWithBudget(input: {
  lines: string[];
  lineTokenCounts: number[];
  startRow: number;
  endRow: number;
  targetChunkTokens: number;
  overlapTokens: number;
  maxChunks: number;
  preferSafeBoundarySplit?: boolean;
  softMaxChunkTokens?: number;
}): Array<{ startRow: number; endRow: number }> {
  // Sum of per-line token counts over an inclusive row range (missing rows count 0).
  const rangeTokenCount = (startRow: number, endRow: number): number => {
    let total = 0;
    for (let row = startRow; row <= endRow; row += 1) {
      total += input.lineTokenCounts[row] ?? 0;
    }
    return total;
  };
  // Lines safe to split after: blank, or ending a statement/brace.
  const isSafeSplitBoundaryLine = (line: string): boolean => {
    const trimmed = line.trim();
    if (trimmed.length === 0) {
      return true;
    }
    return trimmed.endsWith(";") || trimmed.endsWith("}") || trimmed.endsWith("{");
  };
  const segments: Array<{ startRow: number; endRow: number }> = [];
  let start = input.startRow;

  while (start <= input.endRow && segments.length < input.maxChunks) {
    // Grow the segment line by line until the token budget is reached.
    let tokens = 0;
    let end = start - 1;
    while (end < input.endRow) {
      const nextEnd = end + 1;
      tokens += input.lineTokenCounts[nextEnd] ?? 0;
      end = nextEnd;
      if (tokens >= input.targetChunkTokens && end >= start) {
        break;
      }
    }

    // Clamp, then optionally retreat to the nearest safe boundary line.
    let safeEnd = Math.min(Math.max(start, end), input.endRow);
    if (input.preferSafeBoundarySplit && safeEnd > start) {
      let adjusted = safeEnd;
      for (let row = safeEnd; row > start; row -= 1) {
        if (isSafeSplitBoundaryLine(input.lines[row] ?? "")) {
          adjusted = row;
          break;
        }
      }
      // No boundary behind us: scan forward while staying under the soft token ceiling.
      if (adjusted === safeEnd && typeof input.softMaxChunkTokens === "number" && input.softMaxChunkTokens > input.targetChunkTokens) {
        for (let row = safeEnd + 1; row <= input.endRow; row += 1) {
          if (rangeTokenCount(start, row) > input.softMaxChunkTokens) {
            break;
          }
          if (isSafeSplitBoundaryLine(input.lines[row] ?? "")) {
            adjusted = row;
            break;
          }
        }
      }
      safeEnd = Math.max(start, adjusted);
    }
    if (safeEnd >= start) {
      segments.push({ startRow: start, endRow: safeEnd });
    }

    if (safeEnd >= input.endRow) {
      break;
    }

    // Walk backward to include ~overlapTokens of trailing context in the next segment;
    // Math.max(start + 1, ...) guarantees forward progress either way.
    let nextStart = safeEnd + 1;
    if (input.overlapTokens > 0) {
      let overlap = 0;
      let cursor = safeEnd;
      while (cursor >= start && overlap < input.overlapTokens) {
        overlap += input.lineTokenCounts[cursor] ?? 0;
        cursor -= 1;
      }
      nextStart = Math.max(start + 1, cursor + 1);
    }
    start = Math.max(start + 1, nextStart);
  }

  return segments;
}
483
+
484
+ function buildSlidingChunks(input: {
485
+ lines: string[];
486
+ tokenize: (text: string) => string[];
487
+ targetChunkTokens: number;
488
+ overlapTokens: number;
489
+ maxChunks: number;
490
+ lineTokenCounts?: number[];
491
+ }): ChunkingOutput[] {
492
+ const lineTokenCounts = input.lineTokenCounts ?? computeLineTokenCounts(input.lines, input.tokenize);
493
+ const rawSegments = splitRangeWithBudget({
494
+ lines: input.lines,
495
+ lineTokenCounts,
496
+ startRow: 0,
497
+ endRow: Math.max(0, input.lines.length - 1),
498
+ targetChunkTokens: input.targetChunkTokens,
499
+ overlapTokens: input.overlapTokens,
500
+ maxChunks: input.maxChunks
501
+ });
502
+ const chunks: ChunkingOutput[] = [];
503
+ for (const segment of rawSegments) {
504
+ const trimmed = trimLineRange(input.lines, segment.startRow, segment.endRow);
505
+ if (!trimmed) {
506
+ continue;
507
+ }
508
+ chunks.push({
509
+ start_line: trimmed.start + 1,
510
+ end_line: trimmed.end + 1,
511
+ snippet: input.lines.slice(trimmed.start, trimmed.end + 1).join("\n")
512
+ });
513
+ }
514
+ return chunks;
515
+ }
516
+
517
+ function hasBoundaryAncestor(node: Parser.SyntaxNode, boundaryTypes: Set<string>): boolean {
518
+ let current = node.parent;
519
+ while (current) {
520
+ if (boundaryTypes.has(current.type)) {
521
+ return true;
522
+ }
523
+ current = current.parent;
524
+ }
525
+ return false;
526
+ }
527
+
528
+ function getBoundaryTypes(
529
+ parserLanguage: ParserLanguage,
530
+ boundaryStrictness: "legacy" | "semantic_js_ts"
531
+ ): Set<string> {
532
+ if (boundaryStrictness === "semantic_js_ts") {
533
+ return DEFAULT_BOUNDARY_NODE_TYPES_SEMANTIC_JS_TS[parserLanguage];
534
+ }
535
+ return DEFAULT_BOUNDARY_NODE_TYPES_LEGACY[parserLanguage];
536
+ }
537
+
538
+ function isExpressionBoundaryLanguage(parserLanguage: ParserLanguage, boundaryStrictness: "legacy" | "semantic_js_ts"): boolean {
539
+ if (boundaryStrictness === "semantic_js_ts") {
540
+ return (
541
+ parserLanguage === "javascript" ||
542
+ parserLanguage === "jsx" ||
543
+ parserLanguage === "typescript" ||
544
+ parserLanguage === "tsx"
545
+ );
546
+ }
547
+ return parserLanguage === "javascript" || parserLanguage === "jsx";
548
+ }
549
+
550
+ function isLanguageBoundaryCandidate(
551
+ parserLanguage: ParserLanguage,
552
+ node: Parser.SyntaxNode,
553
+ boundaryStrictness: "legacy" | "semantic_js_ts"
554
+ ): boolean {
555
+ if (!isExpressionBoundaryLanguage(parserLanguage, boundaryStrictness)) {
556
+ return true;
557
+ }
558
+ if (!JAVASCRIPT_EXPRESSION_BOUNDARY_NODE_TYPES.has(node.type)) {
559
+ return true;
560
+ }
561
+ let owner = node.parent;
562
+ while (owner && JAVASCRIPT_EXPRESSION_BOUNDARY_WRAPPER_TYPES.has(owner.type)) {
563
+ owner = owner.parent;
564
+ }
565
+ const ownerType = owner?.type;
566
+ if (!ownerType) {
567
+ return false;
568
+ }
569
+ return JAVASCRIPT_EXPRESSION_BOUNDARY_PARENT_TYPES.has(ownerType);
570
+ }
571
+
572
+ export function __isChunkingBoundaryCandidateForTests(input: {
573
+ parserLanguage: ParserLanguage;
574
+ nodeType: string;
575
+ parentType?: string;
576
+ ancestorTypes?: string[];
577
+ boundaryStrictness?: "legacy" | "semantic_js_ts";
578
+ }): boolean {
579
+ const strictness = input.boundaryStrictness ?? "legacy";
580
+ if (!isExpressionBoundaryLanguage(input.parserLanguage, strictness)) {
581
+ return true;
582
+ }
583
+ if (!JAVASCRIPT_EXPRESSION_BOUNDARY_NODE_TYPES.has(input.nodeType)) {
584
+ return true;
585
+ }
586
+ const ancestorChain: string[] = [];
587
+ if (input.parentType) {
588
+ ancestorChain.push(input.parentType);
589
+ }
590
+ if (input.ancestorTypes && input.ancestorTypes.length > 0) {
591
+ ancestorChain.push(...input.ancestorTypes);
592
+ }
593
+ const ownerType = ancestorChain.find((type) => !JAVASCRIPT_EXPRESSION_BOUNDARY_WRAPPER_TYPES.has(type));
594
+ if (!ownerType) {
595
+ return false;
596
+ }
597
+ return JAVASCRIPT_EXPRESSION_BOUNDARY_PARENT_TYPES.has(ownerType);
598
+ }
599
+
600
+ function computeLineTokenCounts(lines: string[], tokenize: (text: string) => string[]): number[] {
601
+ return lines.map((line) => tokenize(line ?? "").length);
602
+ }
603
+
604
+ function rangeTokenCount(lineTokenCounts: number[], startRow: number, endRow: number): number {
605
+ let total = 0;
606
+ for (let row = startRow; row <= endRow; row += 1) {
607
+ total += lineTokenCounts[row] ?? 0;
608
+ }
609
+ return total;
610
+ }
611
+
612
+ function listNamedChildren(node: Parser.SyntaxNode): Parser.SyntaxNode[] {
613
+ const children: Parser.SyntaxNode[] = [];
614
+ for (let index = 0; index < node.namedChildCount; index += 1) {
615
+ const child = node.namedChild(index);
616
+ if (child) {
617
+ children.push(child);
618
+ }
619
+ }
620
+ return children;
621
+ }
622
+
623
+ function normalizeNodeWindow(input: {
624
+ node: Parser.SyntaxNode;
625
+ lines: string[];
626
+ lastRow: number;
627
+ }): { startRow: number; endRow: number } | undefined {
628
+ const startRow = Math.max(0, Math.min(input.lastRow, input.node.startPosition.row));
629
+ const endRow = Math.max(startRow, Math.min(input.lastRow, toInclusiveEndRow(input.node)));
630
+ const trimmed = trimLineRange(input.lines, startRow, endRow);
631
+ if (!trimmed) {
632
+ return undefined;
633
+ }
634
+ return {
635
+ startRow: trimmed.start,
636
+ endRow: trimmed.end
637
+ };
638
+ }
639
+
640
/**
 * Walk the syntax tree top-down building line windows: a node that fits within
 * targetChunkTokens becomes one window; an oversized node is decomposed into
 * its named children, with the uncovered gaps between children split by token
 * budget. Windows are deduplicated and returned sorted by row; the total is
 * capped at maxChunks.
 */
function buildRecursiveSemanticWindows(input: {
  root: Parser.SyntaxNode;
  lines: string[];
  lineTokenCounts: number[];
  targetChunkTokens: number;
  maxChunks: number;
  boundaryStrictness: "legacy" | "semantic_js_ts";
}): Array<{ startRow: number; endRow: number }> {
  const lastRow = Math.max(0, input.lines.length - 1);
  const windows: Array<{ startRow: number; endRow: number }> = [];
  const softMaxChunkTokens = Math.floor(input.targetChunkTokens * SEMANTIC_JS_TS_SOFT_MAX_MULTIPLIER);
  // Dedupe key set: "start:end" of every emitted window.
  const seen = new Set<string>();

  // Budget-split a leftover row range and append the trimmed, unseen segments.
  const pushSplitWindows = (startRow: number, endRow: number): void => {
    if (startRow > endRow || windows.length >= input.maxChunks) {
      return;
    }
    const segments = splitRangeWithBudget({
      lines: input.lines,
      lineTokenCounts: input.lineTokenCounts,
      startRow,
      endRow,
      targetChunkTokens: input.targetChunkTokens,
      overlapTokens: 0,
      maxChunks: input.maxChunks - windows.length,
      preferSafeBoundarySplit: input.boundaryStrictness === "semantic_js_ts",
      softMaxChunkTokens
    });
    for (const segment of segments) {
      const trimmed = trimLineRange(input.lines, segment.startRow, segment.endRow);
      if (!trimmed) {
        continue;
      }
      const key = `${trimmed.start}:${trimmed.end}`;
      if (seen.has(key)) {
        continue;
      }
      seen.add(key);
      windows.push({ startRow: trimmed.start, endRow: trimmed.end });
      if (windows.length >= input.maxChunks) {
        return;
      }
    }
  };

  const visitNode = (node: Parser.SyntaxNode): void => {
    if (windows.length >= input.maxChunks) {
      return;
    }
    const range = normalizeNodeWindow({
      node,
      lines: input.lines,
      lastRow
    });
    if (!range) {
      return;
    }
    const tokenCount = rangeTokenCount(input.lineTokenCounts, range.startRow, range.endRow);
    // Node fits the budget: emit it whole and stop descending.
    if (tokenCount <= input.targetChunkTokens) {
      const key = `${range.startRow}:${range.endRow}`;
      if (!seen.has(key)) {
        seen.add(key);
        windows.push(range);
      }
      return;
    }

    // Oversized node: recurse into its named children in row order.
    const children = listNamedChildren(node)
      .map((child) => ({
        node: child,
        range: normalizeNodeWindow({
          node: child,
          lines: input.lines,
          lastRow
        })
      }))
      .filter((child): child is { node: Parser.SyntaxNode; range: { startRow: number; endRow: number } } =>
        Boolean(child.range)
      )
      .sort((a, b) => a.range.startRow - b.range.startRow || a.range.endRow - b.range.endRow);

    if (children.length === 0) {
      pushSplitWindows(range.startRow, range.endRow);
      return;
    }

    // Sweep across the node's rows; budget-split gaps not covered by any child.
    let cursor = range.startRow;
    for (const child of children) {
      if (windows.length >= input.maxChunks) {
        return;
      }
      if (child.range.endRow < cursor) {
        continue;
      }
      if (child.range.startRow > cursor) {
        pushSplitWindows(cursor, child.range.startRow - 1);
      }

      visitNode(child.node);
      cursor = Math.max(cursor, child.range.endRow + 1);
      if (cursor > range.endRow) {
        return;
      }
    }
    // Trailing rows after the last child.
    if (cursor <= range.endRow) {
      pushSplitWindows(cursor, range.endRow);
    }
  };

  visitNode(input.root);
  return windows.sort((a, b) => a.startRow - b.startRow || a.endRow - b.endRow);
}
752
+
753
+ function mergeSemanticWindows(input: {
754
+ windows: Array<{ startRow: number; endRow: number }>;
755
+ lineTokenCounts: number[];
756
+ targetChunkTokens: number;
757
+ semanticMergeGapLines: number;
758
+ semanticMergeMaxSpanLines: number;
759
+ }): Array<{ startRow: number; endRow: number }> {
760
+ if (input.windows.length <= 1) {
761
+ return [...input.windows];
762
+ }
763
+ const ordered = [...input.windows].sort((a, b) => a.startRow - b.startRow || a.endRow - b.endRow);
764
+ const merged: Array<{ startRow: number; endRow: number }> = [];
765
+ const mergeTokenBudget = Math.floor(input.targetChunkTokens * SEMANTIC_JS_TS_SOFT_MAX_MULTIPLIER);
766
+ for (const window of ordered) {
767
+ const last = merged[merged.length - 1];
768
+ if (!last) {
769
+ merged.push({ ...window });
770
+ continue;
771
+ }
772
+ const gapLines = Math.max(0, window.startRow - last.endRow - 1);
773
+ const nextStartRow = Math.min(last.startRow, window.startRow);
774
+ const nextEndRow = Math.max(last.endRow, window.endRow);
775
+ const nextSpanLines = nextEndRow - nextStartRow + 1;
776
+ const mergedTokenCount = rangeTokenCount(input.lineTokenCounts, nextStartRow, nextEndRow);
777
+ const canMerge =
778
+ gapLines <= input.semanticMergeGapLines &&
779
+ nextSpanLines <= input.semanticMergeMaxSpanLines &&
780
+ mergedTokenCount <= mergeTokenBudget;
781
+ if (!canMerge) {
782
+ merged.push({ ...window });
783
+ continue;
784
+ }
785
+ last.startRow = nextStartRow;
786
+ last.endRow = nextEndRow;
787
+ }
788
+ return merged;
789
+ }
790
+
791
+ function isCommentOnlyLine(line: string): boolean {
792
+ const trimmed = line.trim();
793
+ if (trimmed.length === 0) {
794
+ return true;
795
+ }
796
+ return (
797
+ trimmed.startsWith("//") ||
798
+ trimmed.startsWith("/*") ||
799
+ trimmed.startsWith("*") ||
800
+ trimmed.startsWith("*/") ||
801
+ trimmed.startsWith("#")
802
+ );
803
+ }
804
+
805
+ function windowLooksCommentOnly(input: { lines: string[]; startRow: number; endRow: number }): boolean {
806
+ for (let row = input.startRow; row <= input.endRow; row += 1) {
807
+ if (!isCommentOnlyLine(input.lines[row] ?? "")) {
808
+ return false;
809
+ }
810
+ }
811
+ return true;
812
+ }
813
+
814
/**
 * Fold comment-only windows into the window that follows them (so a doc
 * comment travels with the code it documents). Absorption only happens when
 * the gap is at most one line and the combined span and token count stay
 * within limits (token budget = target * 1.35); the absorbed pair is emitted
 * as one window and the follower is skipped.
 */
function absorbForwardCommentWindows(input: {
  windows: Array<{ startRow: number; endRow: number }>;
  lines: string[];
  lineTokenCounts: number[];
  targetChunkTokens: number;
  semanticMergeMaxSpanLines: number;
}): Array<{ startRow: number; endRow: number }> {
  if (input.windows.length <= 1) {
    return [...input.windows];
  }
  const output: Array<{ startRow: number; endRow: number }> = [];
  const mergeTokenBudget = Math.floor(input.targetChunkTokens * SEMANTIC_JS_TS_SOFT_MAX_MULTIPLIER);
  for (let index = 0; index < input.windows.length; index += 1) {
    const current = input.windows[index];
    const next = input.windows[index + 1];
    if (!current) {
      continue;
    }
    // Last window has nothing to absorb into.
    if (!next) {
      output.push({ ...current });
      continue;
    }
    // Only comment-only windows are candidates for forward absorption.
    if (!windowLooksCommentOnly({ lines: input.lines, startRow: current.startRow, endRow: current.endRow })) {
      output.push({ ...current });
      continue;
    }
    const gapLines = Math.max(0, next.startRow - current.endRow - 1);
    const nextSpanLines = next.endRow - current.startRow + 1;
    const mergedTokenCount = rangeTokenCount(input.lineTokenCounts, current.startRow, next.endRow);
    const canAbsorb =
      gapLines <= 1 && nextSpanLines <= input.semanticMergeMaxSpanLines && mergedTokenCount <= mergeTokenBudget;
    if (!canAbsorb) {
      output.push({ ...current });
      continue;
    }
    output.push({
      startRow: current.startRow,
      endRow: next.endRow
    });
    // Skip the follower — it was merged into the window just emitted.
    index += 1;
  }
  return output;
}
857
+
858
+ function windowsToChunks(input: {
859
+ windows: Array<{ startRow: number; endRow: number }>;
860
+ lines: string[];
861
+ maxChunks: number;
862
+ }): ChunkingOutput[] {
863
+ const chunks: ChunkingOutput[] = [];
864
+ for (const window of input.windows) {
865
+ if (chunks.length >= input.maxChunks) {
866
+ break;
867
+ }
868
+ const trimmed = trimLineRange(input.lines, window.startRow, window.endRow);
869
+ if (!trimmed) {
870
+ continue;
871
+ }
872
+ chunks.push({
873
+ start_line: trimmed.start + 1,
874
+ end_line: trimmed.end + 1,
875
+ snippet: input.lines.slice(trimmed.start, trimmed.end + 1).join("\n")
876
+ });
877
+ }
878
+ return chunks;
879
+ }
880
+
881
+ function buildLanguageAwareChunks(input: {
882
+ file: ChunkingRawFile;
883
+ lines: string[];
884
+ parserLanguage: ParserLanguage;
885
+ config: ChunkingConfig;
886
+ tokenize: (text: string) => string[];
887
+ }): ChunkingResult {
888
+ const languageAwareAttemptStart = Date.now();
889
+ const lineTokenCounts = computeLineTokenCounts(input.lines, input.tokenize);
890
+ const parser = getParser(input.parserLanguage);
891
+ if (!parser) {
892
+ const fallbackStart = Date.now();
893
+ const chunks = buildSlidingChunks({
894
+ lines: input.lines,
895
+ tokenize: input.tokenize,
896
+ targetChunkTokens: input.config.target_chunk_tokens,
897
+ overlapTokens: input.config.chunk_overlap_tokens,
898
+ maxChunks: input.config.max_chunks_per_file,
899
+ lineTokenCounts
900
+ });
901
+ return {
902
+ chunks,
903
+ strategy: "sliding",
904
+ fallback_reason: "parser_unavailable",
905
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
906
+ fallback_path_latency_ms: Date.now() - fallbackStart,
907
+ language: parserLanguageToCanonical(input.parserLanguage)
908
+ };
909
+ }
910
+
911
+ try {
912
+ parser.setTimeoutMicros(input.config.parse_timeout_ms * 1_000);
913
+ const parseStart = Date.now();
914
+ const tree = parser.parse(input.file.content);
915
+ const parseLatencyMs = Date.now() - parseStart;
916
+ if (parseLatencyMs > input.config.parse_timeout_ms) {
917
+ const fallbackStart = Date.now();
918
+ const chunks = buildSlidingChunks({
919
+ lines: input.lines,
920
+ tokenize: input.tokenize,
921
+ targetChunkTokens: input.config.target_chunk_tokens,
922
+ overlapTokens: input.config.chunk_overlap_tokens,
923
+ maxChunks: input.config.max_chunks_per_file,
924
+ lineTokenCounts
925
+ });
926
+ return {
927
+ chunks,
928
+ strategy: "sliding",
929
+ fallback_reason: "parse_timeout_exceeded",
930
+ parse_latency_ms: parseLatencyMs,
931
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
932
+ fallback_path_latency_ms: Date.now() - fallbackStart,
933
+ language: parserLanguageToCanonical(input.parserLanguage)
934
+ };
935
+ }
936
+
937
+ const root = (tree as Parser.Tree | null)?.rootNode;
938
+ if (!root) {
939
+ const fallbackStart = Date.now();
940
+ const chunks = buildSlidingChunks({
941
+ lines: input.lines,
942
+ tokenize: input.tokenize,
943
+ targetChunkTokens: input.config.target_chunk_tokens,
944
+ overlapTokens: input.config.chunk_overlap_tokens,
945
+ maxChunks: input.config.max_chunks_per_file,
946
+ lineTokenCounts
947
+ });
948
+ return {
949
+ chunks,
950
+ strategy: "sliding",
951
+ fallback_reason: "parse_error",
952
+ parse_latency_ms: parseLatencyMs,
953
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
954
+ fallback_path_latency_ms: Date.now() - fallbackStart,
955
+ language: parserLanguageToCanonical(input.parserLanguage)
956
+ };
957
+ }
958
+
959
+ let chunks: ChunkingOutput[] = [];
960
+ let recursiveSemanticChunkingUsed = false;
961
+
962
+ if (input.config.recursive_semantic_chunking_enabled) {
963
+ const semanticMergeGapLines = input.config.semantic_merge_gap_lines ?? 6;
964
+ const semanticMergeMaxSpanLines = input.config.semantic_merge_max_span_lines ?? 220;
965
+ const recursiveWindows = buildRecursiveSemanticWindows({
966
+ root,
967
+ lines: input.lines,
968
+ lineTokenCounts,
969
+ targetChunkTokens: input.config.target_chunk_tokens,
970
+ maxChunks: input.config.max_chunks_per_file,
971
+ boundaryStrictness: input.config.boundary_strictness
972
+ });
973
+ const mergedWindows = mergeSemanticWindows({
974
+ windows: recursiveWindows,
975
+ lineTokenCounts,
976
+ targetChunkTokens: input.config.target_chunk_tokens,
977
+ semanticMergeGapLines,
978
+ semanticMergeMaxSpanLines
979
+ });
980
+ const absorbedWindows =
981
+ input.config.comment_forward_absorb_enabled === false
982
+ ? mergedWindows
983
+ : absorbForwardCommentWindows({
984
+ windows: mergedWindows,
985
+ lines: input.lines,
986
+ lineTokenCounts,
987
+ targetChunkTokens: input.config.target_chunk_tokens,
988
+ semanticMergeMaxSpanLines
989
+ });
990
+ chunks = windowsToChunks({
991
+ windows: absorbedWindows,
992
+ lines: input.lines,
993
+ maxChunks: input.config.max_chunks_per_file
994
+ });
995
+ recursiveSemanticChunkingUsed = chunks.length > 0;
996
+ } else {
997
+ const boundaryTypes = getBoundaryTypes(input.parserLanguage, input.config.boundary_strictness);
998
+ const candidates = root.descendantsOfType([...boundaryTypes]);
999
+ const boundaryNodes = candidates
1000
+ .filter((node) => !hasBoundaryAncestor(node, boundaryTypes))
1001
+ .filter((node) => isLanguageBoundaryCandidate(input.parserLanguage, node, input.config.boundary_strictness))
1002
+ .sort((a, b) => a.startPosition.row - b.startPosition.row || a.startPosition.column - b.startPosition.column);
1003
+
1004
+ if (boundaryNodes.length === 0) {
1005
+ const fallbackStart = Date.now();
1006
+ const fallbackChunks = buildSlidingChunks({
1007
+ lines: input.lines,
1008
+ tokenize: input.tokenize,
1009
+ targetChunkTokens: input.config.target_chunk_tokens,
1010
+ overlapTokens: input.config.chunk_overlap_tokens,
1011
+ maxChunks: input.config.max_chunks_per_file,
1012
+ lineTokenCounts
1013
+ });
1014
+ return {
1015
+ chunks: fallbackChunks,
1016
+ strategy: "sliding",
1017
+ fallback_reason: "empty_language_boundaries",
1018
+ parse_latency_ms: parseLatencyMs,
1019
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
1020
+ fallback_path_latency_ms: Date.now() - fallbackStart,
1021
+ language: parserLanguageToCanonical(input.parserLanguage)
1022
+ };
1023
+ }
1024
+
1025
+ const segments: Array<{ startRow: number; endRow: number; boundary: boolean }> = [];
1026
+ let cursor = 0;
1027
+ const lastRow = Math.max(0, input.lines.length - 1);
1028
+ for (const node of boundaryNodes) {
1029
+ const startRow = Math.max(0, Math.min(lastRow, node.startPosition.row));
1030
+ const endRow = Math.max(startRow, Math.min(lastRow, toInclusiveEndRow(node)));
1031
+ if (startRow > cursor) {
1032
+ segments.push({ startRow: cursor, endRow: startRow - 1, boundary: false });
1033
+ }
1034
+ segments.push({ startRow, endRow, boundary: true });
1035
+ cursor = endRow + 1;
1036
+ if (cursor > lastRow) {
1037
+ break;
1038
+ }
1039
+ }
1040
+ if (cursor <= lastRow) {
1041
+ segments.push({ startRow: cursor, endRow: lastRow, boundary: false });
1042
+ }
1043
+
1044
+ for (const segment of segments) {
1045
+ if (segment.endRow < segment.startRow || chunks.length >= input.config.max_chunks_per_file) {
1046
+ continue;
1047
+ }
1048
+ const segmentTokenCount = lineTokenCounts
1049
+ .slice(segment.startRow, segment.endRow + 1)
1050
+ .reduce((sum, value) => sum + value, 0);
1051
+ const enableSemanticBoundarySplits =
1052
+ input.config.boundary_strictness === "semantic_js_ts" &&
1053
+ (input.parserLanguage === "javascript" ||
1054
+ input.parserLanguage === "jsx" ||
1055
+ input.parserLanguage === "typescript" ||
1056
+ input.parserLanguage === "tsx") &&
1057
+ segment.boundary;
1058
+ const softMaxChunkTokens = Math.floor(input.config.target_chunk_tokens * SEMANTIC_JS_TS_SOFT_MAX_MULTIPLIER);
1059
+ const pieces =
1060
+ enableSemanticBoundarySplits && segmentTokenCount <= softMaxChunkTokens
1061
+ ? [{ startRow: segment.startRow, endRow: segment.endRow }]
1062
+ : splitRangeWithBudget({
1063
+ lines: input.lines,
1064
+ lineTokenCounts,
1065
+ startRow: segment.startRow,
1066
+ endRow: segment.endRow,
1067
+ targetChunkTokens: input.config.target_chunk_tokens,
1068
+ overlapTokens: input.config.chunk_overlap_tokens,
1069
+ maxChunks: input.config.max_chunks_per_file - chunks.length,
1070
+ preferSafeBoundarySplit: enableSemanticBoundarySplits,
1071
+ softMaxChunkTokens
1072
+ });
1073
+ for (const piece of pieces) {
1074
+ const trimmed = trimLineRange(input.lines, piece.startRow, piece.endRow);
1075
+ if (!trimmed) {
1076
+ continue;
1077
+ }
1078
+ chunks.push({
1079
+ start_line: trimmed.start + 1,
1080
+ end_line: trimmed.end + 1,
1081
+ snippet: input.lines.slice(trimmed.start, trimmed.end + 1).join("\n")
1082
+ });
1083
+ if (chunks.length >= input.config.max_chunks_per_file) {
1084
+ break;
1085
+ }
1086
+ }
1087
+ }
1088
+ }
1089
+
1090
+ if (chunks.length === 0) {
1091
+ const fallbackStart = Date.now();
1092
+ const slidingChunks = buildSlidingChunks({
1093
+ lines: input.lines,
1094
+ tokenize: input.tokenize,
1095
+ targetChunkTokens: input.config.target_chunk_tokens,
1096
+ overlapTokens: input.config.chunk_overlap_tokens,
1097
+ maxChunks: input.config.max_chunks_per_file,
1098
+ lineTokenCounts
1099
+ });
1100
+ return {
1101
+ chunks: slidingChunks,
1102
+ strategy: "sliding",
1103
+ fallback_reason: "empty_language_boundaries",
1104
+ parse_latency_ms: parseLatencyMs,
1105
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
1106
+ fallback_path_latency_ms: Date.now() - fallbackStart,
1107
+ language: parserLanguageToCanonical(input.parserLanguage)
1108
+ };
1109
+ }
1110
+
1111
+ return {
1112
+ chunks,
1113
+ strategy: "language_aware",
1114
+ parse_latency_ms: parseLatencyMs,
1115
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
1116
+ language: parserLanguageToCanonical(input.parserLanguage),
1117
+ recursive_semantic_chunking_used: recursiveSemanticChunkingUsed
1118
+ };
1119
+ } catch {
1120
+ const fallbackStart = Date.now();
1121
+ const chunks = buildSlidingChunks({
1122
+ lines: input.lines,
1123
+ tokenize: input.tokenize,
1124
+ targetChunkTokens: input.config.target_chunk_tokens,
1125
+ overlapTokens: input.config.chunk_overlap_tokens,
1126
+ maxChunks: input.config.max_chunks_per_file,
1127
+ lineTokenCounts
1128
+ });
1129
+ return {
1130
+ chunks,
1131
+ strategy: "sliding",
1132
+ fallback_reason: "parse_error",
1133
+ language_aware_attempt_latency_ms: Date.now() - languageAwareAttemptStart,
1134
+ fallback_path_latency_ms: Date.now() - fallbackStart,
1135
+ language: parserLanguageToCanonical(input.parserLanguage)
1136
+ };
1137
+ }
1138
+ }
1139
+
1140
+ export function buildChunksForFile(input: {
1141
+ file: ChunkingRawFile;
1142
+ config: ChunkingConfig;
1143
+ tokenize: (text: string) => string[];
1144
+ }): ChunkingResult {
1145
+ const lines = input.file.content.split("\n");
1146
+ const language = resolveParserLanguage(input.file);
1147
+ const enabledLanguageSet = new Set(input.config.enabled_languages.map((value) => value.trim().toLowerCase()));
1148
+
1149
+ if (input.config.strategy === "sliding") {
1150
+ return {
1151
+ chunks: buildSlidingChunks({
1152
+ lines,
1153
+ tokenize: input.tokenize,
1154
+ targetChunkTokens: input.config.target_chunk_tokens,
1155
+ overlapTokens: input.config.chunk_overlap_tokens,
1156
+ maxChunks: input.config.max_chunks_per_file
1157
+ }),
1158
+ strategy: "sliding",
1159
+ language: language ? parserLanguageToCanonical(language) : undefined
1160
+ };
1161
+ }
1162
+
1163
+ if (!language || !enabledLanguageSet.has(parserLanguageToCanonical(language))) {
1164
+ const fallbackStart = Date.now();
1165
+ const chunks = buildSlidingChunks({
1166
+ lines,
1167
+ tokenize: input.tokenize,
1168
+ targetChunkTokens: input.config.target_chunk_tokens,
1169
+ overlapTokens: input.config.chunk_overlap_tokens,
1170
+ maxChunks: input.config.max_chunks_per_file
1171
+ });
1172
+ return {
1173
+ chunks,
1174
+ strategy: "sliding",
1175
+ fallback_reason: "unsupported_language",
1176
+ fallback_path_latency_ms: Date.now() - fallbackStart
1177
+ };
1178
+ }
1179
+
1180
+ return buildLanguageAwareChunks({
1181
+ file: input.file,
1182
+ lines,
1183
+ parserLanguage: language,
1184
+ config: input.config,
1185
+ tokenize: input.tokenize
1186
+ });
1187
+ }