@copilotkit/pathfinder 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.env.example +20 -0
  2. package/.superpowers/brainstorm/47098-1775507869/content/homepage-mockup.html +324 -0
  3. package/.superpowers/brainstorm/47098-1775507869/state/server-stopped +1 -0
  4. package/.superpowers/brainstorm/47098-1775507869/state/server.log +13 -0
  5. package/.superpowers/brainstorm/47098-1775507869/state/server.pid +1 -0
  6. package/.superpowers/brainstorm/82141-1775511032/content/migration-v2.html +340 -0
  7. package/.superpowers/brainstorm/82141-1775511032/content/migration.html +340 -0
  8. package/.superpowers/brainstorm/82141-1775511032/state/server-stopped +1 -0
  9. package/.superpowers/brainstorm/82141-1775511032/state/server.log +4 -0
  10. package/.superpowers/brainstorm/82141-1775511032/state/server.pid +1 -0
  11. package/CHANGELOG.md +26 -0
  12. package/LICENSE +21 -0
  13. package/README.md +284 -0
  14. package/dist/config.d.ts +32 -0
  15. package/dist/config.d.ts.map +1 -0
  16. package/dist/config.js +180 -0
  17. package/dist/config.js.map +1 -0
  18. package/dist/db/client.d.ts +22 -0
  19. package/dist/db/client.d.ts.map +1 -0
  20. package/dist/db/client.js +134 -0
  21. package/dist/db/client.js.map +1 -0
  22. package/dist/db/queries.d.ts +51 -0
  23. package/dist/db/queries.d.ts.map +1 -0
  24. package/dist/db/queries.js +271 -0
  25. package/dist/db/queries.js.map +1 -0
  26. package/dist/db/schema.d.ts +11 -0
  27. package/dist/db/schema.d.ts.map +1 -0
  28. package/dist/db/schema.js +63 -0
  29. package/dist/db/schema.js.map +1 -0
  30. package/dist/index.d.ts +2 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +366 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/indexing/chunking/code.d.ts +17 -0
  35. package/dist/indexing/chunking/code.d.ts.map +1 -0
  36. package/dist/indexing/chunking/code.js +277 -0
  37. package/dist/indexing/chunking/code.js.map +1 -0
  38. package/dist/indexing/chunking/index.d.ts +6 -0
  39. package/dist/indexing/chunking/index.d.ts.map +1 -0
  40. package/dist/indexing/chunking/index.js +19 -0
  41. package/dist/indexing/chunking/index.js.map +1 -0
  42. package/dist/indexing/chunking/markdown.d.ts +16 -0
  43. package/dist/indexing/chunking/markdown.d.ts.map +1 -0
  44. package/dist/indexing/chunking/markdown.js +283 -0
  45. package/dist/indexing/chunking/markdown.js.map +1 -0
  46. package/dist/indexing/chunking/raw-text.d.ts +11 -0
  47. package/dist/indexing/chunking/raw-text.d.ts.map +1 -0
  48. package/dist/indexing/chunking/raw-text.js +59 -0
  49. package/dist/indexing/chunking/raw-text.js.map +1 -0
  50. package/dist/indexing/embeddings.d.ts +10 -0
  51. package/dist/indexing/embeddings.d.ts.map +1 -0
  52. package/dist/indexing/embeddings.js +78 -0
  53. package/dist/indexing/embeddings.js.map +1 -0
  54. package/dist/indexing/orchestrator.d.ts +69 -0
  55. package/dist/indexing/orchestrator.d.ts.map +1 -0
  56. package/dist/indexing/orchestrator.js +387 -0
  57. package/dist/indexing/orchestrator.js.map +1 -0
  58. package/dist/indexing/source-indexer.d.ts +68 -0
  59. package/dist/indexing/source-indexer.d.ts.map +1 -0
  60. package/dist/indexing/source-indexer.js +379 -0
  61. package/dist/indexing/source-indexer.js.map +1 -0
  62. package/dist/indexing/url-derivation.d.ts +7 -0
  63. package/dist/indexing/url-derivation.d.ts.map +1 -0
  64. package/dist/indexing/url-derivation.js +31 -0
  65. package/dist/indexing/url-derivation.js.map +1 -0
  66. package/dist/mcp/server.d.ts +10 -0
  67. package/dist/mcp/server.d.ts.map +1 -0
  68. package/dist/mcp/server.js +67 -0
  69. package/dist/mcp/server.js.map +1 -0
  70. package/dist/mcp/tools/bash-fs.d.ts +19 -0
  71. package/dist/mcp/tools/bash-fs.d.ts.map +1 -0
  72. package/dist/mcp/tools/bash-fs.js +134 -0
  73. package/dist/mcp/tools/bash-fs.js.map +1 -0
  74. package/dist/mcp/tools/bash-grep.d.ts +29 -0
  75. package/dist/mcp/tools/bash-grep.d.ts.map +1 -0
  76. package/dist/mcp/tools/bash-grep.js +153 -0
  77. package/dist/mcp/tools/bash-grep.js.map +1 -0
  78. package/dist/mcp/tools/bash-related.d.ts +14 -0
  79. package/dist/mcp/tools/bash-related.d.ts.map +1 -0
  80. package/dist/mcp/tools/bash-related.js +54 -0
  81. package/dist/mcp/tools/bash-related.js.map +1 -0
  82. package/dist/mcp/tools/bash-session.d.ts +23 -0
  83. package/dist/mcp/tools/bash-session.d.ts.map +1 -0
  84. package/dist/mcp/tools/bash-session.js +60 -0
  85. package/dist/mcp/tools/bash-session.js.map +1 -0
  86. package/dist/mcp/tools/bash-telemetry.d.ts +26 -0
  87. package/dist/mcp/tools/bash-telemetry.d.ts.map +1 -0
  88. package/dist/mcp/tools/bash-telemetry.js +53 -0
  89. package/dist/mcp/tools/bash-telemetry.js.map +1 -0
  90. package/dist/mcp/tools/bash-virtual-files.d.ts +3 -0
  91. package/dist/mcp/tools/bash-virtual-files.d.ts.map +1 -0
  92. package/dist/mcp/tools/bash-virtual-files.js +65 -0
  93. package/dist/mcp/tools/bash-virtual-files.js.map +1 -0
  94. package/dist/mcp/tools/bash.d.ts +25 -0
  95. package/dist/mcp/tools/bash.d.ts.map +1 -0
  96. package/dist/mcp/tools/bash.js +140 -0
  97. package/dist/mcp/tools/bash.js.map +1 -0
  98. package/dist/mcp/tools/collect.d.ts +13 -0
  99. package/dist/mcp/tools/collect.d.ts.map +1 -0
  100. package/dist/mcp/tools/collect.js +56 -0
  101. package/dist/mcp/tools/collect.js.map +1 -0
  102. package/dist/mcp/tools/search.d.ts +5 -0
  103. package/dist/mcp/tools/search.d.ts.map +1 -0
  104. package/dist/mcp/tools/search.js +68 -0
  105. package/dist/mcp/tools/search.js.map +1 -0
  106. package/dist/types.d.ts +1237 -0
  107. package/dist/types.d.ts.map +1 -0
  108. package/dist/types.js +163 -0
  109. package/dist/types.js.map +1 -0
  110. package/dist/webhooks/github.d.ts +12 -0
  111. package/dist/webhooks/github.d.ts.map +1 -0
  112. package/dist/webhooks/github.js +117 -0
  113. package/dist/webhooks/github.js.map +1 -0
  114. package/package.json +48 -0
@@ -0,0 +1,277 @@
1
// Line-based code splitter

/** Lines per chunk used by chunkCode when `config.chunk.target_lines` is not set. */
const DEFAULT_TARGET_LINES = 80;
/** Overlap (in lines) used by chunkCode when `config.chunk.overlap_lines` is not set. */
const DEFAULT_OVERLAP_LINES = 10;
4
/**
 * Map a file path to a language name based on its extension.
 *
 * Only the last path segment is inspected, so dots inside directory names
 * never leak into the result. Known extensions map to a canonical language
 * name (e.g. `tsx` -> `typescript`); unknown extensions are returned
 * lower-cased as-is; a file name without any dot yields `'text'`.
 *
 * @param filePath - Path (or bare file name) of the source file
 * @returns The detected language name; always a non-empty string
 */
function detectLanguage(filePath) {
    // A Map instead of an object literal: extensions such as `constructor`
    // or `__proto__` must not resolve to inherited Object.prototype members.
    const languageByExtension = new Map([
        ['ts', 'typescript'],
        ['tsx', 'typescript'],
        ['js', 'javascript'],
        ['jsx', 'javascript'],
        ['mjs', 'javascript'],
        ['cjs', 'javascript'],
        ['py', 'python'],
        ['rb', 'ruby'],
        ['go', 'go'],
        ['rs', 'rust'],
        ['java', 'java'],
        ['kt', 'kotlin'],
        ['swift', 'swift'],
        ['c', 'c'],
        ['cpp', 'cpp'],
        ['h', 'c'],
        ['hpp', 'cpp'],
        ['cs', 'csharp'],
        ['md', 'markdown'],
        ['mdx', 'markdown'],
        ['json', 'json'],
        ['yaml', 'yaml'],
        ['yml', 'yaml'],
        ['toml', 'toml'],
        ['sql', 'sql'],
        ['sh', 'shell'],
        ['bash', 'shell'],
        ['zsh', 'shell'],
        ['css', 'css'],
        ['scss', 'scss'],
        ['html', 'html'],
        ['xml', 'xml'],
    ]);
    // Drop directories first: "pkg.v2/Makefile" has no extension at all.
    const fileName = filePath.split(/[\\/]/).pop() ?? '';
    const dotIndex = fileName.lastIndexOf('.');
    const ext = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1).toLowerCase();
    return languageByExtension.get(ext) || ext || 'text';
}
45
/**
 * Report whether the character at index `pos` of `line` is backslash-escaped.
 *
 * Walks left from `pos` across the run of consecutive backslashes that
 * immediately precedes it; a run of odd length means the character is escaped.
 */
function isEscaped(line, pos) {
    let cursor = pos - 1;
    while (cursor >= 0 && line[cursor] === '\\') {
        cursor--;
    }
    // Number of backslashes stepped over on the way left.
    const runLength = pos - 1 - cursor;
    return runLength % 2 === 1;
}
56
/**
 * Remove single- and double-quoted string contents and any trailing `//`
 * comment from one line, so later block-comment / template-string detection
 * only reacts to real syntax.
 *
 * The opening quote and the string body are dropped; the closing quote is
 * kept in the output. Backtick strings are not treated as strings here.
 */
function stripStringsAndLineComments(line) {
    const kept = [];
    // The quote character of the string we are currently inside, or null.
    let openQuote = null;
    for (let idx = 0; idx < line.length; idx++) {
        const ch = line[idx];
        if (openQuote === null) {
            if (ch === '/' && line[idx + 1] === '/') {
                break; // everything from here on is a single-line comment
            }
            if ((ch === "'" || ch === '"') && !isEscaped(line, idx)) {
                openQuote = ch; // string starts: swallow the opening quote
                continue;
            }
            kept.push(ch);
            continue;
        }
        if (ch === openQuote && !isEscaped(line, idx)) {
            openQuote = null;
            kept.push(ch); // the closing quote stays in the output
        }
    }
    return kept.join('');
}
81
/**
 * Advance the multi-line construct tracker across one line of source.
 *
 * The state records whether the END of the previous line was inside a block
 * comment (`inBlockComment`, also reused for Python triple-quoted strings) or
 * inside a template string (`inTemplateString`). When a triple-quoted string
 * opens, the delimiter that must close it is remembered in the optional
 * `blockCloser` field; without that field a block comment closes on `*\/`.
 *
 * While inside a construct the RAW line is searched for the terminator:
 * comment and template text is not code, so an apostrophe or `//` in it must
 * not hide the closing delimiter. Whatever follows the terminator on the same
 * line is then processed as ordinary code.
 *
 * @param line - One line of source text, without its line terminator
 * @param state - State at the end of the previous line; never mutated
 * @returns A new state object describing the end of `line`
 */
function trackBlockState(line, state) {
    const newState = { ...state };
    if (newState.inBlockComment) {
        const closer = newState.blockCloser ?? '*/';
        const closeAt = line.indexOf(closer);
        if (closeAt === -1) {
            return newState; // still inside the comment / docstring
        }
        newState.inBlockComment = false;
        delete newState.blockCloser;
        // The rest of the line is code again and may open something new.
        return trackBlockState(line.slice(closeAt + closer.length), newState);
    }
    if (newState.inTemplateString) {
        // First backtick not preceded by a backslash ends the template.
        const closeAt = line.search(/(?<!\\)`/);
        if (closeAt === -1) {
            return newState;
        }
        newState.inTemplateString = false;
        return trackBlockState(line.slice(closeAt + 1), newState);
    }
    // A blank (remainder of a) line cannot open anything.
    if (line.trim() === '') {
        return newState;
    }
    const stripped = stripStringsAndLineComments(line);
    // Check for block comment start (not on same line as end)
    if (stripped.includes('/*') && !stripped.includes('*/')) {
        newState.inBlockComment = true;
    }
    else {
        // Only check template strings if we didn't just enter a block comment
        const backticks = (stripped.match(/(?<!\\)`/g) || []).length;
        if (backticks % 2 === 1) {
            newState.inTemplateString = true;
        }
    }
    // Python triple-quote strings — run on original line since stripping is JS-oriented
    if (!newState.inBlockComment && !newState.inTemplateString) {
        const tripleDouble = (line.match(/"""/g) || []).length;
        const tripleSingle = (line.match(/'''/g) || []).length;
        if (tripleDouble % 2 === 1) {
            newState.inBlockComment = true; // reuse flag for python docstrings
            newState.blockCloser = '"""';
        }
        else if (tripleSingle % 2 === 1) {
            newState.inBlockComment = true;
            newState.blockCloser = "'''";
        }
    }
    return newState;
}
121
/**
 * Collect the indices of lines where a chunk may safely begin.
 *
 * A line qualifies when it is blank and, after feeding it to
 * trackBlockState, the tracker is neither inside a block comment nor inside
 * a template string. Runs of blank lines are not treated specially here:
 * every qualifying blank line is reported, and callers decide which ones
 * form a double-newline boundary.
 *
 * @param lines - The file content split into lines
 * @returns Set of 0-based line indices that are safe split points
 */
function findSplitPoints(lines) {
    const safePoints = new Set();
    let tracker = { inBlockComment: false, inTemplateString: false };
    for (const [index, text] of lines.entries()) {
        tracker = trackBlockState(text, tracker);
        const insideConstruct = tracker.inBlockComment || tracker.inTemplateString;
        if (!insideConstruct && text.trim() === '') {
            safePoints.add(index);
        }
    }
    return safePoints;
}
145
/**
 * Render a run of lines as chunk text: a `// File: <path>` breadcrumb
 * followed by one `<number> | <text>` row per line.
 *
 * Line numbers are right-aligned to the width of the largest number in the
 * chunk so the `|` gutter lines up.
 *
 * @param lines - The lines belonging to this chunk
 * @param startLine - 1-based line number of the first entry in `lines`
 * @param filePath - Path shown in the breadcrumb
 * @returns The formatted chunk text
 */
function formatChunk(lines, startLine, filePath) {
    const lastLineNumber = startLine + lines.length - 1;
    const gutterWidth = `${lastLineNumber}`.length;
    const rows = [];
    let lineNumber = startLine;
    for (const text of lines) {
        rows.push(`${String(lineNumber).padStart(gutterWidth, ' ')} | ${text}`);
        lineNumber++;
    }
    return `// File: ${filePath}\n${rows.join('\n')}`;
}
158
/**
 * Partition a file's lines into contiguous, inclusive `{ start, end }` index
 * ranges of roughly `targetLines` lines each.
 *
 * Strategy, in order of preference:
 *   1. split on safe double-blank-line boundaries; accepted as-is when no
 *      resulting range exceeds 1.5x the target;
 *   2. otherwise split on any safe blank line, then mechanically subdivide
 *      ranges that are still larger than 1.5x the target (blank lines can be
 *      too unevenly placed to bound the chunk size on their own);
 *   3. with no safe blank line at all, split mechanically every
 *      `targetLines` lines.
 *
 * @param lines - The file content split into lines
 * @param targetLines - Desired number of lines per range
 * @returns Ranges covering every line exactly once, in order; empty for empty input
 */
function splitAtBoundaries(lines, targetLines) {
    if (lines.length === 0) {
        return []; // nothing to cover; avoids the invalid range { start: 0, end: -1 }
    }
    if (lines.length <= targetLines) {
        return [{ start: 0, end: lines.length - 1 }];
    }
    const lastIndex = lines.length - 1;
    // Step for mechanical splitting; never below 1 so the loop always advances.
    const step = Math.max(1, Math.floor(targetLines) || 1);
    const isOversized = (range) => (range.end - range.start + 1) > targetLines * 1.5;
    // Turn ascending split indices into back-to-back ranges over the whole file.
    const toRanges = (points) => {
        const ranges = [];
        let rangeStart = 0;
        for (const point of points) {
            if (point > rangeStart) {
                ranges.push({ start: rangeStart, end: point - 1 });
                rangeStart = point;
            }
        }
        ranges.push({ start: rangeStart, end: lastIndex });
        return ranges;
    };
    // Cut [start, end] into fixed-size pieces regardless of content.
    const splitMechanically = (start, end) => {
        const pieces = [];
        for (let i = start; i <= end; i += step) {
            pieces.push({ start: i, end: Math.min(i + step - 1, end) });
        }
        return pieces;
    };
    const safePoints = findSplitPoints(lines);
    // Prefer double-newline boundaries first
    const doubleNewlines = [];
    for (let i = 1; i < lines.length; i++) {
        if (lines[i].trim() === '' && lines[i - 1].trim() === '' && safePoints.has(i)) {
            doubleNewlines.push(i);
        }
    }
    if (doubleNewlines.length > 0) {
        const ranges = toRanges(selectSplitPoints(doubleNewlines, lines.length, targetLines));
        if (!ranges.some(isOversized)) {
            return ranges;
        }
    }
    // Fall back to single blank line boundaries
    const blankLines = Array.from(safePoints).sort((a, b) => a - b);
    if (blankLines.length > 0) {
        const refined = toRanges(selectSplitPoints(blankLines, lines.length, targetLines));
        // Keep chunk size bounded even when blank lines are badly distributed.
        return refined.flatMap((range) => isOversized(range) ? splitMechanically(range.start, range.end) : [range]);
    }
    // No good split points; split mechanically on line boundaries
    return splitMechanically(0, lastIndex);
}
215
/**
 * Greedily choose split points from ascending candidates: a candidate is
 * taken as soon as it lies at least `targetLines` lines past the previously
 * taken one (or past line 0 for the first).
 *
 * @param candidates - Candidate line indices, in ascending order
 * @param _totalLines - Unused; kept for signature compatibility
 * @param targetLines - Minimum distance between consecutive split points
 * @returns The chosen candidates, in their original order
 */
function selectSplitPoints(candidates, _totalLines, targetLines) {
    return candidates.reduce((picked, candidate) => {
        const previous = picked.length > 0 ? picked[picked.length - 1] : 0;
        if (candidate - previous >= targetLines) {
            picked.push(candidate);
        }
        return picked;
    }, []);
}
231
/**
 * Split code content into embedding-friendly chunks with line numbers.
 *
 * The file is partitioned into line ranges by splitAtBoundaries; every chunk
 * after the first is extended backwards by up to `overlap_lines` lines of
 * the preceding range (never past that range's start), so the reported
 * `startLine` of a chunk includes its overlap.
 *
 * @param content - The full source file content (LF or CRLF line endings)
 * @param filePath - Path to the source file; used for the breadcrumb and language detection
 * @param config - Source configuration; `config.chunk.target_lines` and
 *   `config.chunk.overlap_lines` override the defaults. May be omitted.
 * @returns Array of chunk objects: `{ content, startLine, endLine, language, chunkIndex }`
 *   with 1-based line numbers; empty when the content is empty or whitespace-only
 */
export function chunkCode(content, filePath, config) {
    if (!content || !content.trim()) {
        return [];
    }
    const targetLines = config?.chunk?.target_lines ?? DEFAULT_TARGET_LINES;
    const overlapLines = config?.chunk?.overlap_lines ?? DEFAULT_OVERLAP_LINES;
    const language = detectLanguage(filePath);
    // Split on LF or CRLF so Windows files do not carry a stray '\r' on every line.
    const lines = content.split(/\r?\n/);
    // Remove trailing empty line if file ends with newline
    if (lines.length > 0 && lines[lines.length - 1] === '') {
        lines.pop();
    }
    if (lines.length === 0) {
        return [];
    }
    const ranges = splitAtBoundaries(lines, targetLines);
    return ranges.map((range, index) => {
        let start = range.start;
        // Apply overlap from previous chunk
        if (index > 0 && overlapLines > 0) {
            const previous = ranges[index - 1];
            const overlapStart = Math.max(previous.end - overlapLines + 1, previous.start);
            start = Math.min(start, overlapStart);
        }
        const startLine = start + 1; // 1-indexed
        const endLine = range.end + 1; // 1-indexed
        return {
            content: formatChunk(lines.slice(start, range.end + 1), startLine, filePath),
            startLine,
            endLine,
            language,
            chunkIndex: index,
        };
    });
}
277
+ //# sourceMappingURL=code.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code.js","sourceRoot":"","sources":["../../../src/indexing/chunking/code.ts"],"names":[],"mappings":"AAAA,2BAA2B;AAY3B,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB;IACpC,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC3D,MAAM,WAAW,GAA2B;QACxC,EAAE,EAAE,YAAY;QAChB,GAAG,EAAE,YAAY;QACjB,EAAE,EAAE,YAAY;QAChB,GAAG,EAAE,YAAY;QACjB,GAAG,EAAE,YAAY;QACjB,GAAG,EAAE,YAAY;QACjB,EAAE,EAAE,QAAQ;QACZ,EAAE,EAAE,MAAM;QACV,EAAE,EAAE,IAAI;QACR,EAAE,EAAE,MAAM;QACV,IAAI,EAAE,MAAM;QACZ,EAAE,EAAE,QAAQ;QACZ,KAAK,EAAE,OAAO;QACd,CAAC,EAAE,GAAG;QACN,GAAG,EAAE,KAAK;QACV,CAAC,EAAE,GAAG;QACN,GAAG,EAAE,KAAK;QACV,EAAE,EAAE,QAAQ;QACZ,EAAE,EAAE,UAAU;QACd,GAAG,EAAE,UAAU;QACf,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM;QACZ,GAAG,EAAE,MAAM;QACX,IAAI,EAAE,MAAM;QACZ,GAAG,EAAE,KAAK;QACV,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,OAAO;QACb,GAAG,EAAE,OAAO;QACZ,GAAG,EAAE,KAAK;QACV,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM;QACZ,GAAG,EAAE,KAAK;KACb,CAAC;IAEF,OAAO,WAAW,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,MAAM,CAAC;AAC7C,CAAC;AAWD;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,GAAW;IACxC,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QACpD,WAAW,EAAE,CAAC;IAClB,CAAC;IACD,OAAO,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC;AACjC,CAAC;AAED;;;GAGG;AACH,SAAS,2BAA2B,CAAC,IAAY;IAC7C,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,IAAI,EAAE,KAAK,GAAG,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YAC9D,MAAM,CAAC,wCAAwC;QACnD,CAAC;QAED,IAAI,CAAC,QAAQ,IAAI,EAAE,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YACjD,QAAQ,GAAG,CAAC,QAAQ,CAAC;QACzB,CAAC;aAAM,IAAI,CAAC,QAAQ,IAAI,EAAE,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;YACxD,
QAAQ,GAAG,CAAC,QAAQ,CAAC;QACzB,CAAC;QAED,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;YACzB,MAAM,IAAI,EAAE,CAAC;QACjB,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,KAAiB;IACpD,MAAM,QAAQ,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAG,2BAA2B,CAAC,IAAI,CAAC,CAAC;IAEnD,IAAI,QAAQ,CAAC,cAAc,EAAE,CAAC;QAC1B,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,QAAQ,CAAC,cAAc,GAAG,KAAK,CAAC;QACpC,CAAC;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,IAAI,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QAC5B,4BAA4B;QAC5B,MAAM,SAAS,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAC7D,IAAI,SAAS,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,QAAQ,CAAC,gBAAgB,GAAG,KAAK,CAAC;QACtC,CAAC;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,0DAA0D;IAC1D,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACtD,QAAQ,CAAC,cAAc,GAAG,IAAI,CAAC;IACnC,CAAC;SAAM,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,CAAC;QAClC,sEAAsE;QACtE,MAAM,SAAS,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAC7D,IAAI,SAAS,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,QAAQ,CAAC,gBAAgB,GAAG,IAAI,CAAC;QACrC,CAAC;IACL,CAAC;IAED,oFAAoF;IACpF,IAAI,CAAC,QAAQ,CAAC,cAAc,IAAI,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACzD,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/C,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YACvD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YACvD,IAAI,CAAC,YAAY,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvD,QAAQ,CAAC,cAAc,GAAG,IAAI,CAAC,CAAC,mCAAmC;YACvE,CAAC;QACL,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED;;;GAGG;AACH,SAAS,eAAe,CAAC,KAAe;IACpC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;IACrC,IAAI,KAAK,GAAe,EAAE,cAAc,EAAE,KAAK,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC;IAE3E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;QAC/B,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,C
AAC;QAEzC,kDAAkD;QAClD,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAChE,IAAI,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,gBAAgB;gBAChD,CAAC,SAAS,CAAC,cAAc,IAAI,CAAC,SAAS,CAAC,gBAAgB,EAAE,CAAC;gBAC3D,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACtB,CAAC;QACL,CAAC;QAED,iDAAiD;QACjD,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC7E,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACtB,CAAC;IACL,CAAC;IAED,OAAO,UAAU,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,KAAe,EAAE,SAAiB,EAAE,QAAgB;IACrE,MAAM,UAAU,GAAG,YAAY,QAAQ,EAAE,CAAC;IAC1C,MAAM,UAAU,GAAG,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;IAE3C,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACnC,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC9D,OAAO,GAAG,OAAO,MAAM,IAAI,EAAE,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,OAAO,UAAU,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACnD,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAe,EAAE,WAAmB;IAC3D,IAAI,KAAK,CAAC,MAAM,IAAI,WAAW,EAAE,CAAC;QAC9B,OAAO,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAC1C,MAAM,MAAM,GAA0C,EAAE,CAAC;IACzD,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,yCAAyC;IACzC,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,CAAC;IACL,CAAC;IAED,6CAA6C;IAC7C,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAG,iBAAiB,CAAC,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACjF,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YAC9B,IAAI,KAAK,GAAG,UAAU,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,
UAAU,EAAE,GAAG,EAAE,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;gBACnD,UAAU,GAAG,KAAK,CAAC;YACvB,CAAC;QACL,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,CAAC;QAE1D,wCAAwC;QACxC,MAAM,eAAe,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,WAAW,GAAG,GAAG,CAAC,CAAC;QACpF,IAAI,CAAC,eAAe;YAAE,OAAO,MAAM,CAAC;IACxC,CAAC;IAED,4CAA4C;IAC5C,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,aAAa,GAA0C,EAAE,CAAC;QAChE,UAAU,GAAG,CAAC,CAAC;QAEf,MAAM,WAAW,GAAG,iBAAiB,CAAC,UAAU,EAAE,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QAC7E,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YAC9B,IAAI,KAAK,GAAG,UAAU,EAAE,CAAC;gBACrB,aAAa,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC1D,UAAU,GAAG,KAAK,CAAC;YACvB,CAAC;QACL,CAAC;QACD,aAAa,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,CAAC;QACjE,OAAO,aAAa,CAAC;IACzB,CAAC;IAED,8DAA8D;IAC9D,MAAM,gBAAgB,GAA0C,EAAE,CAAC;IACnE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QACjD,gBAAgB,CAAC,IAAI,CAAC;YAClB,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;SACvD,CAAC,CAAC;IACP,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC5B,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,UAAoB,EAAE,WAAmB,EAAE,WAAmB;IACrF,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;QACvC,IAAI,QAAQ,IAAI,WAAW,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACzB,SAAS,GAAG,SAAS,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CAAC,OAAe,EAAE,QAAgB,EAAE,MAAoB;IAC7E,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACd,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,EAAE,YAAY,IAAI,oBAAoB,CAAC;I
ACvE,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,EAAE,aAAa,IAAI,qBAAqB,CAAC;IAE1E,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,uDAAuD;IACvD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACd,CAAC;IAED,oBAAoB;IACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IAErD,iCAAiC;IACjC,MAAM,MAAM,GAAkB,EAAE,CAAC;IAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAE/B,oCAAoC;QACpC,IAAI,CAAC,GAAG,CAAC,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,YAAY,GAAG,CAAC,EAAE,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACzF,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAC1C,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,YAAY;QACzC,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,CAAC,CAAK,YAAY;QAEzC,MAAM,CAAC,IAAI,CAAC;YACR,OAAO,EAAE,WAAW,CAAC,UAAU,EAAE,SAAS,EAAE,QAAQ,CAAC;YACrD,SAAS;YACT,OAAO;YACP,QAAQ;YACR,UAAU,EAAE,MAAM,CAAC,MAAM;SAC5B,CAAC,CAAC;IACP,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
@@ -0,0 +1,6 @@
1
+ import { type ChunkOutput, type SourceConfig } from '../../types.js';
2
+ type ChunkerFn = (content: string, filePath: string, config: SourceConfig) => ChunkOutput[];
3
+ export declare function registerChunker(type: string, fn: ChunkerFn): void;
4
+ export declare function getChunker(type: string): ChunkerFn;
5
+ export {};
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAErE,KAAK,SAAS,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,KAAK,WAAW,EAAE,CAAC;AAI5F,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,GAAG,IAAI,CAEjE;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAIlD"}
@@ -0,0 +1,19 @@
1
// Chunker registry — maps source.type to a chunking function.
// Written by registerChunker and read by getChunker.
const registry = new Map();
3
/**
 * Register `fn` as the chunker for source type `type`.
 * A later registration under the same type replaces the earlier one.
 */
export function registerChunker(type, fn) {
    registry.set(type, fn);
}
6
/**
 * Look up the chunker registered for source type `type`.
 *
 * @returns The registered chunking function
 * @throws Error naming the unknown type and listing the registered types
 */
export function getChunker(type) {
    const chunker = registry.get(type);
    if (chunker) {
        return chunker;
    }
    const available = [...registry.keys()].join(', ');
    throw new Error(`Unknown chunker type: "${type}". Available: ${available}`);
}
12
+ // Register built-ins on import
13
+ import { chunkMarkdown } from './markdown.js';
14
+ import { chunkCode } from './code.js';
15
+ import { chunkRawText } from './raw-text.js';
16
+ registerChunker('markdown', chunkMarkdown);
17
+ registerChunker('code', chunkCode);
18
+ registerChunker('raw-text', chunkRawText);
19
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAM9D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,EAAa;IACvD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,iBAAiB,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,OAAO,EAAE,CAAC;AACd,CAAC;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,eAAe,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAC3C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AACnC,eAAe,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC"}
@@ -0,0 +1,16 @@
1
+ import { type ChunkOutput, type SourceConfig } from '../../types.js';
2
+ export interface MarkdownChunk {
3
+ content: string;
4
+ title: string;
5
+ headingPath: string[];
6
+ chunkIndex: number;
7
+ }
8
+ /**
9
+ * Split markdown/MDX content into embedding-friendly chunks.
10
+ *
11
+ * @param content - The full markdown/MDX file content
12
+ * @param filePath - Path to the source file (used for metadata)
13
+ * @returns Array of ChunkOutput objects
14
+ */
15
+ export declare function chunkMarkdown(content: string, filePath: string, config: SourceConfig): ChunkOutput[];
16
+ //# sourceMappingURL=markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/indexing/chunking/markdown.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAErE,MAAM,WAAW,aAAa;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACtB;AAsRD;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,WAAW,EAAE,CAuDpG"}
@@ -0,0 +1,283 @@
1
// Recursive markdown/MDX splitter

/** Chunk size, in tokens, used when the source config does not set `chunk.target_tokens`. */
const DEFAULT_TARGET_TOKENS = 600;

/** Overlap, in tokens, used when the source config does not set `chunk.overlap_tokens`. */
const DEFAULT_OVERLAP_TOKENS = 50;
4
/**
 * Parse YAML frontmatter from markdown content.
 *
 * Only a leading `---` ... `---` block counts as frontmatter, and only a
 * single-line `title:` entry is read from it; no general YAML parsing is done.
 *
 * @param content - Raw file content, optionally starting with a frontmatter block
 * @returns `title` (the frontmatter title with surrounding quotes removed, or
 *          `null` when absent or empty) and `body` (the content with the
 *          frontmatter block stripped)
 */
function parseFrontmatter(content) {
    const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?/);
    if (!match)
        return { title: null, body: content };
    const frontmatter = match[1];
    const body = content.slice(match[0].length);
    // Match horizontal whitespace only ([ \t]*, not \s*): \s also matches line
    // breaks, which let an empty `title:` capture the following frontmatter
    // line as the title.
    const titleMatch = frontmatter.match(/^title:[ \t]*["']?(.+?)["']?[ \t]*$/m);
    const title = titleMatch ? titleMatch[1].trim() : '';
    return {
        // A whitespace-only value is treated the same as a missing title.
        title: title || null,
        body,
    };
}
20
/**
 * Return the text of the first markdown heading (levels 1-6) in the content,
 * or `null` when there is none. Used as a fallback document title.
 */
function extractFirstHeading(content) {
    const headingPattern = /^#{1,6}\s+(.+)$/m;
    const found = headingPattern.exec(content);
    if (found === null) {
        return null;
    }
    return found[1].trim();
}
27
/**
 * Strip MDX-specific syntax: import statements and JSX component tags.
 * Preserves text content inside JSX tags.
 *
 * Fenced code blocks are masked before stripping and restored afterwards, so
 * code samples that themselves contain `import` lines or JSX stay verbatim
 * instead of being stripped like MDX markup.
 *
 * @param content - Markdown/MDX body (frontmatter already removed)
 * @returns The content without MDX imports and component tags, with runs of
 *          blank lines collapsed, trimmed
 */
function stripMdx(content) {
    // Swap every fenced code block for a NUL-delimited placeholder. A wrapper
    // component around a code block (<Tabs> ... </Tabs>) can still be matched
    // as a pair because the placeholder stays inside it.
    const fencedBlocks = [];
    let result = content.replace(/^(`{3,})[^\n]*\n(?:[\s\S]*?\n)?\1\s*$/gm, (block) => {
        fencedBlocks.push(block);
        return `\u0000${fencedBlocks.length - 1}\u0000`;
    });
    // Strip import statements (single and multi-line)
    result = result.replace(/^import\s+[\s\S]*?from\s+['"][^'"]+['"];?\s*$/gm, '');
    // Strip self-closing JSX tags: <Component ... />
    result = result.replace(/<[A-Z][A-Za-z0-9]*(?:\s+[^>]*)?\s*\/>/g, '');
    // Strip JSX component open/close tags but keep inner content.
    // Nested tags are handled by repeatedly stripping innermost pairs until
    // a pass changes nothing.
    let prev = '';
    while (prev !== result) {
        prev = result;
        result = result.replace(/<([A-Z][A-Za-z0-9]*)(?:\s+[^>]*)?>([^]*?)<\/\1>/g, '$2');
    }
    // Put the code blocks back. A replacement function is used so `$`
    // sequences inside code are inserted literally.
    result = result.replace(/\u0000(\d+)\u0000/g, (placeholder, index) => fencedBlocks[Number(index)] ?? placeholder);
    // Clean up excessive blank lines left by stripping
    result = result.replace(/\n{3,}/g, '\n\n');
    return result.trim();
}
47
/**
 * Partition content into an ordered list of `{ text, isCodeBlock }` segments.
 * Backtick-fenced code blocks become segments flagged `isCodeBlock: true`;
 * the text between them is flagged `false`. Concatenating every segment's
 * `text` reproduces the input.
 */
function segmentCodeBlocks(content) {
    const fencePattern = /^(`{3,})[^\n]*\n(?:[\s\S]*?\n)?\1\s*$/gm;
    const pieces = [];
    let cursor = 0;
    for (const fence of content.matchAll(fencePattern)) {
        const start = fence.index;
        // Plain text between the previous fence (or the start) and this one.
        if (start > cursor) {
            pieces.push({ text: content.slice(cursor, start), isCodeBlock: false });
        }
        pieces.push({ text: fence[0], isCodeBlock: true });
        cursor = start + fence[0].length;
    }
    // Whatever follows the last fence.
    if (cursor < content.length) {
        pieces.push({ text: content.slice(cursor), isCodeBlock: false });
    }
    return pieces;
}
64
/**
 * Split text on a delimiter (string or RegExp) without ever splitting inside
 * a fenced code block. Code blocks are appended whole to the part being
 * built. When the delimiter is a string starting with `#`, it is re-attached
 * to the front of each part that follows a split. Blank parts are dropped.
 */
function splitPreservingCodeBlocks(content, delimiter) {
    const reattachDelimiter = typeof delimiter === 'string' && delimiter.startsWith('#');
    const pieces = [];
    let buffer = '';
    for (const { text, isCodeBlock } of segmentCodeBlocks(content)) {
        if (isCodeBlock) {
            buffer += text;
            continue;
        }
        // The first fragment continues the part in progress; every later
        // fragment closes that part and starts a new one.
        const [head, ...rest] = text.split(delimiter);
        buffer += head;
        for (const fragment of rest) {
            pieces.push(buffer);
            buffer = reattachDelimiter ? delimiter + fragment : fragment;
        }
    }
    if (buffer) {
        pieces.push(buffer);
    }
    return pieces.filter(piece => piece.trim().length > 0);
}
103
/**
 * Track heading hierarchy up to a given position in the original content.
 *
 * Headings are read from the text before `position`. Each heading discards
 * any previously collected headings at the same or a deeper level, so the
 * result is the chain of enclosing headings. Lines that only look like
 * headings because they sit inside a fenced code block (for example
 * `# install deps` in a shell snippet) are ignored.
 *
 * @param fullContent - The complete document text
 * @param position - Character offset into `fullContent`
 * @returns Heading texts from outermost to innermost
 */
function getHeadingPathAtPosition(fullContent, position) {
    // Find fenced code blocks in the full content, not just the prefix, so a
    // fence that closes after `position` still shields the lines inside it.
    const fencedRanges = [];
    for (const fence of fullContent.matchAll(/^(`{3,})[^\n]*\n(?:[\s\S]*?\n)?\1\s*$/gm)) {
        if (fence.index >= position)
            break;
        fencedRanges.push({ start: fence.index, end: fence.index + fence[0].length });
    }
    const contentBefore = fullContent.slice(0, position);
    const headings = [];
    let rangeIndex = 0;
    for (const match of contentBefore.matchAll(/^(#{1,6})\s+(.+)$/gm)) {
        // Matches and ranges both ascend, so one forward-moving cursor is
        // enough to tell whether this match falls inside a fenced block.
        while (rangeIndex < fencedRanges.length && fencedRanges[rangeIndex].end <= match.index) {
            rangeIndex++;
        }
        if (rangeIndex < fencedRanges.length && fencedRanges[rangeIndex].start <= match.index) {
            continue;
        }
        const level = match[1].length;
        const text = match[2].trim();
        // Remove headings at same or deeper level (new section at this level)
        while (headings.length > 0 && headings[headings.length - 1].level >= level) {
            headings.pop();
        }
        headings.push({ level, text });
    }
    return headings.map(h => h.text);
}
122
/**
 * Split text at every heading of the given level, outside fenced code blocks.
 * Each split happens just before the heading marker, so the marker stays at
 * the start of its section. Sections that are blank are discarded.
 */
function splitOnHeading(content, level) {
    const marker = `${'#'.repeat(level)} `;
    // Zero-width lookahead: the split consumes nothing, so the marker is kept.
    const boundary = new RegExp(`(?=^${marker})`, 'gm');
    const sections = [];
    let pending = '';
    const flushPending = () => {
        if (pending.trim()) {
            sections.push(pending);
        }
    };
    for (const { text, isCodeBlock } of segmentCodeBlocks(content)) {
        if (isCodeBlock) {
            pending += text;
            continue;
        }
        const [head, ...rest] = text.split(boundary);
        pending += head;
        for (const section of rest) {
            flushPending();
            pending = section;
        }
    }
    flushPending();
    return sections;
}
155
/**
 * Group consecutive lines into parts no longer than `targetSize`, joining the
 * lines of a part with the single newline they were split on. A trailing part
 * that is blank is dropped.
 */
function mergeLines(lines, targetSize) {
    const merged = [];
    let group = [];
    let size = 0;
    for (const line of lines) {
        // +1 accounts for the newline that re-joins this line to the group.
        const grown = group.length === 0 ? line.length : size + 1 + line.length;
        if (group.length > 0 && grown > targetSize) {
            merged.push(group.join('\n'));
            group = [line];
            size = line.length;
        }
        else {
            group.push(line);
            size = grown;
        }
    }
    const tail = group.join('\n');
    if (tail.trim()) {
        merged.push(tail);
    }
    return merged;
}
/**
 * Recursively split content to fit within target chunk size.
 * Priority: h2 -> h3 -> paragraph -> line
 *
 * @param content - Text to split
 * @param targetChars - Maximum desired length of a part, in characters
 * @param depth - Splitting stage to start from (0 = h2, 1 = h3, 2 = paragraph,
 *                3 = line); callers outside the recursion use the default
 * @returns The parts in document order. A part can still exceed `targetChars`
 *          when it is a single line longer than the target.
 */
function recursiveSplit(content, targetChars, depth = 0) {
    if (content.length <= targetChars) {
        return [content];
    }
    if (depth === 0) {
        // Split on ## headings
        const sections = splitOnHeading(content, 2);
        if (sections.length > 1) {
            return sections.flatMap(p => recursiveSplit(p, targetChars, 1));
        }
    }
    if (depth <= 1) {
        // Split on ### headings
        const subsections = splitOnHeading(content, 3);
        if (subsections.length > 1) {
            return subsections.flatMap(p => recursiveSplit(p, targetChars, 2));
        }
    }
    if (depth <= 2) {
        // Split on paragraph boundaries
        const paragraphs = splitPreservingCodeBlocks(content, /\n\n+/);
        if (paragraphs.length > 1) {
            return mergeSmallParts(paragraphs, targetChars).flatMap(p => recursiveSplit(p, targetChars, 3));
        }
    }
    // Split on line boundaries. Lines are re-joined with a single '\n';
    // the paragraph merger would insert a blank line between every line,
    // which alters the text so it can no longer be found in the source.
    const lines = content.split('\n');
    if (lines.length > 1) {
        return mergeLines(lines, targetChars);
    }
    // Content is a single very long line; return as-is
    return [content];
}
193
/**
 * Greedily combine neighbouring parts while the combined length stays within
 * `targetSize`. Parts are joined with a blank line (`\n\n`) unless the text
 * built so far already ends in a newline. A trailing blank group is dropped.
 */
function mergeSmallParts(parts, targetSize) {
    const groups = [];
    let acc = '';
    parts.forEach(piece => {
        if (!acc) {
            // Nothing accumulated yet: the piece starts a new group.
            acc = piece;
            return;
        }
        const glue = acc.endsWith('\n') ? '' : '\n\n';
        const candidate = acc + glue + piece;
        if (candidate.length > targetSize) {
            groups.push(acc);
            acc = piece;
        }
        else {
            acc = candidate;
        }
    });
    if (acc.trim()) {
        groups.push(acc);
    }
    return groups;
}
214
/**
 * Apply overlap between consecutive chunks.
 *
 * Every chunk after the first is prefixed with text from the end of the
 * previous (un-overlapped) chunk: the last `overlapChars` characters, cut to
 * begin at the last newline inside that window when one exists past its
 * first character. The input array is returned unchanged when there is at
 * most one chunk or `overlapChars` is not positive.
 *
 * NOTE(review): cutting at the *last* newline keeps only the final line of
 * the window, and only a bare newline when the previous chunk ends in one —
 * confirm this is the intended amount of overlap.
 */
function applyOverlap(chunks, overlapChars) {
    if (chunks.length <= 1 || overlapChars <= 0) {
        return chunks;
    }
    return chunks.map((chunk, index) => {
        if (index === 0) {
            return chunk;
        }
        const tail = chunks[index - 1].slice(-overlapChars);
        const lastBreak = tail.lastIndexOf('\n');
        const carried = lastBreak > 0 ? tail.slice(lastBreak) : tail;
        return carried + chunk;
    });
}
231
/**
 * Split markdown/MDX content into embedding-friendly chunks.
 *
 * Pipeline: strip frontmatter, strip MDX syntax, split recursively to the
 * target size, add overlap between neighbours, then attach a title and a
 * heading path to every non-empty chunk.
 *
 * @param content - The full markdown/MDX file content
 * @param filePath - Path to the source file; its last `/` segment is the
 *                   title of last resort
 * @param config - Source configuration; `chunk.target_tokens` and
 *                 `chunk.overlap_tokens` override the defaults, and each is
 *                 multiplied by 4 to obtain a size in characters
 * @returns Array of chunk objects (`content`, `title`, `headingPath`,
 *          `chunkIndex`); empty when there is no content to index
 */
export function chunkMarkdown(content, filePath, config) {
    if (!content || !content.trim()) {
        return [];
    }
    const chunkOptions = config.chunk;
    const targetChars = 4 * (chunkOptions?.target_tokens ?? DEFAULT_TARGET_TOKENS);
    const overlapChars = 4 * (chunkOptions?.overlap_tokens ?? DEFAULT_OVERLAP_TOKENS);
    const { title: frontmatterTitle, body } = parseFrontmatter(content);
    const cleanBody = stripMdx(body);
    if (!cleanBody.trim()) {
        return [];
    }
    // Title preference: frontmatter, then first heading, then file name.
    const title = frontmatterTitle || extractFirstHeading(cleanBody) || filePath.split('/').pop() || filePath;
    const rawChunks = recursiveSplit(cleanBody, targetChars);
    const overlapped = applyOverlap(rawChunks, overlapChars);
    const result = [];
    // Search cursor: each chunk is looked up at or after the previous hit.
    let cursor = 0;
    overlapped.forEach((withOverlap, i) => {
        const text = withOverlap.trim();
        if (!text) {
            return;
        }
        // Locate the chunk by its raw (non-overlapped) text, since the
        // overlap prefix belongs to the preceding chunk's position.
        const needle = rawChunks[i]?.trim() || text;
        const pos = cleanBody.indexOf(needle, cursor);
        let headingPath = [];
        if (pos >= 0) {
            headingPath = getHeadingPathAtPosition(cleanBody, pos);
            cursor = pos;
        }
        result.push({
            content: text,
            title,
            headingPath,
            chunkIndex: result.length,
        });
    });
    return result;
}
283
+ //# sourceMappingURL=markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../../src/indexing/chunking/markdown.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAWlC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAElC;;;GAGG;AACH,SAAS,gBAAgB,CAAC,OAAe;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACjE,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAElD,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE5C,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACvE,OAAO;QACH,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI;QAC/C,IAAI;KACP,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,OAAe;IACxC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IAChD,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AAC1C,CAAC;AAED;;;GAGG;AACH,SAAS,QAAQ,CAAC,OAAe;IAC7B,kDAAkD;IAClD,IAAI,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,iDAAiD,EAAE,EAAE,CAAC,CAAC;IAEpF,iDAAiD;IACjD,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,wCAAwC,EAAE,EAAE,CAAC,CAAC;IAEtE,6DAA6D;IAC7D,8DAA8D;IAC9D,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,OAAO,IAAI,KAAK,MAAM,EAAE,CAAC;QACrB,IAAI,GAAG,MAAM,CAAC;QACd,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,kDAAkD,EAAE,IAAI,CAAC,CAAC;IACtF,CAAC;IAED,mDAAmD;IACnD,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE3C,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAWD,SAAS,iBAAiB,CAAC,OAAe;IACtC,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,MAAM,cAAc,GAAG,yCAAyC,CAAC;IAEjE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;QACvF,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,SAAS,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC9C,CAAC;IAED,
IAAI,SAAS,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC7B,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,yBAAyB,CAAC,OAAe,EAAE,SAA0B;IAC1E,MAAM,QAAQ,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;QAC5B,CAAC;aAAM,CAAC;YACJ,MAAM,QAAQ,GAAG,OAAO,SAAS,KAAK,QAAQ;gBAC1C,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;gBAC/B,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YAEpC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACJ,mDAAmD;gBACnD,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC;gBACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACvC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACpB,mDAAmD;oBACnD,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC7D,OAAO,GAAG,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACtC,CAAC;yBAAM,CAAC;wBACJ,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;oBAC1B,CAAC;gBACL,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IACD,IAAI,OAAO,EAAE,CAAC;QACV,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAClD,CAAC;AAQD;;GAEG;AACH,SAAS,wBAAwB,CAAC,WAAmB,EAAE,QAAgB;IACnE,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACrD,MAAM,YAAY,GAAG,qBAAqB,CAAC;IAC3C,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7B,sEAAsE;QACtE,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;YACzE,QAAQ,CAAC,GAAG,EAAE,CAAC;QACnB,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CA
AC,CAAC;IACnC,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CAAC,OAAe,EAAE,KAAa;IAClD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC;IACvC,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAEpE,MAAM,QAAQ,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;QAC5B,CAAC;aAAM,CAAC;YACJ,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACJ,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC;gBACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACvC,IAAI,OAAO,CAAC,IAAI,EAAE;wBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACxC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC1B,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,EAAE;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAExC,OAAO,KAAK,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CAAC,OAAe,EAAE,WAAmB,EAAE,QAAgB,CAAC;IAC3E,IAAI,OAAO,CAAC,MAAM,IAAI,WAAW,EAAE,CAAC;QAChC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;IAED,IAAI,KAAe,CAAC;IAEpB,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QACd,uBAAuB;QACvB,KAAK,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACjE,CAAC;IACL,CAAC;IAED,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QACb,wBAAwB;QACxB,KAAK,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACjE,CAAC;IACL,CAAC;IAED,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QACb,gCAAgC;QAChC,KAAK,GAAG,yBAAyB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACpD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,OAAO,eAAe,CAA
C,KAAK,EAAE,WAAW,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/F,CAAC;IACL,CAAC;IAED,2BAA2B;IAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,eAAe,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IAC/C,CAAC;IAED,mDAAmD;IACnD,OAAO,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAe,EAAE,UAAkB;IACxD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,SAAS,GAAG,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,UAAU,EAAE,CAAC;YAC5E,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACnB,CAAC;aAAM,CAAC;YACJ,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QAC1D,CAAC;IACL,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,MAAgB,EAAE,YAAoB;IACxD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,YAAY,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAE3D,MAAM,MAAM,GAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChC,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC;QAEnD,6DAA6D;QAC7D,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QAElF,MAAM,CAAC,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,QAAgB,EAAE,MAAoB;IACjF,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACd,CAAC;IAED,MAAM,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,aAAa,IAAI,qBAAqB,CAAC,GAAG,CAAC,CAAC;IAC/E,MAAM,YAAY,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,IAAI,
sBAAsB,CAAC,GAAG,CAAC,CAAC;IAElF,oBAAoB;IACpB,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAE3D,mBAAmB;IACnB,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QACpB,OAAO,EAAE,CAAC;IACd,CAAC;IAED,kBAAkB;IAClB,MAAM,KAAK,GAAG,OAAO,IAAI,mBAAmB,CAAC,SAAS,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,QAAQ,CAAC;IAEjG,gCAAgC;IAChC,MAAM,SAAS,GAAG,cAAc,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;IAEzD,gBAAgB;IAChB,MAAM,gBAAgB,GAAG,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAE/D,6EAA6E;IAC7E,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7C,IAAI,CAAC,SAAS;YAAE,SAAS;QAEzB,sEAAsE;QACtE,sDAAsD;QACtD,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAClD,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;QACnD,MAAM,WAAW,GAAG,GAAG,IAAI,CAAC;YACxB,CAAC,CAAC,wBAAwB,CAAC,SAAS,EAAE,GAAG,CAAC;YAC1C,CAAC,CAAC,EAAE,CAAC;QACT,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC;YACX,UAAU,GAAG,GAAG,CAAC;QACrB,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACR,OAAO,EAAE,SAAS;YAClB,KAAK;YACL,WAAW;YACX,UAAU,EAAE,MAAM,CAAC,MAAM;SAC5B,CAAC,CAAC;IACP,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
@@ -0,0 +1,11 @@
1
+ import { type ChunkOutput, type SourceConfig } from '../../types.js';
2
/**
 * Split plain text into embedding-friendly chunks on paragraph boundaries.
 *
 * @param content - The full file content
 * @param _filePath - Path to the source file; unused, present only so the
 *                    function matches the chunker registry signature
 * @param config - Source configuration with chunk size parameters
 * @returns Array of ChunkOutput objects
 */
export declare function chunkRawText(content: string, _filePath: string, config: SourceConfig): ChunkOutput[];
11
+ //# sourceMappingURL=raw-text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"raw-text.d.ts","sourceRoot":"","sources":["../../../src/indexing/chunking/raw-text.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKrE;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,WAAW,EAAE,CA0DpG"}