@framers/agentos 0.1.101 → 0.1.103

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +16 -0
  2. package/dist/api/agency.js +1 -1
  3. package/dist/api/agency.js.map +1 -1
  4. package/dist/api/strategies/graph.d.ts.map +1 -1
  5. package/dist/api/strategies/graph.js +1 -0
  6. package/dist/api/strategies/graph.js.map +1 -1
  7. package/dist/api/strategies/sequential.d.ts.map +1 -1
  8. package/dist/api/strategies/sequential.js +1 -0
  9. package/dist/api/strategies/sequential.js.map +1 -1
  10. package/dist/memory/config.d.ts +39 -0
  11. package/dist/memory/config.d.ts.map +1 -1
  12. package/dist/memory/config.js.map +1 -1
  13. package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
  14. package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
  15. package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
  16. package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
  17. package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
  18. package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
  19. package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
  20. package/dist/memory/consolidation/index.d.ts +8 -0
  21. package/dist/memory/consolidation/index.d.ts.map +1 -0
  22. package/dist/memory/consolidation/index.js +7 -0
  23. package/dist/memory/consolidation/index.js.map +1 -0
  24. package/dist/memory/decay/DecayModel.d.ts +33 -0
  25. package/dist/memory/decay/DecayModel.d.ts.map +1 -1
  26. package/dist/memory/decay/DecayModel.js +31 -0
  27. package/dist/memory/decay/DecayModel.js.map +1 -1
  28. package/dist/memory/facade/Memory.d.ts +228 -0
  29. package/dist/memory/facade/Memory.d.ts.map +1 -0
  30. package/dist/memory/facade/Memory.js +823 -0
  31. package/dist/memory/facade/Memory.js.map +1 -0
  32. package/dist/memory/facade/index.d.ts +13 -0
  33. package/dist/memory/facade/index.d.ts.map +1 -0
  34. package/dist/memory/facade/index.js +11 -0
  35. package/dist/memory/facade/index.js.map +1 -0
  36. package/dist/memory/facade/types.d.ts +606 -0
  37. package/dist/memory/facade/types.d.ts.map +1 -0
  38. package/dist/memory/facade/types.js +11 -0
  39. package/dist/memory/facade/types.js.map +1 -0
  40. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
  41. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
  42. package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
  43. package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
  44. package/dist/memory/feedback/index.d.ts +13 -0
  45. package/dist/memory/feedback/index.d.ts.map +1 -0
  46. package/dist/memory/feedback/index.js +12 -0
  47. package/dist/memory/feedback/index.js.map +1 -0
  48. package/dist/memory/index.d.ts +22 -0
  49. package/dist/memory/index.d.ts.map +1 -1
  50. package/dist/memory/index.js +24 -0
  51. package/dist/memory/index.js.map +1 -1
  52. package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
  53. package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
  54. package/dist/memory/ingestion/ChunkingEngine.js +508 -0
  55. package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
  56. package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
  57. package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
  58. package/dist/memory/ingestion/DoclingLoader.js +228 -0
  59. package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
  60. package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
  61. package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
  62. package/dist/memory/ingestion/DocxLoader.js +111 -0
  63. package/dist/memory/ingestion/DocxLoader.js.map +1 -0
  64. package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
  65. package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
  66. package/dist/memory/ingestion/FolderScanner.js +127 -0
  67. package/dist/memory/ingestion/FolderScanner.js.map +1 -0
  68. package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
  69. package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
  70. package/dist/memory/ingestion/HtmlLoader.js +202 -0
  71. package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
  72. package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
  73. package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
  74. package/dist/memory/ingestion/IDocumentLoader.js +11 -0
  75. package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
  76. package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
  77. package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
  78. package/dist/memory/ingestion/LoaderRegistry.js +229 -0
  79. package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
  80. package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
  81. package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
  82. package/dist/memory/ingestion/MarkdownLoader.js +169 -0
  83. package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
  84. package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
  85. package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
  86. package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
  87. package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
  88. package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
  89. package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
  90. package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
  91. package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
  92. package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
  93. package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
  94. package/dist/memory/ingestion/PdfLoader.js +179 -0
  95. package/dist/memory/ingestion/PdfLoader.js.map +1 -0
  96. package/dist/memory/ingestion/TextLoader.d.ts +66 -0
  97. package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
  98. package/dist/memory/ingestion/TextLoader.js +207 -0
  99. package/dist/memory/ingestion/TextLoader.js.map +1 -0
  100. package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
  101. package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
  102. package/dist/memory/ingestion/UrlLoader.js +174 -0
  103. package/dist/memory/ingestion/UrlLoader.js.map +1 -0
  104. package/dist/memory/io/ChatGptImporter.d.ts +85 -0
  105. package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
  106. package/dist/memory/io/ChatGptImporter.js +231 -0
  107. package/dist/memory/io/ChatGptImporter.js.map +1 -0
  108. package/dist/memory/io/JsonExporter.d.ts +67 -0
  109. package/dist/memory/io/JsonExporter.d.ts.map +1 -0
  110. package/dist/memory/io/JsonExporter.js +132 -0
  111. package/dist/memory/io/JsonExporter.js.map +1 -0
  112. package/dist/memory/io/JsonImporter.d.ts +84 -0
  113. package/dist/memory/io/JsonImporter.d.ts.map +1 -0
  114. package/dist/memory/io/JsonImporter.js +234 -0
  115. package/dist/memory/io/JsonImporter.js.map +1 -0
  116. package/dist/memory/io/MarkdownExporter.d.ts +95 -0
  117. package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
  118. package/dist/memory/io/MarkdownExporter.js +130 -0
  119. package/dist/memory/io/MarkdownExporter.js.map +1 -0
  120. package/dist/memory/io/MarkdownImporter.d.ts +84 -0
  121. package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
  122. package/dist/memory/io/MarkdownImporter.js +166 -0
  123. package/dist/memory/io/MarkdownImporter.js.map +1 -0
  124. package/dist/memory/io/ObsidianExporter.d.ts +80 -0
  125. package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
  126. package/dist/memory/io/ObsidianExporter.js +127 -0
  127. package/dist/memory/io/ObsidianExporter.js.map +1 -0
  128. package/dist/memory/io/ObsidianImporter.d.ts +93 -0
  129. package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
  130. package/dist/memory/io/ObsidianImporter.js +221 -0
  131. package/dist/memory/io/ObsidianImporter.js.map +1 -0
  132. package/dist/memory/io/SqliteExporter.d.ts +47 -0
  133. package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
  134. package/dist/memory/io/SqliteExporter.js +56 -0
  135. package/dist/memory/io/SqliteExporter.js.map +1 -0
  136. package/dist/memory/io/SqliteImporter.d.ts +82 -0
  137. package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
  138. package/dist/memory/io/SqliteImporter.js +232 -0
  139. package/dist/memory/io/SqliteImporter.js.map +1 -0
  140. package/dist/memory/io/index.d.ts +31 -0
  141. package/dist/memory/io/index.d.ts.map +1 -0
  142. package/dist/memory/io/index.js +31 -0
  143. package/dist/memory/io/index.js.map +1 -0
  144. package/dist/memory/store/SqliteBrain.d.ts +125 -0
  145. package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
  146. package/dist/memory/store/SqliteBrain.js +407 -0
  147. package/dist/memory/store/SqliteBrain.js.map +1 -0
  148. package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
  149. package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
  150. package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
  151. package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
  152. package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
  153. package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
  154. package/dist/memory/store/SqliteMemoryGraph.js +637 -0
  155. package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
  156. package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
  157. package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
  158. package/dist/memory/tools/MemoryAddTool.js +131 -0
  159. package/dist/memory/tools/MemoryAddTool.js.map +1 -0
  160. package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
  161. package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
  162. package/dist/memory/tools/MemoryDeleteTool.js +96 -0
  163. package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
  164. package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
  165. package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
  166. package/dist/memory/tools/MemoryMergeTool.js +164 -0
  167. package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
  168. package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
  169. package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
  170. package/dist/memory/tools/MemoryReflectTool.js +102 -0
  171. package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
  172. package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
  173. package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
  174. package/dist/memory/tools/MemorySearchTool.js +162 -0
  175. package/dist/memory/tools/MemorySearchTool.js.map +1 -0
  176. package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
  177. package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
  178. package/dist/memory/tools/MemoryUpdateTool.js +125 -0
  179. package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
  180. package/dist/memory/tools/index.d.ts +32 -0
  181. package/dist/memory/tools/index.d.ts.map +1 -0
  182. package/dist/memory/tools/index.js +26 -0
  183. package/dist/memory/tools/index.js.map +1 -0
  184. package/package.json +6 -1
@@ -0,0 +1,508 @@
1
+ /**
2
+ * @fileoverview ChunkingEngine — splits raw document text into `DocumentChunk`
3
+ * slices ready for embedding and vector-store ingestion.
4
+ *
5
+ * Four strategies are supported:
6
+ *
7
+ * - **fixed** — split at a fixed character count with word-boundary
8
+ * awareness and configurable overlap.
9
+ * - **semantic** — embed individual sentences and split where cosine
10
+ * similarity drops below a threshold (topic boundaries).
11
+ * Falls back to `fixed` when no `embedFn` is supplied.
12
+ * - **hierarchical**— honour Markdown heading structure; each heading creates
13
+ * a new chunk boundary with the heading stored in metadata.
14
+ * Long sections are sub-split with `fixed`.
15
+ * - **layout** — preserve fenced code blocks and pipe-delimited tables as
16
+ * atomic chunks; surrounding prose is split with `fixed`.
17
+ *
18
+ * @module memory/ingestion/ChunkingEngine
19
+ */
20
+ // ---------------------------------------------------------------------------
21
+ // Internal constants
22
+ // ---------------------------------------------------------------------------
/**
 * Chunk size, in characters, that `ChunkingEngine.chunk` falls back to when
 * the caller's options carry no `chunkSize`.
 */
const DEFAULT_CHUNK_SIZE = 512;

/**
 * Overlap, in characters, between neighbouring chunks that
 * `ChunkingEngine.chunk` falls back to when the caller's options carry no
 * `chunkOverlap`.
 */
const DEFAULT_CHUNK_OVERLAP = 64;

/**
 * Topic-boundary cut-off for the semantic strategy. Two consecutive sentences
 * whose embedding cosine similarity is strictly below this value are placed
 * in different groups.
 */
const SEMANTIC_SPLIT_THRESHOLD = 0.3;

/**
 * Size ceiling for a semantic sentence group, expressed as a multiple of
 * `chunkSize`. A group longer than `chunkSize * SEMANTIC_MAX_CHUNK_MULTIPLIER`
 * characters is sub-split with the fixed strategy.
 */
const SEMANTIC_MAX_CHUNK_MULTIPLIER = 2;
39
+ // ---------------------------------------------------------------------------
40
+ // Helper — cosine similarity
41
+ // ---------------------------------------------------------------------------
/**
 * Computes the cosine similarity between two dense numeric vectors.
 *
 * The result lies in [-1, 1]: 1 means identical direction, 0 means orthogonal
 * and -1 means opposite direction. When either vector has zero magnitude
 * (including the empty vector) the function returns 0 instead of dividing by
 * zero.
 *
 * Vectors are expected to have equal length. If they do not, the components
 * missing from the shorter vector are treated as 0, so the result is
 * well-defined and independent of argument order rather than `NaN`.
 *
 * @param a - First vector (array or typed array of numbers).
 * @param b - Second vector (array or typed array of numbers).
 * @returns Cosine similarity scalar in [-1, 1].
 */
function cosineSimilarity(a, b) {
    const sharedLength = Math.min(a.length, b.length);
    let dot = 0;
    for (let i = 0; i < sharedLength; i++) {
        dot += a[i] * b[i];
    }
    // Magnitudes use each vector's full length so that a longer `b` is not
    // silently truncated to `a.length`.
    let magA = 0;
    for (let i = 0; i < a.length; i++) {
        magA += a[i] * a[i];
    }
    let magB = 0;
    for (let i = 0; i < b.length; i++) {
        magB += b[i] * b[i];
    }
    const denominator = Math.sqrt(magA) * Math.sqrt(magB);
    if (denominator === 0) {
        return 0;
    }
    // Floating-point rounding can push the quotient a hair outside [-1, 1];
    // clamp so callers can rely on the documented range.
    return Math.max(-1, Math.min(1, dot / denominator));
}
63
+ // ---------------------------------------------------------------------------
64
+ // Helper — fixed strategy (used internally by other strategies)
65
+ // ---------------------------------------------------------------------------
/**
 * Splits `content` into fixed-size character chunks with optional overlap,
 * preferring to break at a whitespace boundary so words are not cut in half.
 * A window that contains no whitespace at all is hard-cut at `chunkSize`.
 *
 * Every produced chunk is trimmed; windows that are empty after trimming are
 * skipped and do not consume an index.
 *
 * The scan position strictly increases on every iteration, so the function
 * always terminates — even when `chunkOverlap` is as large as (or larger than)
 * the text emitted for a window.
 *
 * @param content - Full text to split.
 * @param chunkSize - Target character count per chunk. Fractional values are
 *   rounded down.
 * @param chunkOverlap - Number of trailing characters from the previous chunk
 *   that the next chunk starts with. Fractional values are truncated;
 *   negative, `NaN` or missing values are treated as 0.
 * @param startIndex - The `index` assigned to the first produced chunk.
 *   Useful when merging partial results.
 * @param baseMetadata - Extra metadata fields; each chunk receives its own
 *   shallow copy under `metadata`. Omitted from the chunk when not supplied.
 * @returns Array of chunk objects (`{ content, index, metadata? }`) in order.
 * @throws {RangeError} If `content` is non-empty and `chunkSize` is not a
 *   number >= 1 (such input previously hung or silently dropped text).
 */
function fixedChunks(content, chunkSize, chunkOverlap, startIndex = 0, baseMetadata) {
    if (content.length === 0) {
        return [];
    }
    const size = Math.floor(chunkSize);
    if (!(size >= 1)) {
        throw new RangeError(`fixedChunks: chunkSize must be a number >= 1, received ${String(chunkSize)}`);
    }
    // Integer, non-negative overlap keeps `pos` an integer string index.
    const overlap = Math.max(0, Math.trunc(chunkOverlap) || 0);
    const whitespace = /\s/;
    const chunks = [];
    let chunkIndex = startIndex;
    // Trims a raw window and records it as a chunk unless it is empty.
    const emit = (rawSlice) => {
        const text = rawSlice.trim();
        if (text.length === 0) {
            return;
        }
        chunks.push({
            content: text,
            index: chunkIndex++,
            ...(baseMetadata ? { metadata: { ...baseMetadata } } : {}),
        });
    };
    let pos = 0;
    while (pos < content.length) {
        // Desired end position for this window.
        let end = pos + size;
        if (end >= content.length) {
            // We've reached (or exceeded) the end — take whatever remains.
            emit(content.slice(pos));
            break;
        }
        // Walk backwards from `end` until we land on a whitespace boundary so
        // we never split a word in the middle.
        while (end > pos && !whitespace.test(content[end])) {
            end--;
        }
        // No whitespace found in the whole window — hard-cut.
        if (end === pos) {
            end = pos + size;
        }
        emit(content.slice(pos, end));
        // Step back by the overlap, but only if that still moves forward.
        // When the word-boundary walk-back shortened the window to no more
        // than the overlap, stepping back would revisit the same window
        // forever, so resume exactly at `end` instead.
        const next = end - overlap;
        pos = next > pos ? next : end;
    }
    return chunks;
}
122
+ // ---------------------------------------------------------------------------
123
+ // ChunkingEngine
124
+ // ---------------------------------------------------------------------------
/**
 * Splits text into sentence-like pieces for the semantic strategy.
 *
 * The preferred boundary is "terminal punctuation, whitespace, then an
 * upper-case letter", expressed with lookbehind/lookahead so the punctuation
 * stays attached to its sentence. The pattern is compiled at runtime: a regex
 * *literal* with unsupported syntax is rejected while the module is parsed,
 * so only a `RegExp` constructor call can be guarded by `try/catch`.
 *
 * @param content - Text to split.
 * @returns Pieces that are non-empty after trimming (the pieces themselves are
 *   not trimmed).
 */
function splitIntoSentences(content) {
    let boundary;
    try {
        boundary = new RegExp('(?<=[.!?])\\s+(?=[A-Z])');
    }
    catch {
        // Engine without lookbehind support — deliberate fallback to a
        // simpler split that consumes the terminal punctuation.
        boundary = /[.!?]\s+/;
    }
    return content.split(boundary).filter((s) => s.trim().length > 0);
}
/**
 * Splits raw document text into an ordered array of `DocumentChunk` objects
 * suitable for embedding and storage in a vector index.
 *
 * @example
 * ```typescript
 * const engine = new ChunkingEngine();
 * const chunks = await engine.chunk(content, { strategy: 'fixed', chunkSize: 512 });
 * ```
 */
export class ChunkingEngine {
    // -------------------------------------------------------------------------
    // Public API
    // -------------------------------------------------------------------------
    /**
     * Chunks the provided `content` string according to the given `options`.
     *
     * `options.chunkSize` and `options.chunkOverlap` default to
     * `DEFAULT_CHUNK_SIZE` and `DEFAULT_CHUNK_OVERLAP` when they are `null` or
     * `undefined`. The method is async because the semantic strategy awaits
     * the optional `options.embedFn`.
     *
     * @param content - Full document text to split.
     * @param options - Chunking strategy (`'fixed'`, `'semantic'`,
     *   `'hierarchical'` or `'layout'`) and tuning parameters.
     * @returns Ordered array of `DocumentChunk` objects with sequential indices.
     * @throws {Error} If `options.strategy` is not one of the four strategies.
     */
    async chunk(content, options) {
        const chunkSize = options.chunkSize ?? DEFAULT_CHUNK_SIZE;
        const chunkOverlap = options.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP;
        switch (options.strategy) {
            case 'fixed':
                return this._chunkFixed(content, chunkSize, chunkOverlap);
            case 'semantic':
                return this._chunkSemantic(content, chunkSize, chunkOverlap, options.embedFn);
            case 'hierarchical':
                return this._chunkHierarchical(content, chunkSize, chunkOverlap);
            case 'layout':
                return this._chunkLayout(content, chunkSize, chunkOverlap);
            default:
                throw new Error(`ChunkingEngine: unknown strategy "${String(options.strategy)}"`);
        }
    }
    // -------------------------------------------------------------------------
    // Strategy: fixed
    // -------------------------------------------------------------------------
    /**
     * Splits content at a fixed character count with word-boundary awareness
     * and configurable overlap between consecutive chunks. Thin wrapper over
     * `fixedChunks` with indices starting at 0.
     *
     * @param content - Text to split.
     * @param chunkSize - Target character count per chunk.
     * @param chunkOverlap - Overlap in characters between consecutive chunks.
     * @returns Ordered `DocumentChunk[]`.
     */
    _chunkFixed(content, chunkSize, chunkOverlap) {
        return fixedChunks(content, chunkSize, chunkOverlap, 0);
    }
    // -------------------------------------------------------------------------
    // Strategy: semantic
    // -------------------------------------------------------------------------
    /**
     * Embeds individual sentences and starts a new group wherever the cosine
     * similarity between consecutive sentence embeddings drops below
     * `SEMANTIC_SPLIT_THRESHOLD` (topic boundary heuristic).
     *
     * Falls back to `_chunkFixed` when `embedFn` is not supplied or when the
     * content yields no non-blank sentence. Content that forms a single
     * sentence is treated as one group without calling `embedFn`.
     *
     * Any group — including a lone sentence — longer than
     * `chunkSize * SEMANTIC_MAX_CHUNK_MULTIPLIER` characters is sub-split with
     * the fixed strategy.
     *
     * @param content - Text to split.
     * @param chunkSize - Target character count per chunk.
     * @param chunkOverlap - Overlap used when sub-splitting oversized groups.
     * @param embedFn - Optional batch embedding function; called once with the
     *   array of sentences and expected to resolve to one vector per sentence,
     *   in the same order.
     * @returns Ordered `DocumentChunk[]`.
     * @throws {TypeError} If `embedFn` resolves to fewer embeddings than there
     *   are sentences.
     */
    async _chunkSemantic(content, chunkSize, chunkOverlap, embedFn) {
        // No embedding function → fall back to fixed.
        if (!embedFn) {
            return this._chunkFixed(content, chunkSize, chunkOverlap);
        }
        const sentences = splitIntoSentences(content);
        // Degenerate case: no meaningful sentences.
        if (sentences.length === 0) {
            return this._chunkFixed(content, chunkSize, chunkOverlap);
        }
        const groups = [];
        if (sentences.length === 1) {
            // Nothing to compare against, so skip the embedding call; the
            // size check below still applies to this lone group.
            groups.push(sentences);
        }
        else {
            // Batch-embed all sentences.
            const embeddings = await embedFn(sentences);
            if (!embeddings || !(embeddings.length >= sentences.length)) {
                throw new TypeError(`ChunkingEngine: embedFn returned ${String(embeddings?.length)} embeddings for ${sentences.length} sentences`);
            }
            // Close the current group after sentence i when it is the last
            // sentence or when it is dissimilar to sentence i + 1.
            let currentGroup = [];
            for (let i = 0; i < sentences.length; i++) {
                currentGroup.push(sentences[i]);
                const isLast = i === sentences.length - 1;
                if (isLast || cosineSimilarity(embeddings[i], embeddings[i + 1]) < SEMANTIC_SPLIT_THRESHOLD) {
                    groups.push(currentGroup);
                    currentGroup = [];
                }
            }
        }
        // Convert groups to DocumentChunks, sub-splitting oversized ones.
        const maxGroupSize = chunkSize * SEMANTIC_MAX_CHUNK_MULTIPLIER;
        const result = [];
        let chunkIndex = 0;
        for (const group of groups) {
            const groupText = group.join(' ').trim();
            if (groupText.length === 0) {
                continue;
            }
            if (groupText.length > maxGroupSize) {
                const subChunks = fixedChunks(groupText, chunkSize, chunkOverlap, chunkIndex);
                result.push(...subChunks);
                chunkIndex += subChunks.length;
            }
            else {
                result.push({ content: groupText, index: chunkIndex++ });
            }
        }
        return result;
    }
    // -------------------------------------------------------------------------
    // Strategy: hierarchical
    // -------------------------------------------------------------------------
    /**
     * Recognises Markdown heading lines (`# H1`, `## H2`, …, `###### H6`) and
     * creates a new chunk boundary at each heading. The heading text is stored
     * in `DocumentChunk.heading`, its level in `metadata.headingLevel`, and the
     * texts of its enclosing (shallower) headings in
     * `metadata.ancestorHeadings` when there are any.
     *
     * Text before the first heading becomes a heading-less section. A heading
     * with no body produces a chunk whose content is the heading text itself.
     * Sections whose text exceeds `chunkSize` are sub-split with the fixed
     * strategy while preserving the heading metadata.
     *
     * The `#` marks must be followed by a space or tab on the same line, so a
     * bare `#` line does not turn the following line into a heading.
     *
     * @param content - Markdown-formatted text.
     * @param chunkSize - Maximum characters per output chunk.
     * @param chunkOverlap - Overlap used when sub-splitting oversized sections.
     * @returns Ordered `DocumentChunk[]`.
     */
    _chunkHierarchical(content, chunkSize, chunkOverlap) {
        // `[ \t]+` rather than `\s+`: `\s` also matches a newline, which would
        // let a lone `#` line capture the next line as its heading text.
        const headingRegex = /^(#{1,6})[ \t]+(.+)$/gm;
        // Collect all heading matches first so we can slice between them.
        const matches = [];
        let m;
        while ((m = headingRegex.exec(content)) !== null) {
            matches.push({
                index: m.index,
                end: m.index + m[0].length,
                level: m[1].length,
                text: m[2].trim(),
            });
        }
        const sections = [];
        if (matches.length === 0) {
            // No headings — treat entire content as a single section.
            sections.push({
                heading: undefined,
                headingLevel: undefined,
                ancestorHeadings: [],
                text: content,
            });
        }
        else {
            // Text before the first heading (preamble).
            const preamble = content.slice(0, matches[0].index).trim();
            if (preamble.length > 0) {
                sections.push({
                    heading: undefined,
                    headingLevel: undefined,
                    ancestorHeadings: [],
                    text: preamble,
                });
            }
            // Stack of currently open headings, shallowest first.
            const headingStack = [];
            for (let i = 0; i < matches.length; i++) {
                const match = matches[i];
                const nextIndex = i + 1 < matches.length ? matches[i + 1].index : content.length;
                // The body of this section is the text after the heading line.
                const body = content.slice(match.end, nextIndex).trim();
                // Pop entries at the same level or deeper; what remains are
                // this heading's ancestors.
                while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= match.level) {
                    headingStack.pop();
                }
                const ancestors = headingStack.map((h) => h.text);
                headingStack.push({ level: match.level, text: match.text });
                sections.push({
                    heading: match.text,
                    headingLevel: match.level,
                    ancestorHeadings: ancestors,
                    text: body,
                });
            }
        }
        // Materialise sections into DocumentChunks.
        const result = [];
        let chunkIndex = 0;
        for (const section of sections) {
            const text = section.text;
            if (text.length === 0 && section.heading === undefined) {
                continue;
            }
            // Build metadata common to all chunks from this section.
            const sectionMeta = {};
            if (section.headingLevel !== undefined) {
                sectionMeta.headingLevel = section.headingLevel;
            }
            if (section.ancestorHeadings.length > 0) {
                sectionMeta.ancestorHeadings = section.ancestorHeadings;
            }
            const metadata = Object.keys(sectionMeta).length > 0 ? sectionMeta : undefined;
            if (text.length === 0) {
                // Heading with no body — the heading text doubles as content.
                result.push({
                    content: section.heading ?? '',
                    index: chunkIndex++,
                    heading: section.heading,
                    metadata,
                });
                continue;
            }
            if (text.length <= chunkSize) {
                // Fits in a single chunk.
                result.push({
                    content: text,
                    index: chunkIndex++,
                    heading: section.heading,
                    metadata,
                });
                continue;
            }
            // Sub-split the section body with the fixed strategy, preserving
            // heading metadata on every produced sub-chunk.
            const subChunks = fixedChunks(text, chunkSize, chunkOverlap, chunkIndex, sectionMeta);
            for (const sc of subChunks) {
                result.push({ ...sc, heading: section.heading });
            }
            chunkIndex += subChunks.length;
        }
        return result;
    }
    // -------------------------------------------------------------------------
    // Strategy: layout
    // -------------------------------------------------------------------------
    /**
     * Detects fenced code blocks (``` … ```) and pipe-delimited tables and
     * emits each as an atomic chunk (never split mid-block). Surrounding prose
     * is split with the fixed strategy.
     *
     * Detection rules, applied line by line:
     * - A line starting with three or more backticks opens a code block, which
     *   runs until a line starting with the same backtick run (or the end of
     *   the text when the fence is never closed).
     * - A run of consecutive lines that each contain a `|` is a table.
     * - Everything else is prose.
     *
     * Chunk metadata:
     * - Code blocks: `{ type: 'code' }`
     * - Tables:      `{ type: 'table' }`
     * - Prose:       no special metadata.
     *
     * @param content - Text potentially containing code blocks and tables.
     * @param chunkSize - Target character count for prose chunks.
     * @param chunkOverlap - Overlap for prose fixed-splits.
     * @returns Ordered `DocumentChunk[]`.
     */
    _chunkLayout(content, chunkSize, chunkOverlap) {
        const isFenceLine = (line) => /^```/.test(line);
        const hasPipe = (line) => /\|/.test(line);
        const segments = [];
        const lines = content.split('\n');
        let i = 0;
        while (i < lines.length) {
            // ── Fenced code block ────────────────────────────────────────────
            if (isFenceLine(lines[i])) {
                const fence = lines[i].match(/^(`{3,})/)?.[1] ?? '```';
                const blockLines = [lines[i]];
                i++;
                while (i < lines.length && !lines[i].startsWith(fence)) {
                    blockLines.push(lines[i]);
                    i++;
                }
                // Include the closing fence if present.
                if (i < lines.length) {
                    blockLines.push(lines[i]);
                    i++;
                }
                segments.push({ kind: 'code', text: blockLines.join('\n') });
                continue;
            }
            // ── Table block ──────────────────────────────────────────────────
            // Consecutive lines containing a pipe; a blank (or any pipe-less)
            // line ends the table.
            if (hasPipe(lines[i])) {
                const tableLines = [];
                while (i < lines.length && hasPipe(lines[i])) {
                    tableLines.push(lines[i]);
                    i++;
                }
                segments.push({ kind: 'table', text: tableLines.join('\n') });
                continue;
            }
            // ── Prose ────────────────────────────────────────────────────────
            // Accumulate lines until we hit a code fence or table.
            const proseLines = [];
            while (i < lines.length && !isFenceLine(lines[i]) && !hasPipe(lines[i])) {
                proseLines.push(lines[i]);
                i++;
            }
            const proseText = proseLines.join('\n').trim();
            if (proseText.length > 0) {
                segments.push({ kind: 'prose', text: proseText });
            }
        }
        // Convert segments to DocumentChunks.
        const result = [];
        let chunkIndex = 0;
        for (const seg of segments) {
            if (seg.text.trim().length === 0) {
                continue;
            }
            if (seg.kind === 'prose') {
                // Split prose with the fixed strategy.
                const proseChunks = fixedChunks(seg.text, chunkSize, chunkOverlap, chunkIndex);
                result.push(...proseChunks);
                chunkIndex += proseChunks.length;
            }
            else {
                // 'code' and 'table' segments are emitted whole.
                result.push({
                    content: seg.text,
                    index: chunkIndex++,
                    metadata: { type: seg.kind },
                });
            }
        }
        return result;
    }
}
508
+ //# sourceMappingURL=ChunkingEngine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ChunkingEngine.js","sourceRoot":"","sources":["../../../src/memory/ingestion/ChunkingEngine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAuDH,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,+CAA+C;AAC/C,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,gEAAgE;AAChE,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC;;;;GAIG;AACH,MAAM,wBAAwB,GAAG,GAAG,CAAC;AAErC;;;;GAIG;AACH,MAAM,6BAA6B,GAAG,CAAC,CAAC;AAExC,8EAA8E;AAC9E,6BAA6B;AAC7B,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,SAAS,gBAAgB,CAAC,CAAW,EAAE,CAAW;IAChD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,SAAS,WAAW,CAClB,OAAe,EACf,SAAiB,EACjB,YAAoB,EACpB,aAAqB,CAAC,EACtB,YAAsC;IAEtC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,UAAU,GAAG,UAAU,CAAC;IAE5B,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC5B,wCAAwC;QACxC,IAAI,GAAG,GAAG,GAAG,GAAG,SAAS,CAAC;QAE1B,IAAI,GAAG,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1B,+DAA+D;YAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACxC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,UAAU,EAAE;oBACnB,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,GAAG,YAAY,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAC3D,CAAC,CAAC;YACL,CAAC;YACD,MAAM;QACR,CAAC;QAED,yEAAyE;QACzE,oCAAoC;QACpC,OAAO,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC7C,GAAG,EAAE,CAAC;QACR,CAAC;QAED,iEAAiE;QACjE,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;YAChB,GAAG,GAAG,GAAG,GAAG,SAAS,CAAC;QACxB,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAA
K,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,UAAU,EAAE;gBACnB,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,GAAG,YAAY,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC3D,CAAC,CAAC;QACL,CAAC;QAED,gEAAgE;QAChE,GAAG,GAAG,GAAG,GAAG,YAAY,CAAC;QACzB,IAAI,GAAG,IAAI,CAAC;YAAE,GAAG,GAAG,GAAG,CAAC,CAAC,+CAA+C;IAC1E,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,MAAM,OAAO,cAAc;IACzB,4EAA4E;IAC5E,aAAa;IACb,4EAA4E;IAE5E;;;;;;;;;OASG;IACH,KAAK,CAAC,KAAK,CAAC,OAAe,EAAE,OAAqB;QAChD,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;QAC1D,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC;QAEnE,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;YACzB,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;YAE5D,KAAK,UAAU;gBACb,OAAO,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;YAEhF,KAAK,cAAc;gBACjB,OAAO,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;YAEnE,KAAK,QAAQ;gBACX,OAAO,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;YAE7D,OAAO,CAAC,CAAC,CAAC;gBACR,mCAAmC;gBACnC,MAAM,KAAK,GAAU,OAAO,CAAC,QAAQ,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,qCAAqC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACzE,CAAC;QACH,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;;;;OAQG;IACK,WAAW,CACjB,OAAe,EACf,SAAiB,EACjB,YAAoB;QAEpB,OAAO,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED,4EAA4E;IAC5E,qBAAqB;IACrB,4EAA4E;IAE5E;;;;;;;;;;;;;;;OAeG;IACK,KAAK,CAAC,cAAc,CAC1B,OAAe,EACf,SAAiB,EACjB,YAAoB,EACpB,OAAkD;QAElD,8CAA8C;QAC9C,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;QAC5D,CAAC;QAED,yEAAyE;QACzE,mEAAmE;QACnE,sEAAsE;QACtE,IAAI,SAAmB,CAAC;QACxB,IAAI,CAAC;YACH,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzF,CAAC;QAAC,MAAM,CAAC;YACP,yEAAyE;YACzE,SAAS,GAAG,OAAO,CAAC,KAA
K,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC3E,CAAC;QAED,4CAA4C;QAC5C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;QAC5D,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACtD,CAAC;QAED,6BAA6B;QAC7B,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,CAAC;QAE5C,yEAAyE;QACzE,8CAA8C;QAC9C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC/D,IAAI,GAAG,GAAG,wBAAwB,EAAE,CAAC;gBACnC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAe,EAAE,CAAC;QAC9B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;YAChC,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAC1B,YAAY,GAAG,EAAE,CAAC;YACpB,CAAC;QACH,CAAC;QAED,kEAAkE;QAClE,MAAM,YAAY,GAAG,SAAS,GAAG,6BAA6B,CAAC;QAC/D,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACzC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAErC,IAAI,SAAS,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;gBACpC,qDAAqD;gBACrD,MAAM,SAAS,GAAG,WAAW,CAAC,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;gBAC9E,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;gBAC1B,UAAU,IAAI,SAAS,CAAC,MAAM,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,4EAA4E;IAC5E,yBAAyB;IACzB,4EAA4E;IAE5E;;;;;;;;;;;;OAYG;IACK,kBAAkB,CACxB,OAAe,EACf,SAAiB,EAC
jB,YAAoB;QAEpB,MAAM,YAAY,GAAG,qBAAqB,CAAC;QAW3C,MAAM,QAAQ,GAAc,EAAE,CAAC;QAE/B,sDAAsD;QACtD,MAAM,YAAY,GAA2C,EAAE,CAAC;QAEhE,2DAA2D;QAC3D,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,cAAkC,CAAC;QACvC,IAAI,mBAAuC,CAAC;QAC5C,IAAI,gBAAgB,GAAa,EAAE,CAAC;QAEpC,0DAA0D;QAC1D,MAAM,OAAO,GAAuE,EAAE,CAAC;QACvF,IAAI,CAAyB,CAAC;QAC9B,OAAO,CAAC,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO,CAAC,IAAI,CAAC;gBACX,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;gBAC1B,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM;gBAClB,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;aAClB,CAAC,CAAC;QACL,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,0DAA0D;YAC1D,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,SAAS;gBAClB,YAAY,EAAE,SAAS;gBACvB,gBAAgB,EAAE,EAAE;gBACpB,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,4CAA4C;YAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,QAAQ,CAAC,IAAI,CAAC;oBACZ,OAAO,EAAE,SAAS;oBAClB,YAAY,EAAE,SAAS;oBACvB,gBAAgB,EAAE,EAAE;oBACpB,IAAI,EAAE,QAAQ;iBACf,CAAC,CAAC;YACL,CAAC;YAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACzB,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;gBACjF,+DAA+D;gBAC/D,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;gBAExD,iEAAiE;gBACjE,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC7F,YAAY,CAAC,GAAG,EAAE,CAAC;gBACrB,CAAC;gBACD,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAClD,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;gBAE5D,QAAQ,CAAC,IAAI,CAAC;oBACZ,OAAO,EAAE,KAAK,CAAC,IAAI;oBACnB,YAAY,EAAE,KAAK,CAAC,KAAK;oB
ACzB,gBAAgB,EAAE,SAAS;oBAC3B,IAAI,EAAE,IAAI;iBACX,CAAC,CAAC;gBAEH,KAAK,YAAY,CAAC,CAAC,gCAAgC;gBACnD,KAAK,cAAc,CAAC;gBACpB,KAAK,mBAAmB,CAAC;gBACzB,KAAK,gBAAgB,CAAC;gBACtB,YAAY,GAAG,SAAS,CAAC;gBACzB,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC;gBAC5B,mBAAmB,GAAG,KAAK,CAAC,KAAK,CAAC;gBAClC,gBAAgB,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC;YACpC,CAAC;QACH,CAAC;QAED,4CAA4C;QAC5C,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;YAC1B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS;gBAAE,SAAS;YAEjE,yDAAyD;YACzD,MAAM,WAAW,GAA4B,EAAE,CAAC;YAChD,IAAI,OAAO,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;gBACvC,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;YAClD,CAAC;YACD,IAAI,OAAO,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxC,WAAW,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAC1D,CAAC;YAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,sDAAsD;gBACtD,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;oBAC9B,KAAK,EAAE,UAAU,EAAE;oBACnB,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;iBACxE,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;gBAC7B,0BAA0B;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,IAAI;oBACb,KAAK,EAAE,UAAU,EAAE;oBACnB,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;iBACxE,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,yEAAyE;gBACzE,wCAAwC;gBACxC,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;gBACtF,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;oBAC3B,MAAM,CAAC,IAAI,CAAC;wBACV,GAAG,EAAE;wBACL,OAAO,EAAE,OAAO,CAAC,OAAO;qBACzB,CAAC,CAAC;oBACH,UAAU,EAAE,CAAC;gBACf,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,4EAA4E;IAC5E,mBAAmB;IACnB,4EAA4E;IAE5E;;;;;;;;;;;;;;OAcG;IACK,YAAY,CAClB,OAAe,EACf,SAAiB,EACjB,YAAoB;QAWpB,MAAM,QAAQ,GAAc,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC
lC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YACxB,wEAAwE;YACxE,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC1B,kEAAkE;gBAClE,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC;gBACvD,MAAM,UAAU,GAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,CAAC,EAAE,CAAC;gBACJ,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;oBACvD,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1B,CAAC,EAAE,CAAC;gBACN,CAAC;gBACD,wCAAwC;gBACxC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;oBACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1B,CAAC,EAAE,CAAC;gBACN,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAC7D,SAAS;YACX,CAAC;YAED,yEAAyE;YACzE,kEAAkE;YAClE,4CAA4C;YAC5C,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxB,MAAM,UAAU,GAAa,EAAE,CAAC;gBAChC,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC3E,gEAAgE;oBAChE,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBACpD,MAAM;oBACR,CAAC;oBACD,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1B,CAAC,EAAE,CAAC;gBACN,CAAC;gBACD,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBACzC,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAChE,CAAC;qBAAM,CAAC;oBACN,2CAA2C;oBAC3C,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAChE,CAAC;gBACD,SAAS;YACX,CAAC;YAED,wEAAwE;YACxE,uDAAuD;YACvD,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC1E,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1B,CAAC,EAA
E,CAAC;YACN,CAAC;YACD,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YAC/C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAE3C,QAAQ,GAAG,CAAC,IAAI,EAAE,CAAC;gBACjB,KAAK,MAAM;oBACT,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO,EAAE,GAAG,CAAC,IAAI;wBACjB,KAAK,EAAE,UAAU,EAAE;wBACnB,QAAQ,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;qBAC3B,CAAC,CAAC;oBACH,MAAM;gBAER,KAAK,OAAO;oBACV,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO,EAAE,GAAG,CAAC,IAAI;wBACjB,KAAK,EAAE,UAAU,EAAE;wBACnB,QAAQ,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE;qBAC5B,CAAC,CAAC;oBACH,MAAM;gBAER,KAAK,OAAO,CAAC,CAAC,CAAC;oBACb,uCAAuC;oBACvC,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;oBAC/E,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;oBAC5B,UAAU,IAAI,WAAW,CAAC,MAAM,CAAC;oBACjC,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * @fileoverview DoclingLoader — high-fidelity PDF/DOCX extraction via Python Docling.
3
+ *
4
+ * Docling (https://github.com/DS4SD/docling) is an IBM Research open-source
5
+ * library that converts PDFs and office documents to structured JSON, preserving
6
+ * tables, figures, and layout information far beyond what pure-JS text extraction
7
+ * can achieve.
8
+ *
9
+ * This module provides a factory function {@link createDoclingLoader} that:
10
+ * 1. Checks whether `python3 -m docling --version` succeeds using the `python3` found on the current PATH.
11
+ * 2. If it does, returns a {@link DoclingLoader} instance that spawns a
12
+ * `python3 -m docling` subprocess for each document.
13
+ * 3. If Docling is not installed, returns `null` gracefully.
14
+ *
15
+ * ### Opting in
16
+ * ```sh
17
+ * pip install docling
18
+ * ```
19
+ *
20
+ * @module memory/ingestion/DoclingLoader
21
+ */
22
+ import type { IDocumentLoader } from './IDocumentLoader.js';
23
+ /**
24
+ * Factory for the Docling loader: checks whether `python3 -m docling` is
25
+ * available in the current environment and, if so, returns a new
26
+ * {@link DoclingLoader} instance; otherwise returns `null`.
27
+ *
28
+ * The availability check runs `python3 -m docling --version` synchronously
29
+ * via `spawnSync` — it exits quickly and is only called once during registry
30
+ * initialisation.
31
+ *
32
+ * ### Usage
33
+ * ```ts
34
+ * import { createDoclingLoader } from './DoclingLoader.js';
35
+ * import { PdfLoader } from './PdfLoader.js';
36
+ *
37
+ * const doclingLoader = createDoclingLoader();
38
+ * const loader = new PdfLoader(null, doclingLoader);
39
+ * ```
40
+ *
41
+ * @returns A `DoclingLoader` instance (typed as {@link IDocumentLoader}) when Docling is installed, or `null` when it is not.
42
+ */
43
+ export declare function createDoclingLoader(): IDocumentLoader | null;
44
+ //# sourceMappingURL=DoclingLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DoclingLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/DoclingLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAMH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAgO5D;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,mBAAmB,IAAI,eAAe,GAAG,IAAI,CAgB5D"}