@framers/agentos 0.1.101 → 0.1.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/README.md +16 -0
  2. package/dist/memory/config.d.ts +39 -0
  3. package/dist/memory/config.d.ts.map +1 -1
  4. package/dist/memory/config.js.map +1 -1
  5. package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
  6. package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
  7. package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
  8. package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
  9. package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
  10. package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
  11. package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
  12. package/dist/memory/consolidation/index.d.ts +8 -0
  13. package/dist/memory/consolidation/index.d.ts.map +1 -0
  14. package/dist/memory/consolidation/index.js +7 -0
  15. package/dist/memory/consolidation/index.js.map +1 -0
  16. package/dist/memory/decay/DecayModel.d.ts +33 -0
  17. package/dist/memory/decay/DecayModel.d.ts.map +1 -1
  18. package/dist/memory/decay/DecayModel.js +31 -0
  19. package/dist/memory/decay/DecayModel.js.map +1 -1
  20. package/dist/memory/facade/Memory.d.ts +228 -0
  21. package/dist/memory/facade/Memory.d.ts.map +1 -0
  22. package/dist/memory/facade/Memory.js +823 -0
  23. package/dist/memory/facade/Memory.js.map +1 -0
  24. package/dist/memory/facade/index.d.ts +13 -0
  25. package/dist/memory/facade/index.d.ts.map +1 -0
  26. package/dist/memory/facade/index.js +11 -0
  27. package/dist/memory/facade/index.js.map +1 -0
  28. package/dist/memory/facade/types.d.ts +606 -0
  29. package/dist/memory/facade/types.d.ts.map +1 -0
  30. package/dist/memory/facade/types.js +11 -0
  31. package/dist/memory/facade/types.js.map +1 -0
  32. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
  33. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
  34. package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
  35. package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
  36. package/dist/memory/feedback/index.d.ts +13 -0
  37. package/dist/memory/feedback/index.d.ts.map +1 -0
  38. package/dist/memory/feedback/index.js +12 -0
  39. package/dist/memory/feedback/index.js.map +1 -0
  40. package/dist/memory/index.d.ts +22 -0
  41. package/dist/memory/index.d.ts.map +1 -1
  42. package/dist/memory/index.js +24 -0
  43. package/dist/memory/index.js.map +1 -1
  44. package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
  45. package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
  46. package/dist/memory/ingestion/ChunkingEngine.js +508 -0
  47. package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
  48. package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
  49. package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
  50. package/dist/memory/ingestion/DoclingLoader.js +228 -0
  51. package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
  52. package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
  53. package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
  54. package/dist/memory/ingestion/DocxLoader.js +111 -0
  55. package/dist/memory/ingestion/DocxLoader.js.map +1 -0
  56. package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
  57. package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
  58. package/dist/memory/ingestion/FolderScanner.js +127 -0
  59. package/dist/memory/ingestion/FolderScanner.js.map +1 -0
  60. package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
  61. package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
  62. package/dist/memory/ingestion/HtmlLoader.js +202 -0
  63. package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
  64. package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
  65. package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
  66. package/dist/memory/ingestion/IDocumentLoader.js +11 -0
  67. package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
  68. package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
  69. package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
  70. package/dist/memory/ingestion/LoaderRegistry.js +229 -0
  71. package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
  72. package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
  73. package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
  74. package/dist/memory/ingestion/MarkdownLoader.js +169 -0
  75. package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
  76. package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
  77. package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
  78. package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
  79. package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
  80. package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
  81. package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
  82. package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
  83. package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
  84. package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
  85. package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
  86. package/dist/memory/ingestion/PdfLoader.js +179 -0
  87. package/dist/memory/ingestion/PdfLoader.js.map +1 -0
  88. package/dist/memory/ingestion/TextLoader.d.ts +66 -0
  89. package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
  90. package/dist/memory/ingestion/TextLoader.js +207 -0
  91. package/dist/memory/ingestion/TextLoader.js.map +1 -0
  92. package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
  93. package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
  94. package/dist/memory/ingestion/UrlLoader.js +174 -0
  95. package/dist/memory/ingestion/UrlLoader.js.map +1 -0
  96. package/dist/memory/io/ChatGptImporter.d.ts +85 -0
  97. package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
  98. package/dist/memory/io/ChatGptImporter.js +231 -0
  99. package/dist/memory/io/ChatGptImporter.js.map +1 -0
  100. package/dist/memory/io/JsonExporter.d.ts +67 -0
  101. package/dist/memory/io/JsonExporter.d.ts.map +1 -0
  102. package/dist/memory/io/JsonExporter.js +132 -0
  103. package/dist/memory/io/JsonExporter.js.map +1 -0
  104. package/dist/memory/io/JsonImporter.d.ts +84 -0
  105. package/dist/memory/io/JsonImporter.d.ts.map +1 -0
  106. package/dist/memory/io/JsonImporter.js +234 -0
  107. package/dist/memory/io/JsonImporter.js.map +1 -0
  108. package/dist/memory/io/MarkdownExporter.d.ts +95 -0
  109. package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
  110. package/dist/memory/io/MarkdownExporter.js +130 -0
  111. package/dist/memory/io/MarkdownExporter.js.map +1 -0
  112. package/dist/memory/io/MarkdownImporter.d.ts +84 -0
  113. package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
  114. package/dist/memory/io/MarkdownImporter.js +166 -0
  115. package/dist/memory/io/MarkdownImporter.js.map +1 -0
  116. package/dist/memory/io/ObsidianExporter.d.ts +80 -0
  117. package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
  118. package/dist/memory/io/ObsidianExporter.js +127 -0
  119. package/dist/memory/io/ObsidianExporter.js.map +1 -0
  120. package/dist/memory/io/ObsidianImporter.d.ts +93 -0
  121. package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
  122. package/dist/memory/io/ObsidianImporter.js +221 -0
  123. package/dist/memory/io/ObsidianImporter.js.map +1 -0
  124. package/dist/memory/io/SqliteExporter.d.ts +47 -0
  125. package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
  126. package/dist/memory/io/SqliteExporter.js +56 -0
  127. package/dist/memory/io/SqliteExporter.js.map +1 -0
  128. package/dist/memory/io/SqliteImporter.d.ts +82 -0
  129. package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
  130. package/dist/memory/io/SqliteImporter.js +232 -0
  131. package/dist/memory/io/SqliteImporter.js.map +1 -0
  132. package/dist/memory/io/index.d.ts +31 -0
  133. package/dist/memory/io/index.d.ts.map +1 -0
  134. package/dist/memory/io/index.js +31 -0
  135. package/dist/memory/io/index.js.map +1 -0
  136. package/dist/memory/store/SqliteBrain.d.ts +125 -0
  137. package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
  138. package/dist/memory/store/SqliteBrain.js +407 -0
  139. package/dist/memory/store/SqliteBrain.js.map +1 -0
  140. package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
  141. package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
  142. package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
  143. package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
  144. package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
  145. package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
  146. package/dist/memory/store/SqliteMemoryGraph.js +637 -0
  147. package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
  148. package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
  149. package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
  150. package/dist/memory/tools/MemoryAddTool.js +131 -0
  151. package/dist/memory/tools/MemoryAddTool.js.map +1 -0
  152. package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
  153. package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
  154. package/dist/memory/tools/MemoryDeleteTool.js +96 -0
  155. package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
  156. package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
  157. package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
  158. package/dist/memory/tools/MemoryMergeTool.js +164 -0
  159. package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
  160. package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
  161. package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
  162. package/dist/memory/tools/MemoryReflectTool.js +102 -0
  163. package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
  164. package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
  165. package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
  166. package/dist/memory/tools/MemorySearchTool.js +162 -0
  167. package/dist/memory/tools/MemorySearchTool.js.map +1 -0
  168. package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
  169. package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
  170. package/dist/memory/tools/MemoryUpdateTool.js +125 -0
  171. package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
  172. package/dist/memory/tools/index.d.ts +32 -0
  173. package/dist/memory/tools/index.d.ts.map +1 -0
  174. package/dist/memory/tools/index.js +26 -0
  175. package/dist/memory/tools/index.js.map +1 -0
  176. package/package.json +6 -1
@@ -0,0 +1,228 @@
1
+ /**
2
+ * @fileoverview DoclingLoader — high-fidelity PDF/DOCX extraction via Python Docling.
3
+ *
4
+ * Docling (https://github.com/DS4SD/docling) is an IBM Research open-source
5
+ * library that converts PDFs and office documents to structured JSON, preserving
6
+ * tables, figures, and layout information far beyond what pure-JS text extraction
7
+ * can achieve.
8
+ *
9
+ * This module provides a factory function {@link createDoclingLoader} that:
10
+ * 1. Checks whether `python3 -m docling --version` succeeds in the current PATH.
11
+ * 2. If it does, returns a {@link DoclingLoader} instance that spawns a
12
+ * `python3 -m docling` subprocess for each document.
13
+ * 3. If Docling is not installed, returns `null` gracefully.
14
+ *
15
+ * ### Opting in
16
+ * ```sh
17
+ * pip install docling
18
+ * ```
19
+ *
20
+ * @module memory/ingestion/DoclingLoader
21
+ */
22
+ import { spawn, spawnSync } from 'node:child_process';
23
+ import path from 'node:path';
24
+ import os from 'node:os';
25
+ import fs from 'node:fs/promises';
26
+ // ---------------------------------------------------------------------------
27
+ // Constants
28
+ // ---------------------------------------------------------------------------
29
+ /** Lower-case, dot-prefixed extensions accepted by `canLoad` (Docling supports PDF and DOCX); each loader instance copies this list into `supportedExtensions`. */
30
+ const SUPPORTED_EXTENSIONS = ['.pdf', '.docx'];
31
+ // ---------------------------------------------------------------------------
32
+ // Helpers
33
+ // ---------------------------------------------------------------------------
34
/**
 * Extracts the extension of a path, dot included, normalised to lower case.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The lower-cased extension (e.g. `.pdf`), or `''` when the path has
 *   no extension.
 */
function extOf(filePath) {
    const rawExtension = path.extname(filePath);
    return rawExtension.toLowerCase();
}
42
+ // ---------------------------------------------------------------------------
43
+ // DoclingLoader (internal class)
44
+ // ---------------------------------------------------------------------------
45
/**
 * High-fidelity document loader that delegates to a `python3 -m docling`
 * subprocess.
 *
 * Consumers should use {@link createDoclingLoader} rather than constructing
 * this class directly so that the Python availability check is always run
 * before first use.
 *
 * @implements {IDocumentLoader}
 */
class DoclingLoader {
    constructor() {
        /** Per-instance copy, so callers cannot mutate the shared constant. */
        this.supportedExtensions = [...SUPPORTED_EXTENSIONS];
    }

    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Reports whether this loader can handle `source`.
     *
     * @param source - File path or in-memory Buffer.
     * @returns `true` only for string paths whose (case-insensitive) extension
     *   is in the supported list. Buffers are always rejected because there is
     *   no extension to decide from.
     */
    canLoad(source) {
        if (Buffer.isBuffer(source)) {
            return false;
        }
        return SUPPORTED_EXTENSIONS.includes(extOf(source));
    }

    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Runs Docling on `source` and maps its JSON output to a loaded document.
     *
     * A Buffer is first written into a freshly created private temp directory
     * so Docling has a real path to read; the directory is removed afterwards
     * whether or not extraction succeeded.
     *
     * @param source - Path to a PDF/DOCX file, or the raw file bytes.
     * @param _options - Unused by this loader.
     * @returns The extracted content, metadata and format.
     * @throws When the temp file cannot be written or the Docling subprocess
     *   fails (see {@link DoclingLoader._runDocling}).
     */
    async load(source, _options) {
        if (!Buffer.isBuffer(source)) {
            const pathFormat = extOf(source) === '.docx' ? 'docx' : 'pdf';
            const pathOutput = await this._runDocling(source);
            return this._mapToLoadedDocument(pathOutput, source, pathFormat);
        }
        // DOCX files are ZIP archives (PK\x03\x04); anything else is treated as
        // PDF, which is what the loader always assumed for buffers.
        const looksLikeZip = source.length >= 4 &&
            source[0] === 0x50 && source[1] === 0x4B &&
            source[2] === 0x03 && source[3] === 0x04;
        const bufferFormat = looksLikeZip ? 'docx' : 'pdf';
        // mkdtemp guarantees a unique directory, so concurrent loads can never
        // overwrite (or delete) each other's input the way a timestamp-based
        // file name in the shared tmp dir could.
        const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'docling-input-'));
        try {
            const tempFile = path.join(tempDir, `input.${bufferFormat}`);
            await fs.writeFile(tempFile, source);
            const bufferOutput = await this._runDocling(tempFile);
            return this._mapToLoadedDocument(bufferOutput, undefined, bufferFormat);
        }
        finally {
            // Best-effort cleanup: a failure to delete must not mask the result
            // or the original error.
            await fs.rm(tempDir, { recursive: true, force: true }).catch(() => { });
        }
    }

    // -------------------------------------------------------------------------
    // Private: subprocess invocation
    // -------------------------------------------------------------------------
    /**
     * Spawn `python3 -m docling --output-format json <filePath>` and collect
     * stdout.
     *
     * @param filePath - Path to the PDF or DOCX file.
     * @returns Parsed Docling JSON output.
     * @throws When the subprocess cannot be spawned, exits with a non-zero
     *   code, or stdout is not valid JSON.
     */
    async _runDocling(filePath) {
        return new Promise((resolve, reject) => {
            let stdout = '';
            let stderr = '';
            const proc = spawn('python3', ['-m', 'docling', '--output-format', 'json', filePath], {
                stdio: ['ignore', 'pipe', 'pipe'],
            });
            // Let the streams decode UTF-8 themselves: they buffer incomplete
            // multi-byte sequences across chunk boundaries, whereas decoding
            // each chunk independently would corrupt split characters.
            proc.stdout.setEncoding('utf8');
            proc.stderr.setEncoding('utf8');
            proc.stdout.on('data', (chunk) => {
                stdout += chunk;
            });
            proc.stderr.on('data', (chunk) => {
                stderr += chunk;
            });
            proc.on('close', (code) => {
                if (code !== 0) {
                    reject(new Error(`DoclingLoader: python3 -m docling exited with code ${code}.\n${stderr.slice(0, 500)}`));
                    return;
                }
                try {
                    resolve(JSON.parse(stdout));
                }
                catch (err) {
                    reject(new Error(`DoclingLoader: failed to parse Docling JSON output: ${String(err)}\n` +
                        `stdout (first 500 chars): ${stdout.slice(0, 500)}`));
                }
            });
            // A later 'close' after 'error' is harmless: a settled promise
            // ignores further resolve/reject calls.
            proc.on('error', (err) => {
                reject(new Error(`DoclingLoader: failed to spawn python3: ${err.message}`));
            });
        });
    }

    // -------------------------------------------------------------------------
    // Private: JSON → LoadedDocument mapping
    // -------------------------------------------------------------------------
    /**
     * Convert a Docling JSON output object to a {@link LoadedDocument}.
     *
     * Handles both the newer (`text` top-level string) and older
     * (`pages[].text` array) Docling output shapes. A `null` or non-object
     * root, and `null` page entries, are treated as empty rather than
     * throwing.
     *
     * @param json - Parsed Docling JSON.
     * @param resolvedPath - Original source path for the `source` metadata
     *   field; omitted from the metadata when falsy.
     * @param format - Format tag for the returned document. Defaults to
     *   `'pdf'`, the value previously returned for every input.
     * @returns `{ content, metadata, format }`.
     */
    _mapToLoadedDocument(json, resolvedPath, format = 'pdf') {
        const doc = json !== null && typeof json === 'object' ? json : {};
        // Prefer top-level `text` (Docling v2+), fall back to concatenating pages.
        let content = '';
        if (typeof doc['text'] === 'string') {
            content = doc['text'];
        }
        else if (Array.isArray(doc['pages'])) {
            content = doc['pages']
                .map((page) => (typeof page?.['text'] === 'string' ? page['text'] : ''))
                .join('\n\n');
        }
        const rawMeta = doc['metadata'] ?? {};
        // Accept both camelCase and snake_case page counts; camelCase wins.
        const pageCount = typeof rawMeta['pageCount'] === 'number' ? rawMeta['pageCount'] :
            typeof rawMeta['page_count'] === 'number' ? rawMeta['page_count'] :
                undefined;
        const meta = {
            // An empty title carries no information, so it is dropped.
            ...(typeof rawMeta['title'] === 'string' && rawMeta['title']
                ? { title: rawMeta['title'] }
                : {}),
            ...(typeof rawMeta['author'] === 'string' ? { author: rawMeta['author'] } : {}),
            ...(pageCount !== undefined ? { pageCount } : {}),
            ...(resolvedPath ? { source: resolvedPath } : {}),
        };
        return {
            content,
            metadata: meta,
            format,
        };
    }
}
187
+ // ---------------------------------------------------------------------------
188
+ // Factory
189
+ // ---------------------------------------------------------------------------
190
/**
 * Probes the environment for a working `python3 -m docling` and returns a
 * ready-to-use {@link DoclingLoader} when the probe succeeds, or `null` when
 * it does not.
 *
 * The probe runs `python3 -m docling --version` synchronously via `spawnSync`
 * with a 5 second timeout and discarded stdio.
 *
 * ### Usage
 * ```ts
 * import { createDoclingLoader } from './DoclingLoader.js';
 * import { PdfLoader } from './PdfLoader.js';
 *
 * const doclingLoader = createDoclingLoader();
 * const loader = new PdfLoader(null, doclingLoader);
 * ```
 *
 * @returns A `DoclingLoader` instance when Docling is installed, or `null`.
 */
export function createDoclingLoader() {
    try {
        const probe = spawnSync('python3', ['-m', 'docling', '--version'], {
            stdio: 'ignore',
            timeout: 5000,
        });
        // spawnSync reports spawn failures (e.g. no `python3` in PATH, or the
        // timeout expiring) through `.error`; a non-zero exit status means the
        // interpreter ran but docling is absent.
        const doclingAvailable = probe.error === undefined && probe.status === 0;
        return doclingAvailable ? new DoclingLoader() : null;
    }
    catch {
        // Safety net: anything thrown while probing is treated as "not available".
        return null;
    }
}
228
+ //# sourceMappingURL=DoclingLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DoclingLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/DoclingLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAIlC,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,yEAAyE;AACzE,MAAM,oBAAoB,GAAG,CAAC,MAAM,EAAE,OAAO,CAAU,CAAC;AAExD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAgCD,8EAA8E;AAC9E,iCAAiC;AACjC,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,MAAM,aAAa;IAAnB;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IAqJrE,CAAC;IAnJC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,0EAA0E;YAC1E,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAqB,CAAC,CAAC;IACjG,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,QAAgB,CAAC;QACrB,IAAI,QAAQ,GAAkB,IAAI,CAAC;QAEnC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,kEAAkE;YAClE,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACrE,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YACrC,QAAQ,GAAG,QAAQ,CAAC;QACtB,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,MAAM,CAAC;QACpB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC7F,CAAC;gBAAS,CAAC;YACT,qCAAqC;YACrC,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gBACtB,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAA+B,CAAC,CAAC,CAAC;YACzE,CAAC;QACH,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,iCAAiC;IACjC,4EAA4E;IAE5E;;;;;;;;OAQG;IACK,KAAK,CAAC,WAAW,CAAC,QAAgB;QACxC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAG,EAAE,CAAC;YA
EhB,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE;gBACpF,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;gBACvC,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;gBACvC,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CACd,sDAAsD,IAAI,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CACvF,CAAC,CAAC;oBACH,OAAO;gBACT,CAAC;gBAED,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAsB,CAAC;oBACvD,OAAO,CAAC,MAAM,CAAC,CAAC;gBAClB,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,MAAM,CAAC,IAAI,KAAK,CACd,uDAAuD,MAAM,CAAC,GAAG,CAAC,IAAI;wBACtE,6BAA6B,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CACpD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;gBACvB,MAAM,CAAC,IAAI,KAAK,CAAC,2CAA2C,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC9E,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,4EAA4E;IAC5E,yCAAyC;IACzC,4EAA4E;IAE5E;;;;;;;;OAQG;IACK,oBAAoB,CAC1B,IAAuB,EACvB,YAAqB;QAErB,2EAA2E;QAC3E,IAAI,OAAe,CAAC;QACpB,IAAI,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QACzB,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;YACxC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;iBACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;iBAC5D,IAAI,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;QACvC,MAAM,SAAS,GACb,OAAO,OAAO,CAAC,WAAW,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC;YACjE,OAAO,OAAO,CAAC,YAAY,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC;gBACnE,SAAS,CAAC;QAEZ,MAAM,I
AAI,GAAqB;YAC7B,GAAG,CAAC,OAAO,OAAO,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,OAAO,CAAC;gBAC1D,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE;gBAC7B,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,OAAO,OAAO,CAAC,QAAQ,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/E,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjD,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;CACF;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,WAAW,CAAC,EAAE;YAClE,KAAK,EAAE,QAAQ;YACf,OAAO,EAAE,IAAI;SACd,CAAC,CAAC;QACH,wEAAwE;QACxE,wEAAwE;QACxE,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,IAAI,aAAa,EAAE,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,sDAAsD;QACtD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * @fileoverview DocxLoader — loads `.docx` documents using `mammoth`.
3
+ *
4
+ * The `mammoth` library extracts raw text from OOXML (Office Open XML) Word
5
+ * documents by stripping all formatting and returning the plain-text content.
6
+ * This keeps the ingestion pipeline fast and dependency-light while still
7
+ * producing high-quality text suitable for chunking and embedding.
8
+ *
9
+ * @module memory/ingestion/DocxLoader
10
+ */
11
+ import type { IDocumentLoader } from './IDocumentLoader.js';
12
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
13
+ /**
14
+ * Document loader for Microsoft Word (`.docx`) files.
15
+ *
16
+ * Uses `mammoth.extractRawText()` to strip all styling and return plain
17
+ * prose text, which is then stored as the `content` field. The `metadata`
18
+ * block includes an approximate `wordCount`.
19
+ *
20
+ * @implements {IDocumentLoader}
21
+ *
22
+ * @example
23
+ * ```ts
24
+ * const loader = new DocxLoader();
25
+ * const doc = await loader.load('/docs/spec.docx');
26
+ * console.log(doc.metadata.wordCount); // e.g. 1842
27
+ * ```
28
+ */
29
+ export declare class DocxLoader implements IDocumentLoader {
30
+ /** File extensions (leading dot included) that this loader reports as supported. */
31
+ readonly supportedExtensions: string[];
32
+ /** Reports whether `source` (a file path or an in-memory Buffer) can be handled by this loader. */
33
+ canLoad(source: string | Buffer): boolean;
34
+ /** Loads a `.docx` document from a file path or Buffer and resolves with the extracted document; the underscore-prefixed `_options` is unused by the bundled implementation. */
35
+ load(source: string | Buffer, _options?: LoadOptions): Promise<LoadedDocument>;
36
+ }
37
+ //# sourceMappingURL=DocxLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocxLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/DocxLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAKH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAoB,MAAM,oBAAoB,CAAC;AAsCxF;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,UAAW,YAAW,eAAe;IAChD,kBAAkB;IAClB,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAA6B;IAMnE,kBAAkB;IAClB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IAezC,kBAAkB;IACZ,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;CA+BrF"}
@@ -0,0 +1,111 @@
1
+ /**
2
+ * @fileoverview DocxLoader — loads `.docx` documents using `mammoth`.
3
+ *
4
+ * The `mammoth` library extracts raw text from OOXML (Office Open XML) Word
5
+ * documents by stripping all formatting and returning the plain-text content.
6
+ * This keeps the ingestion pipeline fast and dependency-light while still
7
+ * producing high-quality text suitable for chunking and embedding.
8
+ *
9
+ * @module memory/ingestion/DocxLoader
10
+ */
11
+ import fs from 'node:fs/promises';
12
+ import path from 'node:path';
13
+ import mammoth from 'mammoth';
14
+ // ---------------------------------------------------------------------------
15
+ // Constants
16
+ // ---------------------------------------------------------------------------
17
+ /** Extensions handled by this loader, each lower-case with a leading dot; `canLoad` matches them against the lower-cased extension of path sources. */
18
+ const SUPPORTED_EXTENSIONS = ['.docx'];
19
+ // ---------------------------------------------------------------------------
20
+ // Helpers
21
+ // ---------------------------------------------------------------------------
22
/**
 * Lower-cases and returns the extension portion of a file path, including
 * the leading dot.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The normalised extension (e.g. `.docx`), or `''` when the path has
 *   no extension.
 */
function extOf(filePath) {
    const extension = path.extname(filePath);
    return extension.toLowerCase();
}
30
/**
 * Approximates the number of words in `text`.
 *
 * Surrounding whitespace is ignored and the remainder is split on runs of
 * whitespace — deliberately lightweight for the document sizes seen during
 * ingestion.
 *
 * @param text - Raw text to count.
 * @returns The number of whitespace-separated tokens; `0` for empty or
 *   whitespace-only input.
 */
function wordCount(text) {
    const trimmed = text.trim();
    if (trimmed === '') {
        return 0;
    }
    const tokens = trimmed.split(/\s+/);
    return tokens.length;
}
41
+ // ---------------------------------------------------------------------------
42
+ // DocxLoader
43
+ // ---------------------------------------------------------------------------
44
/**
 * Loader for Microsoft Word (`.docx`) documents.
 *
 * Text is obtained with `mammoth.extractRawText()`, which drops all styling
 * and yields plain prose; that text becomes the `content` field, and the
 * `metadata` block carries an approximate `wordCount`.
 *
 * @implements {IDocumentLoader}
 *
 * @example
 * ```ts
 * const loader = new DocxLoader();
 * const doc = await loader.load('/docs/spec.docx');
 * console.log(doc.metadata.wordCount); // e.g. 1842
 * ```
 */
export class DocxLoader {
    constructor() {
        /** Per-instance copy of the supported extension list. */
        this.supportedExtensions = Array.from(SUPPORTED_EXTENSIONS);
    }

    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Reports whether this loader can handle `source`.
     *
     * @param source - File path or in-memory Buffer.
     * @returns For a path: whether its lower-cased extension is supported.
     *   For a Buffer: whether it starts with the ZIP local-file signature
     *   `PK\x03\x04` (`.docx` files are ZIP archives).
     */
    canLoad(source) {
        if (!Buffer.isBuffer(source)) {
            return SUPPORTED_EXTENSIONS.includes(extOf(source));
        }
        const zipSignature = [0x50, 0x4B, 0x03, 0x04];
        if (source.length < zipSignature.length) {
            return false;
        }
        return zipSignature.every((byte, index) => source[index] === byte);
    }

    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Extracts the plain text of a `.docx` document.
     *
     * @param source - Path of the file to read, or the file bytes themselves.
     * @param _options - Unused by this loader.
     * @returns `{ content, metadata, format: 'docx' }`, where `metadata` holds
     *   `wordCount` and, for path sources, `source`.
     */
    async load(source, _options) {
        const givenBytes = Buffer.isBuffer(source);
        const resolvedPath = givenBytes ? undefined : source;
        const buffer = givenBytes ? source : await fs.readFile(source);
        // mammoth takes a Node Buffer directly, so no temp file is required.
        // Only the extracted text (`value`) is used; conversion `messages`
        // are ignored as they are rarely actionable for text-only extraction.
        const { value: content } = await mammoth.extractRawText({ buffer });
        const metadata = { wordCount: wordCount(content) };
        if (resolvedPath) {
            metadata.source = resolvedPath;
        }
        return { content, metadata, format: 'docx' };
    }
}
111
+ //# sourceMappingURL=DocxLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocxLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/DocxLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAI9B,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,kEAAkE;AAClE,MAAM,oBAAoB,GAAG,CAAC,OAAO,CAAU,CAAC;AAEhD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AAClE,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,UAAU;IAAvB;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IAsDrE,CAAC;IApDC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,uDAAuD;YACvD,0DAA0D;YAC1D,OAAO,MAAM,CAAC,MAAM,IAAI,CAAC;gBACvB,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI;gBACxC,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;QAC7C,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAY,CAAC,CAAC;IACxF,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,MAAc,CAAC;QACnB,IAAI,YAAgC,CAAC;QAErC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,YAAY,GAAG,MAAM,CAAC;YACtB,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;QAC3C,CAAC;QAED,6EAA6E;QAC7E,4EAA4E;QAC5E,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAExD,oEAAoE;QACpE,oEAAoE;QACpE,yBAAyB;QACzB,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;QAE7B,MAAM,IAAI,GAAqB;YAC7B,SAAS,EAAE,SAAS,CAAC,OAAO,CAAC;YAC7B,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,MAAM;SACf,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,116 @@
1
+ /**
2
+ * @fileoverview FolderScanner — recursive directory walker for the ingestion
3
+ * pipeline.
4
+ *
5
+ * FolderScanner is NOT an {@link IDocumentLoader}; instead it orchestrates a
6
+ * {@link LoaderRegistry} to batch-process every file in a directory tree. It
7
+ * supports glob-based include/exclude filters via `minimatch`, per-file
8
+ * progress callbacks, and graceful error collection so a single unreadable file
9
+ * never aborts a whole scan.
10
+ *
11
+ * @module memory/ingestion/FolderScanner
12
+ */
13
+ import type { LoadedDocument } from '../facade/types.js';
14
+ import type { LoaderRegistry } from './LoaderRegistry.js';
15
/**
 * Options accepted by {@link FolderScanner.scan}.
 */
export interface FolderScanOptions {
    /**
     * Controls whether sub-directories are traversed.
     *
     * Set to `false` to restrict the scan to the direct children of `dirPath`.
     *
     * @default true
     */
    recursive?: boolean;
    /**
     * Allow-list of glob patterns. A file is processed only when its path,
     * taken *relative* to the scanned root directory, matches at least one of
     * them (matching is done with {@link minimatch}).
     *
     * Leaving this unset processes every file that has a registered extension.
     *
     * @example ['**\/*.pdf', '**\/*.md']
     */
    include?: Array<string>;
    /**
     * Deny-list of glob patterns. A file whose relative path matches any of
     * them is skipped. This filter runs *after* `include`.
     *
     * @example ['**\/node_modules\/**', '**\/.git\/**']
     */
    exclude?: Array<string>;
    /**
     * Progress hook invoked once per attempted file, whether the load
     * succeeded *or* failed.
     *
     * @param file - Absolute path of the file that was just processed.
     * @param index - 1-based position of the file within the discovered list.
     * @param total - Number of matching files discovered before loading started.
     */
    onProgress?: (file: string, index: number, total: number) => void;
}
53
/**
 * Aggregated outcome returned by {@link FolderScanner.scan}.
 */
export interface FolderScanResult {
    /**
     * One loaded document for every file that was processed successfully.
     */
    documents: LoadedDocument[];
    /**
     * Absolute paths of the files that loaded without error.
     */
    succeeded: string[];
    /**
     * Files that could not be processed, each paired with its error message.
     */
    failed: {
        path: string;
        error: string;
    }[];
}
73
/**
 * Directory scanner that loads every file whose extension has a loader
 * registered in the supplied {@link LoaderRegistry}, descending into
 * sub-directories by default.
 *
 * ### Example
 * ```ts
 * const registry = new LoaderRegistry();
 * const scanner = new FolderScanner(registry);
 *
 * const result = await scanner.scan('/knowledge-base', {
 *   recursive: true,
 *   include: ['**\/*.md', '**\/*.pdf'],
 *   exclude: ['**\/node_modules\/**'],
 *   onProgress: (file, i, total) => console.log(`${i}/${total} ${file}`),
 * });
 *
 * console.log(`Loaded ${result.documents.length} documents`);
 * console.log(`Failed: ${result.failed.length}`);
 * ```
 */
export declare class FolderScanner {
    private readonly registry;
    /**
     * @param registry - {@link LoaderRegistry} that dispatches each discovered
     *   file to the appropriate loader.
     */
    constructor(registry: LoaderRegistry);
    /**
     * Walk `dirPath` and load every matching file.
     *
     * Discovery happens first; the discovered files are then loaded one after
     * another. An error thrown by an individual loader is caught and recorded
     * in {@link FolderScanResult.failed} instead of propagating.
     *
     * @param dirPath - Absolute path to the directory to scan.
     * @param options - Optional include/exclude filters and progress callback.
     * @returns A promise that resolves to a {@link FolderScanResult}.
     *
     * @throws {Error} When `dirPath` cannot be read as a directory (e.g.
     *   it does not exist or is a regular file).
     */
    scan(dirPath: string, options?: FolderScanOptions): Promise<FolderScanResult>;
}
116
+ //# sourceMappingURL=FolderScanner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FolderScanner.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/FolderScanner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAM1D;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;;;;OAMG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;;;;;OAQG;IACH,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAEnB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAEnB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACnE;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,SAAS,EAAE,cAAc,EAAE,CAAC;IAE5B;;OAEG;IACH,SAAS,EAAE,MAAM,EAAE,CAAC;IAEpB;;OAEG;IACH,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAChD;AAMD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,aAAa;IAKZ,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAJrC;;;OAGG;gBAC0B,QAAQ,EAAE,cAAc;IAMrD;;;;;;;;;;;;;OAaG;IACG,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC;CAoFxF"}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * @fileoverview FolderScanner — recursive directory walker for the ingestion
3
+ * pipeline.
4
+ *
5
+ * FolderScanner is NOT an {@link IDocumentLoader}; instead it orchestrates a
6
+ * {@link LoaderRegistry} to batch-process every file in a directory tree. It
7
+ * supports glob-based include/exclude filters via `minimatch`, per-file
8
+ * progress callbacks, and graceful error collection so a single unreadable file
9
+ * never aborts a whole scan.
10
+ *
11
+ * @module memory/ingestion/FolderScanner
12
+ */
13
+ import fs from 'node:fs/promises';
14
+ import path from 'node:path';
15
+ import { minimatch } from 'minimatch';
16
+ // ---------------------------------------------------------------------------
17
+ // FolderScanner
18
+ // ---------------------------------------------------------------------------
19
/**
 * Scans a directory (recursively by default) and loads every file whose
 * extension has a registered loader in the supplied {@link LoaderRegistry}.
 *
 * ### Example
 * ```ts
 * const registry = new LoaderRegistry();
 * const scanner = new FolderScanner(registry);
 *
 * const result = await scanner.scan('/knowledge-base', {
 *   recursive: true,
 *   include: ['**\/*.md', '**\/*.pdf'],
 *   exclude: ['**\/node_modules\/**'],
 *   onProgress: (file, i, total) => console.log(`${i}/${total} ${file}`),
 * });
 *
 * console.log(`Loaded ${result.documents.length} documents`);
 * console.log(`Failed: ${result.failed.length}`);
 * ```
 */
export class FolderScanner {
    /**
     * @param registry - The {@link LoaderRegistry} used to dispatch each file to
     *   the appropriate loader. `scan` calls its `getSupportedExtensions()` and
     *   `loadFile(path)` methods.
     */
    constructor(registry) {
        this.registry = registry;
    }
    // -------------------------------------------------------------------------
    // scan
    // -------------------------------------------------------------------------
    /**
     * Walk `dirPath` and load every matching file.
     *
     * Files are discovered first and then loaded sequentially. Errors thrown
     * by `registry.loadFile` are caught and accumulated in
     * {@link FolderScanResult.failed} rather than propagating.
     *
     * @param dirPath - Directory to scan. An absolute path is expected; a
     *   relative path is resolved against the current working directory so that
     *   every reported path is absolute.
     * @param options - Optional `recursive` flag (defaults to `true`),
     *   include/exclude glob filters and progress callback.
     * @returns A promise that resolves to a {@link FolderScanResult} holding the
     *   loaded documents, the paths that succeeded and the paths that failed.
     *
     * @throws {Error} When `dirPath` cannot be read as a directory (e.g.
     *   it does not exist or is a regular file); the `fs.readdir` rejection is
     *   not caught.
     */
    async scan(dirPath, options = {}) {
        const { recursive = true, include, exclude, onProgress } = options;
        // Resolve once up front: the result contract promises absolute paths,
        // and paths derived from a relative `dirPath` would otherwise stay
        // relative (and silently change meaning if the cwd changes mid-scan).
        const rootDir = path.resolve(dirPath);
        // True when `relativePath` matches at least one of the glob `patterns`.
        // `dot: true` lets wildcards match dot-files and dot-directories.
        const matchesAny = (patterns, relativePath) => patterns.some((pattern) => minimatch(relativePath, pattern, { dot: true }));
        // ------------------------------------------------------------------
        // 1. Discover all candidate file paths.
        // ------------------------------------------------------------------
        const allEntries = await fs.readdir(rootDir, { recursive, withFileTypes: true });
        const supportedExtensions = new Set(this.registry.getSupportedExtensions());
        const candidatePaths = [];
        for (const entry of allEntries) {
            // The listing also contains directories and other non-file
            // entries; only regular files are load candidates.
            if (!entry.isFile()) {
                continue;
            }
            // Only files whose (lower-cased) extension has a registered loader.
            const ext = path.extname(entry.name).toLowerCase();
            if (!supportedExtensions.has(ext)) {
                continue;
            }
            // Newer Node releases expose the containing directory as
            // `parentPath`, older ones as the deprecated `path`; fall back to
            // the scan root when neither is present.
            const parentPath = entry.parentPath ?? entry.path ?? rootDir;
            const absolutePath = path.join(parentPath, entry.name);
            // Globs are evaluated against the path relative to the scan root.
            const relativePath = path.relative(rootDir, absolutePath);
            // Include filter: the file must match at least one pattern.
            if (include && include.length > 0 && !matchesAny(include, relativePath)) {
                continue;
            }
            // Exclude filter (applied after include): the file must match none.
            if (exclude && exclude.length > 0 && matchesAny(exclude, relativePath)) {
                continue;
            }
            candidatePaths.push(absolutePath);
        }
        // ------------------------------------------------------------------
        // 2. Load each candidate file, collecting results and errors.
        // ------------------------------------------------------------------
        const documents = [];
        const succeeded = [];
        const failed = [];
        const total = candidatePaths.length;
        for (const [position, filePath] of candidatePaths.entries()) {
            try {
                const doc = await this.registry.loadFile(filePath);
                documents.push(doc);
                succeeded.push(filePath);
            }
            catch (err) {
                // Record the failure and keep going so one bad file never
                // aborts the whole scan.
                const message = err instanceof Error ? err.message : String(err);
                failed.push({ path: filePath, error: message });
            }
            // Report progress after every attempt, using a 1-based index.
            onProgress?.(filePath, position + 1, total);
        }
        return { documents, succeeded, failed };
    }
}
127
+ //# sourceMappingURL=FolderScanner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FolderScanner.js","sourceRoot":"","sources":["../../../src/memory/ingestion/FolderScanner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAsEtC,8EAA8E;AAC9E,gBAAgB;AAChB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,OAAO,aAAa;IACxB;;;OAGG;IACH,YAA6B,QAAwB;QAAxB,aAAQ,GAAR,QAAQ,CAAgB;IAAG,CAAC;IAEzD,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,IAAI,CAAC,OAAe,EAAE,UAA6B,EAAE;QACzD,MAAM,EACJ,SAAS,GAAG,IAAI,EAChB,OAAO,EACP,OAAO,EACP,UAAU,GACX,GAAG,OAAO,CAAC;QAEZ,qEAAqE;QACrE,wCAAwC;QACxC,qEAAqE;QAErE,MAAM,UAAU,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjF,8DAA8D;QAC9D,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,sBAAsB,EAAE,CAAC,CAAC;QAE5E,MAAM,cAAc,GAAa,EAAE,CAAC;QAEpC,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,oEAAoE;YACpE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;gBAAE,SAAS;YAE9B,gEAAgE;YAChE,wEAAwE;YACxE,2BAA2B;YAC3B,MAAM,UAAU,GACb,KAAgD,CAAC,UAAU;gBAC3D,KAAgD,CAAC,IAAI;gBACtD,OAAO,CAAC;YACV,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAEvD,oDAAoD;YACpD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;YACnD,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAE5C,yCAAyC;YACzC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YAE1D,+DAA+D;YAC/D,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CACvC,SAAS,CAAC,YAAY,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAChD,CAAC;gBACF,IAAI,CAAC,OAAO;oBAAE,SAAS;YACzB,CAAC;YAED,0DAA0D;YAC1D,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CACxC,SAAS,CAAC,YAAY,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAChD,CAAC;gBACF,IAAI,QAAQ;oBAAE,SAAS;YACzB,CAAC;YAED,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACpC,CAAC;QAED,qEAAqE;QACrE,8DAA8D;QAC9D,qEAAqE;QAErE,MAAM,SAAS,GAA
qB,EAAE,CAAC;QACvC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,MAAM,GAA2C,EAAE,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAEnC,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACnD,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpB,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YAClD,CAAC;YAED,0CAA0C;YAC1C,UAAU,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;IAC1C,CAAC;CACF"}