@framers/agentos 0.1.101 → 0.1.103
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/api/agency.js +1 -1
- package/dist/api/agency.js.map +1 -1
- package/dist/api/strategies/graph.d.ts.map +1 -1
- package/dist/api/strategies/graph.js +1 -0
- package/dist/api/strategies/graph.js.map +1 -1
- package/dist/api/strategies/sequential.d.ts.map +1 -1
- package/dist/api/strategies/sequential.js +1 -0
- package/dist/api/strategies/sequential.js.map +1 -1
- package/dist/memory/config.d.ts +39 -0
- package/dist/memory/config.d.ts.map +1 -1
- package/dist/memory/config.js.map +1 -1
- package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
- package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
- package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
- package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
- package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
- package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
- package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
- package/dist/memory/consolidation/index.d.ts +8 -0
- package/dist/memory/consolidation/index.d.ts.map +1 -0
- package/dist/memory/consolidation/index.js +7 -0
- package/dist/memory/consolidation/index.js.map +1 -0
- package/dist/memory/decay/DecayModel.d.ts +33 -0
- package/dist/memory/decay/DecayModel.d.ts.map +1 -1
- package/dist/memory/decay/DecayModel.js +31 -0
- package/dist/memory/decay/DecayModel.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts +228 -0
- package/dist/memory/facade/Memory.d.ts.map +1 -0
- package/dist/memory/facade/Memory.js +823 -0
- package/dist/memory/facade/Memory.js.map +1 -0
- package/dist/memory/facade/index.d.ts +13 -0
- package/dist/memory/facade/index.d.ts.map +1 -0
- package/dist/memory/facade/index.js +11 -0
- package/dist/memory/facade/index.js.map +1 -0
- package/dist/memory/facade/types.d.ts +606 -0
- package/dist/memory/facade/types.d.ts.map +1 -0
- package/dist/memory/facade/types.js +11 -0
- package/dist/memory/facade/types.js.map +1 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
- package/dist/memory/feedback/index.d.ts +13 -0
- package/dist/memory/feedback/index.d.ts.map +1 -0
- package/dist/memory/feedback/index.js +12 -0
- package/dist/memory/feedback/index.js.map +1 -0
- package/dist/memory/index.d.ts +22 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +24 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
- package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
- package/dist/memory/ingestion/ChunkingEngine.js +508 -0
- package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
- package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
- package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/DoclingLoader.js +228 -0
- package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
- package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
- package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/DocxLoader.js +111 -0
- package/dist/memory/ingestion/DocxLoader.js.map +1 -0
- package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
- package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
- package/dist/memory/ingestion/FolderScanner.js +127 -0
- package/dist/memory/ingestion/FolderScanner.js.map +1 -0
- package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
- package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/HtmlLoader.js +202 -0
- package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
- package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
- package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/IDocumentLoader.js +11 -0
- package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
- package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
- package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
- package/dist/memory/ingestion/LoaderRegistry.js +229 -0
- package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
- package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
- package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/MarkdownLoader.js +169 -0
- package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
- package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
- package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
- package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
- package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
- package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
- package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
- package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
- package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
- package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/PdfLoader.js +179 -0
- package/dist/memory/ingestion/PdfLoader.js.map +1 -0
- package/dist/memory/ingestion/TextLoader.d.ts +66 -0
- package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/TextLoader.js +207 -0
- package/dist/memory/ingestion/TextLoader.js.map +1 -0
- package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
- package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/UrlLoader.js +174 -0
- package/dist/memory/ingestion/UrlLoader.js.map +1 -0
- package/dist/memory/io/ChatGptImporter.d.ts +85 -0
- package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
- package/dist/memory/io/ChatGptImporter.js +231 -0
- package/dist/memory/io/ChatGptImporter.js.map +1 -0
- package/dist/memory/io/JsonExporter.d.ts +67 -0
- package/dist/memory/io/JsonExporter.d.ts.map +1 -0
- package/dist/memory/io/JsonExporter.js +132 -0
- package/dist/memory/io/JsonExporter.js.map +1 -0
- package/dist/memory/io/JsonImporter.d.ts +84 -0
- package/dist/memory/io/JsonImporter.d.ts.map +1 -0
- package/dist/memory/io/JsonImporter.js +234 -0
- package/dist/memory/io/JsonImporter.js.map +1 -0
- package/dist/memory/io/MarkdownExporter.d.ts +95 -0
- package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
- package/dist/memory/io/MarkdownExporter.js +130 -0
- package/dist/memory/io/MarkdownExporter.js.map +1 -0
- package/dist/memory/io/MarkdownImporter.d.ts +84 -0
- package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
- package/dist/memory/io/MarkdownImporter.js +166 -0
- package/dist/memory/io/MarkdownImporter.js.map +1 -0
- package/dist/memory/io/ObsidianExporter.d.ts +80 -0
- package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
- package/dist/memory/io/ObsidianExporter.js +127 -0
- package/dist/memory/io/ObsidianExporter.js.map +1 -0
- package/dist/memory/io/ObsidianImporter.d.ts +93 -0
- package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
- package/dist/memory/io/ObsidianImporter.js +221 -0
- package/dist/memory/io/ObsidianImporter.js.map +1 -0
- package/dist/memory/io/SqliteExporter.d.ts +47 -0
- package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
- package/dist/memory/io/SqliteExporter.js +56 -0
- package/dist/memory/io/SqliteExporter.js.map +1 -0
- package/dist/memory/io/SqliteImporter.d.ts +82 -0
- package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
- package/dist/memory/io/SqliteImporter.js +232 -0
- package/dist/memory/io/SqliteImporter.js.map +1 -0
- package/dist/memory/io/index.d.ts +31 -0
- package/dist/memory/io/index.d.ts.map +1 -0
- package/dist/memory/io/index.js +31 -0
- package/dist/memory/io/index.js.map +1 -0
- package/dist/memory/store/SqliteBrain.d.ts +125 -0
- package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
- package/dist/memory/store/SqliteBrain.js +407 -0
- package/dist/memory/store/SqliteBrain.js.map +1 -0
- package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
- package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
- package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
- package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
- package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
- package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
- package/dist/memory/store/SqliteMemoryGraph.js +637 -0
- package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
- package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
- package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryAddTool.js +131 -0
- package/dist/memory/tools/MemoryAddTool.js.map +1 -0
- package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
- package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryDeleteTool.js +96 -0
- package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
- package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
- package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryMergeTool.js +164 -0
- package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
- package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
- package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryReflectTool.js +102 -0
- package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
- package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
- package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
- package/dist/memory/tools/MemorySearchTool.js +162 -0
- package/dist/memory/tools/MemorySearchTool.js.map +1 -0
- package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
- package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryUpdateTool.js +125 -0
- package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
- package/dist/memory/tools/index.d.ts +32 -0
- package/dist/memory/tools/index.d.ts.map +1 -0
- package/dist/memory/tools/index.js +26 -0
- package/dist/memory/tools/index.js.map +1 -0
- package/package.json +6 -1
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview LoaderRegistry — extension-to-loader routing table.
|
|
3
|
+
*
|
|
4
|
+
* The registry maintains a map of file extensions to {@link IDocumentLoader}
|
|
5
|
+
* implementations and provides a convenience `loadFile()` method that
|
|
6
|
+
* auto-detects the format from a file path before delegating to the
|
|
7
|
+
* appropriate loader.
|
|
8
|
+
*
|
|
9
|
+
* On construction the registry pre-registers five built-in loaders:
|
|
10
|
+
* {@link TextLoader}, {@link MarkdownLoader}, {@link HtmlLoader},
|
|
11
|
+
* {@link PdfLoader}, and {@link DocxLoader}. In addition, the optional
|
|
12
|
+
* {@link OcrPdfLoader} and {@link DoclingLoader} are registered when their
|
|
13
|
+
* respective factories return non-null values (i.e. when `tesseract.js` and
|
|
14
|
+
* `python3 -m docling` are available in the environment).
|
|
15
|
+
*
|
|
16
|
+
* Additional loaders can be added at runtime via {@link LoaderRegistry.register}.
|
|
17
|
+
*
|
|
18
|
+
* @module memory/ingestion/LoaderRegistry
|
|
19
|
+
*/
|
|
20
|
+
import path from 'node:path';
|
|
21
|
+
import { TextLoader } from './TextLoader.js';
|
|
22
|
+
import { MarkdownLoader } from './MarkdownLoader.js';
|
|
23
|
+
import { HtmlLoader } from './HtmlLoader.js';
|
|
24
|
+
import { PdfLoader } from './PdfLoader.js';
|
|
25
|
+
import { DocxLoader } from './DocxLoader.js';
|
|
26
|
+
import { createOcrPdfLoader } from './OcrPdfLoader.js';
|
|
27
|
+
import { createDoclingLoader } from './DoclingLoader.js';
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Helpers
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
/**
 * Derive the normalised extension from an arbitrary string.
 *
 * Accepts both bare extensions (`.pdf`, `pdf`) and full file paths
 * (`/docs/report.pdf`, `C:\docs\report.pdf`). Always returns the extension
 * lower-cased with a leading dot, or an empty string when no extension can
 * be determined.
 *
 * Note that a bare word without any dot or separator (e.g. `Makefile`) is
 * indistinguishable from a bare extension and is therefore returned as
 * `.makefile`.
 *
 * @param extensionOrPath - Bare extension or full file path.
 * @returns Lower-cased extension including the leading dot, or `''`.
 *
 * @example
 * normaliseExt('.PDF')                 // → '.pdf'
 * normaliseExt('file.PDF')             // → '.pdf'
 * normaliseExt('pdf')                  // → '.pdf'
 * normaliseExt('/notes.md')            // → '.md'
 * normaliseExt('C:\\docs.v2\\readme')  // → ''
 */
function normaliseExt(extensionOrPath) {
    // A bare extension contains no directory separators and at most one dot,
    // located at position 0.
    const hasBackslash = extensionOrPath.includes('\\');
    const hasSeparator = hasBackslash || extensionOrPath.includes('/');
    const hasDotInMiddle = extensionOrPath.lastIndexOf('.') > 0;
    if (!hasSeparator && !hasDotInMiddle) {
        // Bare extension like 'pdf' or '.pdf'.
        const stripped = extensionOrPath.startsWith('.')
            ? extensionOrPath.slice(1)
            : extensionOrPath;
        return stripped ? `.${stripped.toLowerCase()}` : '';
    }
    // The check above already treats `\` as a directory separator, so the
    // extension lookup must do the same. The platform default on POSIX would
    // see `C:\docs.v2\readme` as one file name and report `.v2\readme`.
    // `path.win32` understands both `/` and `\`; it is only selected when a
    // backslash is present so plain POSIX paths keep their exact behaviour.
    const extname = hasBackslash ? path.win32.extname : path.extname;
    return extname(extensionOrPath).toLowerCase();
}
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// LoaderRegistry
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
 * Central registry mapping file extensions to {@link IDocumentLoader}
 * implementations.
 *
 * ### Built-in loaders (registered automatically)
 * | Extensions                                         | Loader                 |
 * |----------------------------------------------------|------------------------|
 * | `.txt`, `.csv`, `.tsv`, `.json`, `.yaml`, `.yml`   | {@link TextLoader}     |
 * | `.md`, `.mdx`                                      | {@link MarkdownLoader} |
 * | `.html`, `.htm`                                    | {@link HtmlLoader}     |
 * | `.pdf`                                             | {@link PdfLoader}      |
 * | `.docx`                                            | {@link DocxLoader}     |
 *
 * ### Conditional loaders
 * | Condition                           | Effect                                                              |
 * |-------------------------------------|---------------------------------------------------------------------|
 * | `createOcrPdfLoader()` is non-null  | {@link OcrPdfLoader} is passed into {@link PdfLoader}; it is not registered on its own |
 * | `createDoclingLoader()` is non-null | {@link DoclingLoader} is passed into {@link PdfLoader} and also registered for every extension it declares |
 *
 * ### Registering a custom loader
 * ```ts
 * const registry = new LoaderRegistry();
 * registry.register(new PdfLoader());
 * const doc = await registry.loadFile('/reports/q3.pdf');
 * ```
 *
 * ### Using loadFile
 * ```ts
 * const registry = new LoaderRegistry();
 * const doc = await registry.loadFile('/notes/meeting.md');
 * console.log(doc.metadata.title);
 * ```
 */
export class LoaderRegistry {
    /**
     * Creates a new registry pre-populated with the built-in loaders.
     *
     * Loader registration order determines conflict resolution: later
     * registrations override earlier ones for the same extension.
     *
     * Registration order:
     * 1. {@link TextLoader}, {@link MarkdownLoader}, {@link HtmlLoader} — core text formats.
     * 2. {@link PdfLoader}, constructed with the results of
     *    `createOcrPdfLoader()` and `createDoclingLoader()`.
     * 3. {@link DocxLoader} — DOCX extraction.
     * 4. Optional: the Docling loader itself, when its factory returned a
     *    non-null value. It replaces the entry for every extension it
     *    declares (per the original comments: `.pdf` and `.docx`).
     *
     * The OCR loader is never registered directly; it is only handed to
     * {@link PdfLoader}.
     */
    constructor() {
        /**
         * Internal map from lower-cased extension (with dot) to the loader
         * responsible for that extension.
         *
         * When multiple loaders claim the same extension the last one
         * registered wins, allowing callers to override built-in loaders.
         */
        this._loaders = new Map();
        // Core text-format loaders.
        this.register(new TextLoader());
        this.register(new MarkdownLoader());
        this.register(new HtmlLoader());
        // Probe optional loaders before constructing PdfLoader so they can be
        // injected into it rather than living as separate OCR registrations.
        const ocrLoader = createOcrPdfLoader();
        const doclingLoader = createDoclingLoader();
        // PDF loader — receives the optional loaders as constructor arguments.
        this.register(new PdfLoader(ocrLoader, doclingLoader));
        // DOCX loader.
        this.register(new DocxLoader());
        // When Docling is available register it separately so it also claims
        // the extensions it declares on top of the loaders registered above.
        if (doclingLoader !== null) {
            this.register(doclingLoader);
        }
    }
    // -------------------------------------------------------------------------
    // register
    // -------------------------------------------------------------------------
    /**
     * Register a loader for all extensions it declares.
     *
     * If a previously registered loader already handles one of the
     * extensions, it is replaced. This makes it trivial to swap in a
     * higher-fidelity implementation for any format.
     *
     * Declared extensions are stored lower-cased and with a leading dot, so a
     * loader that declares `'PDF'` or `'pdf'` is reachable through
     * `getLoader('.pdf')` exactly like one that declares `'.pdf'`.
     *
     * @param loader - The loader instance to register.
     *
     * @example
     * ```ts
     * registry.register(new PdfLoader());
     * ```
     */
    register(loader) {
        for (const ext of loader.supportedExtensions) {
            const lowered = ext.toLowerCase();
            // Lookups always use a dotted key, so add the dot when the loader
            // omitted it. An empty string is left untouched: it is the key
            // produced for extension-less paths.
            const key = lowered === '' || lowered.startsWith('.') ? lowered : `.${lowered}`;
            this._loaders.set(key, loader);
        }
    }
    // -------------------------------------------------------------------------
    // getLoader
    // -------------------------------------------------------------------------
    /**
     * Retrieve the loader registered for `extensionOrPath`.
     *
     * Both bare extensions (`.md`, `md`) and full file paths
     * (`/docs/guide.md`) are accepted.
     *
     * @param extensionOrPath - File extension or full path.
     * @returns The matching {@link IDocumentLoader}, or `undefined` when no
     *          loader is registered for the detected extension.
     *
     * @example
     * ```ts
     * const loader = registry.getLoader('.md');
     * const loader2 = registry.getLoader('README.md');
     * ```
     */
    getLoader(extensionOrPath) {
        const ext = normaliseExt(extensionOrPath);
        return this._loaders.get(ext);
    }
    // -------------------------------------------------------------------------
    // getSupportedExtensions
    // -------------------------------------------------------------------------
    /**
     * Return a sorted array of all extensions currently registered.
     *
     * Each extension is returned with a leading dot in lower-case, e.g.
     * `['.csv', '.htm', '.html', '.json', '.md', …]`.
     *
     * @returns Sorted array of registered extension strings.
     */
    getSupportedExtensions() {
        return [...this._loaders.keys()].sort();
    }
    // -------------------------------------------------------------------------
    // loadFile
    // -------------------------------------------------------------------------
    /**
     * Convenience method: detect format from `filePath`, find the matching
     * loader, and delegate to its `load()` method.
     *
     * @param filePath - Absolute (or resolvable relative) file path.
     * @param options  - Optional load hints forwarded to the loader.
     * @returns A promise resolving to the {@link LoadedDocument}.
     *
     * @throws {Error} When no loader is registered for the file's extension.
     * @throws {Error} When the underlying loader's `load()` throws.
     *
     * @example
     * ```ts
     * const doc = await registry.loadFile('/notes/architecture.md');
     * ```
     */
    async loadFile(filePath, options) {
        const loader = this.getLoader(filePath);
        if (!loader) {
            const ext = normaliseExt(filePath);
            throw new Error(`LoaderRegistry: no loader registered for extension "${ext}" (file: "${filePath}"). ` +
                `Supported extensions: ${this.getSupportedExtensions().join(', ')}.`);
        }
        return loader.load(filePath, options);
    }
}
|
|
229
|
+
//# sourceMappingURL=LoaderRegistry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LoaderRegistry.js","sourceRoot":"","sources":["../../../src/memory/ingestion/LoaderRegistry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEzD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;;;;;;;;;;;GAcG;AACH,SAAS,YAAY,CAAC,eAAuB;IAC3C,4EAA4E;IAC5E,2EAA2E;IAC3E,0DAA0D;IAC1D,MAAM,YAAY,GAAG,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACrF,MAAM,cAAc,GAAG,eAAe,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAE5D,IAAI,CAAC,YAAY,IAAI,CAAC,cAAc,EAAE,CAAC;QACrC,uCAAuC;QACvC,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,GAAG,CAAC;YAC9C,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC;YAC1B,CAAC,CAAC,eAAe,CAAC;QACpB,OAAO,QAAQ,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,CAAC;IAED,OAAO,IAAI,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE,CAAC;AACrD,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,MAAM,OAAO,cAAc;IAWzB;;;;;;;;;;;;;;OAcG;IACH;QAzBA;;;;;;;WAOG;QACc,aAAQ,GAAiC,IAAI,GAAG,EAAE,CAAC;QAkBlE,4BAA4B;QAC5B,IAAI,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;QAChC,IAAI,CAAC,QAAQ,CAAC,IAAI,cAAc,EAAE,CAAC,CAAC;QACpC,IAAI,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;QAEhC,wEAAwE;QACxE,0EAA0E;QAC1E,MAAM,SAAS,GAAG,kBAAkB,EAAE,CAAC;QACvC,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;QAE5C,+DAA+D;QAC/D,IAAI,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC,CAAC;QAEvD,eAAe;QACf,IAAI,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;QAEhC,wEAAwE;QACxE,6DAA6D;QAC7D,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;;;;;;OAaG;IACH,QAAQ,CAAC,MAAuB;QAC9B,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;YAC7C,IAAI,
CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,MAAM,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,YAAY;IACZ,4EAA4E;IAE5E;;;;;;;;;;;;;;;OAeG;IACH,SAAS,CAAC,eAAuB;QAC/B,MAAM,GAAG,GAAG,YAAY,CAAC,eAAe,CAAC,CAAC;QAC1C,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED,4EAA4E;IAC5E,yBAAyB;IACzB,4EAA4E;IAE5E;;;;;;;OAOG;IACH,sBAAsB;QACpB,OAAO,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,QAAQ,CAAC,QAAgB,EAAE,OAAqB;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAExC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,uDAAuD,GAAG,aAAa,QAAQ,MAAM;gBACrF,yBAAyB,IAAI,CAAC,sBAAsB,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CACrE,CAAC;QACJ,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACxC,CAAC;CACF"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview MarkdownLoader — loads `.md` and `.mdx` documents.
|
|
3
|
+
*
|
|
4
|
+
* Parses YAML front-matter using the `gray-matter` library, strips it from
|
|
5
|
+
* the returned content, and promotes key metadata fields (title, author,
|
|
6
|
+
* createdAt, etc.) into the {@link DocumentMetadata} shape.
|
|
7
|
+
*
|
|
8
|
+
* When no `title` key is present in the front-matter the loader falls back
|
|
9
|
+
* to extracting the first ATX heading (`# …`) from the document body.
|
|
10
|
+
*
|
|
11
|
+
* @module memory/ingestion/MarkdownLoader
|
|
12
|
+
*/
|
|
13
|
+
import type { IDocumentLoader } from './IDocumentLoader.js';
|
|
14
|
+
import type { LoadOptions, LoadedDocument } from '../facade/types.js';
|
|
15
|
+
/**
|
|
16
|
+
* Document loader for Markdown (`.md`) and MDX (`.mdx`) files.
|
|
17
|
+
*
|
|
18
|
+
* ### Front-matter handling
|
|
19
|
+
* YAML front-matter delimited by `---` is parsed via `gray-matter`. All
|
|
20
|
+
* key-value pairs are merged into {@link DocumentMetadata} as-is, with a
|
|
21
|
+
* handful of well-known keys (`title`, `author`, `createdAt`, `modifiedAt`,
|
|
22
|
+
* `language`) mapped to the corresponding typed metadata fields.
|
|
23
|
+
*
|
|
24
|
+
* ### Title extraction fallback
|
|
25
|
+
* When the front-matter does **not** contain a `title` field the loader
|
|
26
|
+
* searches the document body for the first level-1 ATX heading (`# Title`)
|
|
27
|
+
* and uses that as the title.
|
|
28
|
+
*
|
|
29
|
+
* ### Returned content
|
|
30
|
+
* The `content` field in the returned {@link LoadedDocument} contains the
|
|
31
|
+
* Markdown body **without** the front-matter block.
|
|
32
|
+
*
|
|
33
|
+
* @implements {IDocumentLoader}
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```ts
|
|
37
|
+
* const loader = new MarkdownLoader();
|
|
38
|
+
* const doc = await loader.load('/docs/architecture.md');
|
|
39
|
+
* console.log(doc.metadata.title); // from front-matter or first # heading
|
|
40
|
+
* ```
|
|
41
|
+
*/
|
|
42
|
+
export declare class MarkdownLoader implements IDocumentLoader {
|
|
43
|
+
/** Extensions this loader declares; the implementation initialises it to a fresh copy of `['.md', '.mdx']`. */
|
|
44
|
+
readonly supportedExtensions: string[];
|
|
45
|
+
/** Returns `true` when `source` is a path whose lower-cased extension is `.md` or `.mdx`; the implementation returns `false` for every Buffer. */
|
|
46
|
+
canLoad(source: string | Buffer): boolean;
|
|
47
|
+
/** Decodes a Buffer as UTF-8, or reads the file at the given path, then parses front-matter with `gray-matter`. NOTE(review): how `_options` is used is not visible here — confirm against the implementation. */
|
|
48
|
+
load(source: string | Buffer, _options?: LoadOptions): Promise<LoadedDocument>;
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=MarkdownLoader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MarkdownLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/MarkdownLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAoB,MAAM,oBAAoB,CAAC;AAoExF;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,cAAe,YAAW,eAAe;IACpD,kBAAkB;IAClB,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAA6B;IAMnE,kBAAkB;IAClB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IAazC,kBAAkB;IACZ,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;CAqDrF"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview MarkdownLoader — loads `.md` and `.mdx` documents.
|
|
3
|
+
*
|
|
4
|
+
* Parses YAML front-matter using the `gray-matter` library, strips it from
|
|
5
|
+
* the returned content, and promotes key metadata fields (title, author,
|
|
6
|
+
* createdAt, etc.) into the {@link DocumentMetadata} shape.
|
|
7
|
+
*
|
|
8
|
+
* When no `title` key is present in the front-matter the loader falls back
|
|
9
|
+
* to extracting the first ATX heading (`# …`) from the document body.
|
|
10
|
+
*
|
|
11
|
+
* @module memory/ingestion/MarkdownLoader
|
|
12
|
+
*/
|
|
13
|
+
import fs from 'node:fs/promises';
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
import matter from 'gray-matter';
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Constants
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/** Extensions handled by this loader, each with a leading dot. */
|
|
20
|
+
const SUPPORTED_EXTENSIONS = ['.md', '.mdx'];
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Helpers
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Get the extension of a file path, lower-cased and including its dot.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The lower-cased `path.extname` result (empty string when the
 *          path has no extension).
 */
function extOf(filePath) {
    const rawExtension = path.extname(filePath);
    return rawExtension.toLowerCase();
}
|
|
32
|
+
/**
 * Count the whitespace-separated tokens in a piece of body text.
 *
 * @param text - Markdown body string (front-matter already removed).
 * @returns Number of tokens, or `0` when the text is empty or whitespace only.
 */
function wordCount(text) {
    const trimmed = text.trim();
    if (trimmed === '') {
        return 0;
    }
    const tokens = trimmed.split(/\s+/);
    return tokens.length;
}
|
|
40
|
+
/**
 * Extract the first level-1 ATX heading from a Markdown body.
 *
 * A heading line is `# Title`, optionally indented by up to three spaces and
 * optionally followed by a closing run of `#` characters, which is dropped.
 * Lines inside fenced code blocks (``` or ~~~) are ignored so that shell or
 * Python comments in examples are not mistaken for a title. Headings with no
 * text are skipped.
 *
 * @param body - Markdown body text with front-matter already removed.
 * @returns The trimmed heading text, or `undefined` when no level-1 heading
 *          with text is found.
 */
function extractFirstHeading(body) {
    const fencePattern = /^ {0,3}(`{3,}|~{3,})(.*)$/;
    // `[ \t]+` rather than `\s+`: the separator after `#` must stay on the
    // heading's own line, otherwise an empty `#` line would swallow the
    // following paragraph as its title.
    const headingPattern = /^ {0,3}#[ \t]+(.+)$/;
    // Marker of the currently open code fence, or undefined outside a fence.
    let openFence;
    for (const line of body.split(/\r\n|\r|\n/)) {
        const fence = fencePattern.exec(line);
        if (openFence !== undefined) {
            // A fence closes on a bare marker of the same character that is
            // at least as long as the opening one.
            const closes = fence !== null &&
                fence[1][0] === openFence[0] &&
                fence[1].length >= openFence.length &&
                fence[2].trim() === '';
            if (closes) {
                openFence = undefined;
            }
            continue;
        }
        if (fence !== null) {
            openFence = fence[1];
            continue;
        }
        const heading = headingPattern.exec(line);
        if (heading === null) {
            continue;
        }
        // Drop an optional closing sequence such as `# Title ##`.
        const text = heading[1].trim().replace(/[ \t]+#+$/, '').trim();
        if (text !== '') {
            return text;
        }
    }
    return undefined;
}
|
|
54
|
+
/**
 * Coerce a raw front-matter date value (Date object, ISO string, or number)
 * into an ISO 8601 string.
 *
 * @param value - Raw value from the parsed front-matter data object.
 * @returns The ISO 8601 representation, or `undefined` when the value is of
 *          an unsupported type or does not represent a valid date.
 */
function toIsoString(value) {
    let date;
    if (value instanceof Date) {
        date = value;
    }
    else if (typeof value === 'string' || typeof value === 'number') {
        date = new Date(value);
    }
    else {
        return undefined;
    }
    // `toISOString()` throws a RangeError on an invalid Date, so validity is
    // checked for Date inputs as well as for freshly parsed values.
    return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
}
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// MarkdownLoader
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
/**
 * Document loader for Markdown (`.md`) and MDX (`.mdx`) files.
 *
 * ### Front-matter handling
 * YAML front-matter delimited by `---` is parsed via `gray-matter`. Every
 * key-value pair is copied into {@link DocumentMetadata}. On top of that,
 * `createdAt` and `modifiedAt` are normalised to ISO 8601 strings whenever
 * they can be converted (unconvertible values are kept as written), and
 * `title` is replaced by the resolved title.
 *
 * ### Title extraction fallback
 * When the front-matter does **not** contain a string `title` field the loader
 * searches the document body for the first level-1 ATX heading (`# Title`)
 * and uses that as the title.
 *
 * ### Returned content
 * The `content` field in the returned {@link LoadedDocument} contains the
 * Markdown body **without** the front-matter block.
 *
 * @implements {IDocumentLoader}
 *
 * @example
 * ```ts
 * const loader = new MarkdownLoader();
 * const doc = await loader.load('/docs/architecture.md');
 * console.log(doc.metadata.title); // from front-matter or first # heading
 * ```
 */
export class MarkdownLoader {
    constructor() {
        /** @inheritdoc */
        this.supportedExtensions = [...SUPPORTED_EXTENSIONS];
    }
    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Report whether this loader handles the given source.
     *
     * @param source - File path or raw Buffer.
     * @returns `true` when `source` is a path whose extension is one of the
     *   supported extensions; always `false` for a Buffer.
     */
    canLoad(source) {
        if (Buffer.isBuffer(source)) {
            // A Buffer carries no file extension, so it cannot be matched
            // against the supported extensions here. `load()` itself still
            // accepts a Buffer when the caller invokes this loader directly.
            return false;
        }
        return SUPPORTED_EXTENSIONS.includes(extOf(source));
    }
    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Read a Markdown document, strip its front-matter and build its metadata.
     *
     * @param source - File path to read, or a Buffer holding UTF-8 Markdown.
     * @param _options - Accepted for interface compatibility; not used here.
     * @returns An object with the front-matter-free `content`, the merged
     *   `metadata`, and `format: 'md'`.
     */
    async load(source, _options) {
        let raw;
        let resolvedPath;
        if (Buffer.isBuffer(source)) {
            raw = source.toString('utf8');
        }
        else {
            resolvedPath = source;
            const bytes = await fs.readFile(resolvedPath);
            raw = bytes.toString('utf8');
        }
        // ---- Parse front-matter ----
        const parsed = matter(raw);
        // `parsed.content` is the body with front-matter stripped.
        const body = parsed.content;
        // ---- Build metadata ----
        const fm = parsed.data;
        // Resolve the title from: string front-matter title > first heading.
        const fmTitle = typeof fm['title'] === 'string' ? fm['title'] : undefined;
        const title = fmTitle ?? extractFirstHeading(body);
        // Normalise the date fields up front; `undefined` means "not convertible".
        const createdAt = toIsoString(fm['createdAt']);
        const modifiedAt = toIsoString(fm['modifiedAt']);
        const meta = {
            // Raw front-matter goes FIRST so that the normalised fields below
            // win. Spreading it last would overwrite the ISO strings with the
            // raw values. `author` and `language` pass through unchanged.
            ...fm,
            // Resolved title (may come from the first heading rather than fm).
            ...(title !== undefined ? { title } : {}),
            // Only override when conversion succeeded, so an unconvertible
            // value stays visible exactly as the author wrote it.
            ...(createdAt !== undefined ? { createdAt } : {}),
            ...(modifiedAt !== undefined ? { modifiedAt } : {}),
            // Loader-computed fields always take precedence over front-matter.
            wordCount: wordCount(body),
            ...(resolvedPath ? { source: resolvedPath } : {}),
        };
        return {
            content: body,
            metadata: meta,
            format: 'md',
        };
    }
}
|
|
169
|
+
//# sourceMappingURL=MarkdownLoader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MarkdownLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/MarkdownLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,MAAM,MAAM,aAAa,CAAC;AAIjC,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,kEAAkE;AAClE,MAAM,oBAAoB,GAAG,CAAC,KAAK,EAAE,MAAM,CAAU,CAAC;AAEtD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AAClE,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,8EAA8E;IAC9E,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;AAC7C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,IAAI,KAAK,YAAY,IAAI,EAAE,CAAC;QAC1B,OAAO,KAAK,CAAC,WAAW,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC3D,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1B,OAAO,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IAC1D,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,OAAO,cAAc;IAA3B;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IA0ErE,CAAC;IAxEC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,sEAAsE;YACtE,sEAAsE;YACtE,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,GAAW,CAAC;QAChB,IAAI,YAAgC,CAAC;QAErC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,YAAY,GAAG,MAAM,CAAC;YACtB
,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YAC9C,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QAE3B,2DAA2D;QAC3D,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC;QAE5B,2BAA2B;QAC3B,MAAM,EAAE,GAAG,MAAM,CAAC,IAA+B,CAAC;QAElD,gEAAgE;QAChE,MAAM,OAAO,GACX,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC5D,MAAM,YAAY,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACnF,MAAM,KAAK,GAAG,OAAO,IAAI,YAAY,CAAC;QAEtC,MAAM,IAAI,GAAqB;YAC7B,4BAA4B;YAC5B,GAAG,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,GAAG,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrE,GAAG,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,SAAS;gBAC/B,CAAC,CAAC,EAAE,SAAS,EAAE,WAAW,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE;gBAC7C,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,SAAS;gBAChC,CAAC,CAAC,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,YAAY,CAAC,CAAC,EAAE;gBAC/C,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,OAAO,EAAE,CAAC,UAAU,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3E,4DAA4D;YAC5D,GAAG,EAAE;YACL,oEAAoE;YACpE,mDAAmD;YACnD,GAAG,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,SAAS,EAAE,SAAS,CAAC,IAAI,CAAC;YAC1B,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,IAAI;SACb,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview MultimodalAggregator — post-processing stage for images
|
|
3
|
+
* extracted from documents.
|
|
4
|
+
*
|
|
5
|
+
* After loaders such as {@link PdfLoader} and {@link DocxLoader} extract
|
|
6
|
+
* embedded images as {@link ExtractedImage} objects, `MultimodalAggregator`
|
|
7
|
+
* enriches them with natural-language captions by optionally calling a
|
|
8
|
+
* vision-capable LLM function supplied by the application layer.
|
|
9
|
+
*
|
|
10
|
+
* The class is intentionally thin: it holds no state beyond the optional
|
|
11
|
+
* configuration and delegates all vision intelligence to the caller-supplied
|
|
12
|
+
* `describeImage` function. This keeps the aggregator testable without any
|
|
13
|
+
* live LLM dependencies.
|
|
14
|
+
*
|
|
15
|
+
* @module memory/ingestion/MultimodalAggregator
|
|
16
|
+
*/
|
|
17
|
+
import type { ExtractedImage } from '../facade/types.js';
|
|
18
|
+
/**
|
|
19
|
+
* Configuration options accepted by the {@link MultimodalAggregator} constructor.
|
|
20
|
+
*/
|
|
21
|
+
export interface MultimodalConfig {
|
|
22
|
+
/**
|
|
23
|
+
* Async function that accepts a raw image buffer and its MIME type and
|
|
24
|
+
* returns a natural-language description of the image.
|
|
25
|
+
*
|
|
26
|
+
* When this is omitted or `undefined`, the aggregator returns images unchanged.
|
|
27
|
+
*
|
|
28
|
+
* @param imageBuffer - Raw bytes of the image (PNG, JPEG, WebP, …).
|
|
29
|
+
* @param mimeType - MIME type of the image, e.g. `'image/png'`.
|
|
30
|
+
* @returns A promise resolving to a human-readable description string.
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```ts
|
|
34
|
+
* async (buffer, mimeType) => {
|
|
35
|
+
* return openaiClient.vision(buffer, mimeType);
|
|
36
|
+
* }
|
|
37
|
+
* ```
|
|
38
|
+
*/
|
|
39
|
+
describeImage?: (imageBuffer: Buffer, mimeType: string) => Promise<string>;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Adds auto-generated captions to {@link ExtractedImage} objects that lack
|
|
43
|
+
* one, using a caller-supplied vision LLM function.
|
|
44
|
+
*
|
|
45
|
+
* Images are processed in parallel via {@link Promise.allSettled} so a single
|
|
46
|
+
* failed captioning attempt does not block the rest. Images whose captioning
|
|
47
|
+
* fails retain their original (un-captioned) state rather than propagating the
|
|
48
|
+
* error.
|
|
49
|
+
*
|
|
50
|
+
* ### Example — with a vision LLM
|
|
51
|
+
* ```ts
|
|
52
|
+
* const aggregator = new MultimodalAggregator({
|
|
53
|
+
* describeImage: async (buf, mime) => myVisionLLM.describe(buf, mime),
|
|
54
|
+
* });
|
|
55
|
+
*
|
|
56
|
+
* const captioned = await aggregator.processImages(doc.images ?? []);
|
|
57
|
+
* ```
|
|
58
|
+
*
|
|
59
|
+
* ### Example — passthrough (no LLM configured)
|
|
60
|
+
* ```ts
|
|
61
|
+
* const aggregator = new MultimodalAggregator();
|
|
62
|
+
* const unchanged = await aggregator.processImages(doc.images ?? []);
|
|
63
|
+
* ```
|
|
64
|
+
*/
|
|
65
|
+
export declare class MultimodalAggregator {
|
|
66
|
+
private readonly config?;
|
|
67
|
+
/**
|
|
68
|
+
* @param config - Optional configuration. Omit to use in passthrough mode.
|
|
69
|
+
*/
|
|
70
|
+
constructor(config?: MultimodalConfig | undefined);
|
|
71
|
+
/**
|
|
72
|
+
* Enrich images with captions via the configured vision LLM.
|
|
73
|
+
*
|
|
74
|
+
* Only images whose `caption` is `undefined` are processed. Images
|
|
75
|
+
* that already carry a caption are left unchanged to avoid redundant LLM
|
|
76
|
+
* calls.
|
|
77
|
+
*
|
|
78
|
+
* When no `describeImage` function is configured all images are returned
|
|
79
|
+
* unchanged, in a shallow copy of the input array.
|
|
80
|
+
*
|
|
81
|
+
* @param images - Array of {@link ExtractedImage} objects to process.
|
|
82
|
+
* @returns A promise resolving to a new array holding one
|
|
83
|
+
* {@link ExtractedImage} per input image, with captions filled in where
|
|
84
|
+
* possible.
|
|
85
|
+
*/
|
|
86
|
+
processImages(images: ExtractedImage[]): Promise<ExtractedImage[]>;
|
|
87
|
+
}
|
|
88
|
+
//# sourceMappingURL=MultimodalAggregator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MultimodalAggregator.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/MultimodalAggregator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAMzD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;;;;;;;;;;;;;;OAgBG;IACH,aAAa,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CAC5E;AAMD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,oBAAoB;IAInB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;IAHpC;;OAEG;gBAC0B,MAAM,CAAC,EAAE,gBAAgB,YAAA;IAMtD;;;;;;;;;;;;;;OAcG;IACG,aAAa,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;CAiCzE"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview MultimodalAggregator — post-processing stage for images
|
|
3
|
+
* extracted from documents.
|
|
4
|
+
*
|
|
5
|
+
* After loaders such as {@link PdfLoader} and {@link DocxLoader} extract
|
|
6
|
+
* embedded images as {@link ExtractedImage} objects, `MultimodalAggregator`
|
|
7
|
+
* enriches them with natural-language captions by optionally calling a
|
|
8
|
+
* vision-capable LLM function supplied by the application layer.
|
|
9
|
+
*
|
|
10
|
+
* The class is intentionally thin: it holds no state beyond the optional
|
|
11
|
+
* configuration and delegates all vision intelligence to the caller-supplied
|
|
12
|
+
* `describeImage` function. This keeps the aggregator testable without any
|
|
13
|
+
* live LLM dependencies.
|
|
14
|
+
*
|
|
15
|
+
* @module memory/ingestion/MultimodalAggregator
|
|
16
|
+
*/
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// MultimodalAggregator
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Caption-enrichment stage for {@link ExtractedImage} objects.
 *
 * Every image without a caption is handed to the caller-supplied
 * `describeImage` function. All captioning calls are started together and
 * awaited via {@link Promise.allSettled}, so one failing image never affects
 * the others; an image whose captioning fails is returned exactly as it was
 * passed in, and the error is not propagated.
 *
 * ### Example — with a vision LLM
 * ```ts
 * const aggregator = new MultimodalAggregator({
 *   describeImage: async (buf, mime) => myVisionLLM.describe(buf, mime),
 * });
 *
 * const captioned = await aggregator.processImages(doc.images ?? []);
 * ```
 *
 * ### Example — passthrough (no LLM configured)
 * ```ts
 * const aggregator = new MultimodalAggregator();
 * const unchanged = await aggregator.processImages(doc.images ?? []);
 * ```
 */
export class MultimodalAggregator {
    /**
     * @param config - Optional configuration. Omit to use in passthrough mode.
     */
    constructor(config) {
        this.config = config;
    }
    // -------------------------------------------------------------------------
    // processImages
    // -------------------------------------------------------------------------
    /**
     * Fill in missing captions using the configured vision function.
     *
     * Images whose `caption` is already defined are passed through untouched,
     * which avoids redundant LLM calls. Without a configured `describeImage`
     * the input is returned as a shallow copy.
     *
     * @param images - Array of {@link ExtractedImage} objects to process.
     * @returns A promise resolving to a new array with one entry per input
     *   image; newly captioned images are fresh objects, all others are the
     *   original references.
     */
    async processImages(images) {
        // Passthrough mode: nothing to call, hand back a shallow copy.
        if (!this.config?.describeImage) {
            return images.slice();
        }
        const { describeImage } = this.config;
        // Captions a single image; never rejects for captioning failures.
        const captionOne = async (img) => {
            if (img.caption !== undefined) {
                return img;
            }
            try {
                const caption = await describeImage(img.data, img.mimeType);
                return { ...img, caption };
            }
            catch {
                // Best effort: keep the un-captioned original on failure.
                return img;
            }
        };
        const outcomes = await Promise.allSettled(images.map((img) => captionOne(img)));
        // A rejection can only come from outside the try/catch above; in that
        // case fall back to the corresponding input element.
        return outcomes.map((outcome, position) => {
            if (outcome.status === 'fulfilled') {
                return outcome.value;
            }
            return images[position];
        });
    }
}
|
|
96
|
+
//# sourceMappingURL=MultimodalAggregator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MultimodalAggregator.js","sourceRoot":"","sources":["../../../src/memory/ingestion/MultimodalAggregator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAgCH,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,oBAAoB;IAC/B;;OAEG;IACH,YAA6B,MAAyB;QAAzB,WAAM,GAAN,MAAM,CAAmB;IAAG,CAAC;IAE1D,4EAA4E;IAC5E,gBAAgB;IAChB,4EAA4E;IAE5E;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,aAAa,CAAC,MAAwB;QAC1C,0EAA0E;QAC1E,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,aAAa,EAAE,CAAC;YAChC,OAAO,MAAM,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;QAEhD,gEAAgE;QAChE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAA2B,EAAE;YAClD,2CAA2C;YAC3C,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;gBAChC,OAAO,KAAK,CAAC;YACf,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAChE,OAAO,EAAE,GAAG,KAAK,EAAE,OAAO,EAAE,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,2DAA2D;gBAC3D,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QAEF,yEAAyE;QACzE,2EAA2E;QAC3E,uEAAuE;QACvE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CACnC,MAAM,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAC7D,CAAC;IACJ,CAAC;CACF"}
|