@framers/agentos 0.1.101 → 0.1.102
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/memory/config.d.ts +39 -0
- package/dist/memory/config.d.ts.map +1 -1
- package/dist/memory/config.js.map +1 -1
- package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
- package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
- package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
- package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
- package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
- package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
- package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
- package/dist/memory/consolidation/index.d.ts +8 -0
- package/dist/memory/consolidation/index.d.ts.map +1 -0
- package/dist/memory/consolidation/index.js +7 -0
- package/dist/memory/consolidation/index.js.map +1 -0
- package/dist/memory/decay/DecayModel.d.ts +33 -0
- package/dist/memory/decay/DecayModel.d.ts.map +1 -1
- package/dist/memory/decay/DecayModel.js +31 -0
- package/dist/memory/decay/DecayModel.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts +228 -0
- package/dist/memory/facade/Memory.d.ts.map +1 -0
- package/dist/memory/facade/Memory.js +823 -0
- package/dist/memory/facade/Memory.js.map +1 -0
- package/dist/memory/facade/index.d.ts +13 -0
- package/dist/memory/facade/index.d.ts.map +1 -0
- package/dist/memory/facade/index.js +11 -0
- package/dist/memory/facade/index.js.map +1 -0
- package/dist/memory/facade/types.d.ts +606 -0
- package/dist/memory/facade/types.d.ts.map +1 -0
- package/dist/memory/facade/types.js +11 -0
- package/dist/memory/facade/types.js.map +1 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
- package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
- package/dist/memory/feedback/index.d.ts +13 -0
- package/dist/memory/feedback/index.d.ts.map +1 -0
- package/dist/memory/feedback/index.js +12 -0
- package/dist/memory/feedback/index.js.map +1 -0
- package/dist/memory/index.d.ts +22 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +24 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
- package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
- package/dist/memory/ingestion/ChunkingEngine.js +508 -0
- package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
- package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
- package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/DoclingLoader.js +228 -0
- package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
- package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
- package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/DocxLoader.js +111 -0
- package/dist/memory/ingestion/DocxLoader.js.map +1 -0
- package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
- package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
- package/dist/memory/ingestion/FolderScanner.js +127 -0
- package/dist/memory/ingestion/FolderScanner.js.map +1 -0
- package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
- package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/HtmlLoader.js +202 -0
- package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
- package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
- package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/IDocumentLoader.js +11 -0
- package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
- package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
- package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
- package/dist/memory/ingestion/LoaderRegistry.js +229 -0
- package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
- package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
- package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/MarkdownLoader.js +169 -0
- package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
- package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
- package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
- package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
- package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
- package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
- package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
- package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
- package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
- package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/PdfLoader.js +179 -0
- package/dist/memory/ingestion/PdfLoader.js.map +1 -0
- package/dist/memory/ingestion/TextLoader.d.ts +66 -0
- package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/TextLoader.js +207 -0
- package/dist/memory/ingestion/TextLoader.js.map +1 -0
- package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
- package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
- package/dist/memory/ingestion/UrlLoader.js +174 -0
- package/dist/memory/ingestion/UrlLoader.js.map +1 -0
- package/dist/memory/io/ChatGptImporter.d.ts +85 -0
- package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
- package/dist/memory/io/ChatGptImporter.js +231 -0
- package/dist/memory/io/ChatGptImporter.js.map +1 -0
- package/dist/memory/io/JsonExporter.d.ts +67 -0
- package/dist/memory/io/JsonExporter.d.ts.map +1 -0
- package/dist/memory/io/JsonExporter.js +132 -0
- package/dist/memory/io/JsonExporter.js.map +1 -0
- package/dist/memory/io/JsonImporter.d.ts +84 -0
- package/dist/memory/io/JsonImporter.d.ts.map +1 -0
- package/dist/memory/io/JsonImporter.js +234 -0
- package/dist/memory/io/JsonImporter.js.map +1 -0
- package/dist/memory/io/MarkdownExporter.d.ts +95 -0
- package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
- package/dist/memory/io/MarkdownExporter.js +130 -0
- package/dist/memory/io/MarkdownExporter.js.map +1 -0
- package/dist/memory/io/MarkdownImporter.d.ts +84 -0
- package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
- package/dist/memory/io/MarkdownImporter.js +166 -0
- package/dist/memory/io/MarkdownImporter.js.map +1 -0
- package/dist/memory/io/ObsidianExporter.d.ts +80 -0
- package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
- package/dist/memory/io/ObsidianExporter.js +127 -0
- package/dist/memory/io/ObsidianExporter.js.map +1 -0
- package/dist/memory/io/ObsidianImporter.d.ts +93 -0
- package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
- package/dist/memory/io/ObsidianImporter.js +221 -0
- package/dist/memory/io/ObsidianImporter.js.map +1 -0
- package/dist/memory/io/SqliteExporter.d.ts +47 -0
- package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
- package/dist/memory/io/SqliteExporter.js +56 -0
- package/dist/memory/io/SqliteExporter.js.map +1 -0
- package/dist/memory/io/SqliteImporter.d.ts +82 -0
- package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
- package/dist/memory/io/SqliteImporter.js +232 -0
- package/dist/memory/io/SqliteImporter.js.map +1 -0
- package/dist/memory/io/index.d.ts +31 -0
- package/dist/memory/io/index.d.ts.map +1 -0
- package/dist/memory/io/index.js +31 -0
- package/dist/memory/io/index.js.map +1 -0
- package/dist/memory/store/SqliteBrain.d.ts +125 -0
- package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
- package/dist/memory/store/SqliteBrain.js +407 -0
- package/dist/memory/store/SqliteBrain.js.map +1 -0
- package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
- package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
- package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
- package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
- package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
- package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
- package/dist/memory/store/SqliteMemoryGraph.js +637 -0
- package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
- package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
- package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryAddTool.js +131 -0
- package/dist/memory/tools/MemoryAddTool.js.map +1 -0
- package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
- package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryDeleteTool.js +96 -0
- package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
- package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
- package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryMergeTool.js +164 -0
- package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
- package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
- package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryReflectTool.js +102 -0
- package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
- package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
- package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
- package/dist/memory/tools/MemorySearchTool.js +162 -0
- package/dist/memory/tools/MemorySearchTool.js.map +1 -0
- package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
- package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
- package/dist/memory/tools/MemoryUpdateTool.js +125 -0
- package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
- package/dist/memory/tools/index.d.ts +32 -0
- package/dist/memory/tools/index.d.ts.map +1 -0
- package/dist/memory/tools/index.js +26 -0
- package/dist/memory/tools/index.js.map +1 -0
- package/package.json +6 -1
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview TextLoader — loads plain-text, CSV/TSV, JSON, and YAML files.
|
|
3
|
+
*
|
|
4
|
+
* This is the most general-purpose loader in the AgentOS ingestion pipeline.
|
|
5
|
+
* It handles six extensions that all share the same fundamental operation:
|
|
6
|
+
* read raw text and attach lightweight metadata derived from the file content
|
|
7
|
+
* and extension.
|
|
8
|
+
*
|
|
9
|
+
* Supported extensions: `.txt`, `.csv`, `.tsv`, `.json`, `.yaml`, `.yml`
|
|
10
|
+
*
|
|
11
|
+
* @module memory/ingestion/TextLoader
|
|
12
|
+
*/
|
|
13
|
+
import fs from 'node:fs/promises';
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Constants
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/** Extensions handled by this loader, all lower-cased with a leading dot. */
|
|
20
|
+
const SUPPORTED_EXTENSIONS = ['.txt', '.csv', '.tsv', '.json', '.yaml', '.yml']; // single source of truth: copied into `supportedExtensions` by the constructor and consulted by `canLoad()`
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Helpers
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Extracts the extension of `filePath` and normalises it to lower case.
 *
 * The result keeps its leading dot (e.g. `'.txt'`). A path without an
 * extension yields an empty string.
 *
 * @param filePath - Absolute or relative file path string.
 * @returns The lower-cased extension including the dot, or `''`.
 */
function extOf(filePath) {
    const extension = path.extname(filePath);
    return extension.toLowerCase();
}
|
|
33
|
+
/**
 * Approximates how many words `text` contains.
 *
 * Leading and trailing whitespace is ignored; the remainder is split on
 * runs of whitespace and the resulting pieces are counted. Blank input
 * (empty or whitespace-only) counts as zero words.
 *
 * @param text - Raw text to count.
 * @returns The number of whitespace-separated tokens.
 */
function wordCount(text) {
    const trimmed = text.trim();
    // Guard first: splitting an empty string would report one (empty) token.
    if (trimmed === '') {
        return 0;
    }
    const tokens = trimmed.split(/\s+/);
    return tokens.length;
}
|
|
44
|
+
/**
 * Lookup table from lower-cased extension (with leading dot) to the label
 * reported in `LoadedDocument.format`. Both YAML spellings share one label.
 */
const FORMAT_LABELS = new Map([
    ['.txt', 'txt'],
    ['.csv', 'csv'],
    ['.tsv', 'tsv'],
    ['.json', 'json'],
    ['.yaml', 'yaml'],
    ['.yml', 'yaml'],
]);
/**
 * Translates a file extension into the format label returned inside
 * `LoadedDocument.format`.
 *
 * Extensions that are not in the table are labelled `'txt'`.
 *
 * @param ext - Lower-cased extension including leading dot.
 * @returns One of `'txt'`, `'csv'`, `'tsv'`, `'json'` or `'yaml'`.
 */
function formatLabel(ext) {
    const label = FORMAT_LABELS.get(ext);
    return label === undefined ? 'txt' : label;
}
|
|
67
|
+
/**
 * For JSON files: parse, then re-serialise with two-space indentation.
 *
 * This makes the stored `content` more human-readable and consistent
 * regardless of how the source file was originally formatted.
 *
 * A single leading byte-order mark (U+FEFF) is ignored while parsing:
 * `JSON.parse` treats it as a syntax error, which would otherwise cause
 * every BOM-prefixed (but valid) JSON file to skip normalisation.
 *
 * Returns `raw` unchanged (BOM included) when parsing fails, e.g. when a
 * `.json` file contains invalid JSON, so the loader never throws on bad
 * input.
 *
 * @param raw - Raw UTF-8 content of the JSON file.
 * @returns Pretty-printed JSON, or `raw` when it cannot be parsed.
 */
function prettyJson(raw) {
    // Strip at most one leading BOM; anything else is left for JSON.parse to judge.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    try {
        return JSON.stringify(JSON.parse(text), null, 2);
    }
    catch {
        // Invalid JSON — return the untouched input so the caller still gets something.
        return raw;
    }
}
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// TextLoader
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
/**
 * Loader for plain-text, CSV, TSV, JSON, and YAML files.
 *
 * The loader performs minimal transformation:
 * - **`.json`** — re-serialised with pretty-printing so stored content is
 *   consistently formatted; invalid JSON is kept as raw text.
 * - **`.yaml` / `.yml`** — parsed and re-dumped with the `yaml` package for
 *   consistent formatting; falls back to raw text on parse error or when
 *   the document parses to no value (empty or comment-only files).
 * - All other extensions — content is returned as-is.
 *
 * Metadata includes the approximate `wordCount` (measured on the raw text,
 * before normalisation) and a `format` label derived from the file
 * extension.
 *
 * @implements {IDocumentLoader}
 *
 * @example
 * ```ts
 * const loader = new TextLoader();
 * const doc = await loader.load('/data/notes.txt');
 * console.log(doc.metadata.wordCount); // e.g. 312
 * ```
 */
export class TextLoader {
    constructor() {
        /** @inheritdoc */
        // Copy so callers mutating this array cannot affect the module-level list.
        this.supportedExtensions = [...SUPPORTED_EXTENSIONS];
    }
    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Reports whether `source` is a path whose extension this loader handles.
     *
     * Only string paths can be matched: a `Buffer` (or any other non-string
     * value) carries no extension, so it is conservatively rejected rather
     * than passed to the path parser, which would throw on it.
     *
     * @param source - File path string or raw bytes.
     * @returns `true` when the lower-cased extension is in the supported list.
     */
    canLoad(source) {
        if (typeof source !== 'string') {
            return false;
        }
        return SUPPORTED_EXTENSIONS.includes(extOf(source));
    }
    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Reads `source` and returns its (lightly normalised) text plus metadata.
     *
     * - `Buffer` sources are decoded as UTF-8 and treated as plain text,
     *   because no extension is available to pick a format.
     * - String sources are read from disk as UTF-8; the extension selects
     *   the normalisation and the format label. Errors from reading the
     *   file are not caught here and propagate to the caller.
     *
     * @param source - File path string or raw bytes.
     * @param _options - Accepted for interface compatibility; not used.
     * @returns An object with `content`, `metadata` (`wordCount`, `format`,
     *   and `source` when loaded from a path) and `format`.
     */
    async load(source, _options) {
        let raw;
        let resolvedPath;
        let ext;
        if (Buffer.isBuffer(source)) {
            // In-memory buffer: treat as plain text, no extension info available.
            raw = source.toString('utf8');
            ext = '.txt';
        }
        else {
            // File path: read from disk.
            resolvedPath = source;
            ext = extOf(source);
            const bytes = await fs.readFile(resolvedPath);
            raw = bytes.toString('utf8');
        }
        // ---- Content normalisation ----
        const content = this._normalise(raw, ext);
        // ---- Metadata assembly ----
        // The label is reported both in the metadata and at the top level.
        const format = formatLabel(ext);
        const meta = {
            wordCount: wordCount(raw),
            format,
            // `source` is only present when the document came from a path.
            ...(resolvedPath ? { source: resolvedPath } : {}),
        };
        return {
            content,
            metadata: meta,
            format,
        };
    }
    // -------------------------------------------------------------------------
    // Private helpers
    // -------------------------------------------------------------------------
    /**
     * Normalises raw file content based on the detected extension.
     *
     * - JSON files are pretty-printed.
     * - YAML files are parsed and re-dumped for consistent formatting.
     * - All other formats are returned unchanged.
     *
     * @param raw - Raw UTF-8 string read from the source.
     * @param ext - Lower-cased extension with leading dot.
     * @returns The normalised content string.
     */
    _normalise(raw, ext) {
        switch (ext) {
            case '.json':
                return prettyJson(raw);
            case '.yaml':
            case '.yml':
                return this._prettyYaml(raw);
            default:
                // .txt, .csv, .tsv — return raw content unchanged.
                return raw;
        }
    }
    /**
     * Parse and re-serialise YAML content for consistent formatting.
     *
     * Falls back to the original raw string when:
     * - parsing throws, so the loader never throws on malformed YAML; or
     * - the document parses to `null`/`undefined` (an empty or comment-only
     *   file). Re-dumping that value would replace the file's text with the
     *   literal `null`, discarding whatever the file actually contained.
     *
     * NOTE(review): the round trip goes through plain parsed values, so YAML
     * comments in the source are presumably not carried into the output —
     * confirm whether that is acceptable for ingestion.
     *
     * @param raw - Raw YAML string.
     * @returns Re-serialised YAML, or `raw` in the fallback cases above.
     */
    _prettyYaml(raw) {
        try {
            const parsed = parseYaml(raw);
            if (parsed === null || parsed === undefined) {
                // Nothing to re-dump; keep the original text instead of emitting "null".
                return raw;
            }
            return stringifyYaml(parsed);
        }
        catch {
            return raw;
        }
    }
}
|
|
207
|
+
//# sourceMappingURL=TextLoader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TextLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/TextLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,MAAM,CAAC;AAItE,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,6EAA6E;AAC7E,MAAM,oBAAoB,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAU,CAAC;AAKzF,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;;GAKG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AAClE,CAAC;AAED;;;;;GAKG;AACH,SAAS,WAAW,CAAC,GAA0B;IAC7C,QAAQ,GAAmB,EAAE,CAAC;QAC5B,KAAK,MAAM;YACT,OAAO,KAAK,CAAC;QACf,KAAK,MAAM;YACT,OAAO,KAAK,CAAC;QACf,KAAK,MAAM;YACT,OAAO,KAAK,CAAC;QACf,KAAK,OAAO;YACV,OAAO,MAAM,CAAC;QAChB,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,OAAO,MAAM,CAAC;QAChB;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,qEAAqE;QACrE,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,OAAO,UAAU;IAAvB;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IAqGrE,CAAC;IAnGC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,sEAAsE;QACtE,uDAAuD;QACvD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,GAAW,CAAC;QAChB,IAAI,YAAgC,CAAC;QACrC,IAAI,GAAW,CAAC;QAEhB,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,sEAAsE;YACtE,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC9B,GAAG,
GAAG,MAAM,CAAC;QACf,CAAC;aAAM,CAAC;YACN,6BAA6B;YAC7B,YAAY,GAAG,MAAM,CAAC;YACtB,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YACpB,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YAC9C,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;QAED,kCAAkC;QAClC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,GAAmB,CAAC,CAAC;QAE1D,8BAA8B;QAC9B,MAAM,IAAI,GAAqB;YAC7B,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC;YACzB,MAAM,EAAE,WAAW,CAAC,GAAG,CAAC;YACxB,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,WAAW,CAAC,GAAG,CAAC;SACzB,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;;;;;OASG;IACK,UAAU,CAAC,GAAW,EAAE,GAA0B;QACxD,QAAQ,GAAmB,EAAE,CAAC;YAC5B,KAAK,OAAO;gBACV,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC;YAEzB,KAAK,OAAO,CAAC;YACb,KAAK,MAAM;gBACT,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;YAE/B;gBACE,mDAAmD;gBACnD,OAAO,GAAG,CAAC;QACf,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CAAC,GAAW;QAC7B,IAAI,CAAC;YACH,MAAM,MAAM,GAAY,SAAS,CAAC,GAAG,CAAC,CAAC;YACvC,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview UrlLoader — fetch-and-delegate loader for HTTP/HTTPS URLs.
|
|
3
|
+
*
|
|
4
|
+
* `UrlLoader` implements {@link IDocumentLoader} and handles `http://` and
|
|
5
|
+
* `https://` sources. It fetches the remote resource, inspects the
|
|
6
|
+
* `Content-Type` response header, and delegates to the most appropriate
|
|
7
|
+
* registered loader:
|
|
8
|
+
*
|
|
9
|
+
* - `text/html` → {@link HtmlLoader} (via the registry)
|
|
10
|
+
* - `application/pdf` → {@link PdfLoader} (via the registry)
|
|
11
|
+
* - Anything else → raw UTF-8 text, format `'text'`
|
|
12
|
+
*
|
|
13
|
+
* Because URLs have no file extension in the traditional sense,
|
|
14
|
+
* `supportedExtensions` is deliberately empty. Routing to `UrlLoader` must
|
|
15
|
+
* be done explicitly — either by calling `UrlLoader.load()` directly or by
|
|
16
|
+
* checking `UrlLoader.canLoad()` before dispatching.
|
|
17
|
+
*
|
|
18
|
+
* @module memory/ingestion/UrlLoader
|
|
19
|
+
*/
|
|
20
|
+
import type { IDocumentLoader } from './IDocumentLoader.js';
|
|
21
|
+
import type { LoadOptions, LoadedDocument } from '../facade/types.js';
|
|
22
|
+
import type { LoaderRegistry } from './LoaderRegistry.js';
|
|
23
|
+
/**
 * Document loader for remote resources addressed by an `http://` or
 * `https://` URL.
 *
 * The loader downloads the resource, reads the response `Content-Type`
 * (ignoring parameters such as `; charset=utf-8`), and hands the body to a
 * format-specific loader looked up in the supplied {@link LoaderRegistry}.
 *
 * ### Routing by content type
 * | Content-Type          | Handling                                   |
 * |-----------------------|--------------------------------------------|
 * | `text/html`           | HTML loader from the registry (`.html`)    |
 * | `application/pdf`     | PDF loader from the registry               |
 * | Everything else       | Returned as plain UTF-8 text               |
 *
 * ### Example
 * ```ts
 * const registry = new LoaderRegistry();
 * const urlLoader = new UrlLoader(registry);
 *
 * // UrlLoader can be used on its own; guard with canLoad() before loading.
 * if (urlLoader.canLoad('https://example.com/report.pdf')) {
 *   const doc = await urlLoader.load('https://example.com/report.pdf');
 *   console.log(doc.format); // 'pdf'
 * }
 * ```
 *
 * @implements {IDocumentLoader}
 */
export declare class UrlLoader implements IDocumentLoader {
    /** Registry consulted for the loader matching the fetched content type. */
    private readonly registry;
    /**
     * Always an empty array: a URL is not selected by file extension.
     *
     * Callers decide whether to use this loader by asking {@link canLoad},
     * not through an extension-based registry lookup.
     */
    readonly supportedExtensions: string[];
    /**
     * @param registry - The {@link LoaderRegistry} from which format-specific
     *   loaders are resolved after the response content type is known.
     */
    constructor(registry: LoaderRegistry);
    /**
     * Tests whether `source` looks like an HTTP(S) URL.
     *
     * The check is a plain prefix match against `http://` and `https://`.
     * A `Buffer` is never accepted, since raw bytes are not a URL.
     *
     * @param source - Absolute file path, URL string, or raw bytes.
     * @returns `true` only for strings beginning with one of the two prefixes.
     */
    canLoad(source: string | Buffer): boolean;
    /**
     * Downloads `source` and converts the response into a
     * {@link LoadedDocument}.
     *
     * The body is held in memory and dispatched on the `Content-Type` header:
     *
     * - `text/html` → read as text and given to the registry's HTML loader as
     *   a UTF-8 `Buffer`; the returned document's `metadata.source` is set to
     *   the URL. If no HTML loader is registered, the raw HTML text is
     *   returned with format `'html'`.
     * - `application/pdf` → read as bytes and given to the PDF loader as a
     *   `Buffer`.
     * - Anything else → returned as plain text with format `'text'` and
     *   `source` metadata set to the URL.
     *
     * @param source - HTTP/HTTPS URL string.
     * @param options - Optional load hints passed through to the delegated
     *   loader.
     * @returns A promise resolving to the {@link LoadedDocument}.
     *
     * @throws {Error} When `source` is a `Buffer` (URLs must be strings).
     * @throws {Error} When the HTTP request fails (network error or non-2xx
     *   status).
     */
    load(source: string | Buffer, options?: LoadOptions): Promise<LoadedDocument>;
}
|
|
95
|
+
//# sourceMappingURL=UrlLoader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"UrlLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/UrlLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAa1D;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,SAAU,YAAW,eAAe;IAanC,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAZrC;;;;;OAKG;IACH,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAAM;IAE5C;;;OAGG;gBAC0B,QAAQ,EAAE,cAAc;IAMrD;;;;;;;OAOG;IACH,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IASzC;;;;;;;;;;;;;;;;;;;OAmBG;IACG,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;CAiFpF"}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview UrlLoader — fetch-and-delegate loader for HTTP/HTTPS URLs.
|
|
3
|
+
*
|
|
4
|
+
* `UrlLoader` implements {@link IDocumentLoader} and handles `http://` and
|
|
5
|
+
* `https://` sources. It fetches the remote resource, inspects the
|
|
6
|
+
* `Content-Type` response header, and delegates to the most appropriate
|
|
7
|
+
* registered loader:
|
|
8
|
+
*
|
|
9
|
+
* - `text/html` → {@link HtmlLoader} (via the registry)
|
|
10
|
+
* - `application/pdf` → {@link PdfLoader} (via the registry)
|
|
11
|
+
* - Anything else → raw UTF-8 text, format `'text'`
|
|
12
|
+
*
|
|
13
|
+
* Because URLs have no file extension in the traditional sense,
|
|
14
|
+
* `supportedExtensions` is deliberately empty. Routing to `UrlLoader` must
|
|
15
|
+
* be done explicitly — either by calling `UrlLoader.load()` directly or by
|
|
16
|
+
* checking `UrlLoader.canLoad()` before dispatching.
|
|
17
|
+
*
|
|
18
|
+
* @module memory/ingestion/UrlLoader
|
|
19
|
+
*/
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Constants
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
/** Prefix patterns that identify an HTTP/HTTPS URL. */
|
|
24
|
+
const URL_PREFIXES = ['http://', 'https://']; // matched with String.prototype.startsWith in canLoad(), so the comparison is case-sensitive
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// UrlLoader
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
/**
|
|
29
|
+
* An {@link IDocumentLoader} that fetches a remote URL and delegates parsing
|
|
30
|
+
* to the appropriate registered loader based on the response `Content-Type`.
|
|
31
|
+
*
|
|
32
|
+
* ### Supported content types
|
|
33
|
+
* | Content-Type | Delegates to |
|
|
34
|
+
* |-----------------------|-----------------------|
|
|
35
|
+
* | `text/html` | HtmlLoader (registry) |
|
|
36
|
+
* | `application/pdf` | PdfLoader (registry) |
|
|
37
|
+
* | Everything else | Plain UTF-8 text |
|
|
38
|
+
*
|
|
39
|
+
* ### Example
|
|
40
|
+
* ```ts
|
|
41
|
+
* const registry = new LoaderRegistry();
|
|
42
|
+
* const urlLoader = new UrlLoader(registry);
|
|
43
|
+
*
|
|
44
|
+
* // Register so the registry also dispatches URLs via canLoad checks.
|
|
45
|
+
* // (Optional — UrlLoader can be used standalone too.)
|
|
46
|
+
*
|
|
47
|
+
* if (urlLoader.canLoad('https://example.com/report.pdf')) {
|
|
48
|
+
* const doc = await urlLoader.load('https://example.com/report.pdf');
|
|
49
|
+
* console.log(doc.format); // 'pdf'
|
|
50
|
+
* }
|
|
51
|
+
* ```
|
|
52
|
+
*
|
|
53
|
+
* @implements {IDocumentLoader}
|
|
54
|
+
*/
|
|
55
|
+
export class UrlLoader {
|
|
56
|
+
/**
|
|
57
|
+
* @param registry - The {@link LoaderRegistry} used to resolve format-specific
|
|
58
|
+
* loaders once the remote content type is known.
|
|
59
|
+
*/
|
|
60
|
+
constructor(registry) {
|
|
61
|
+
this.registry = registry;
|
|
62
|
+
/**
|
|
63
|
+
* URLs have no file extension so this array is always empty.
|
|
64
|
+
*
|
|
65
|
+
* Routing to this loader must be performed via {@link canLoad} rather than
|
|
66
|
+
* the registry's extension-based lookup.
|
|
67
|
+
*/
|
|
68
|
+
this.supportedExtensions = [];
|
|
69
|
+
}
|
|
70
|
+
// -------------------------------------------------------------------------
|
|
71
|
+
// canLoad
|
|
72
|
+
// -------------------------------------------------------------------------
|
|
73
|
+
/**
|
|
74
|
+
* Returns `true` when `source` is a string that starts with `http://` or
|
|
75
|
+
* `https://`.
|
|
76
|
+
*
|
|
77
|
+
* Buffer sources are always rejected — raw bytes cannot be a URL.
|
|
78
|
+
*
|
|
79
|
+
* @param source - Absolute file path, URL string, or raw bytes.
|
|
80
|
+
*/
|
|
81
|
+
canLoad(source) {
|
|
82
|
+
if (Buffer.isBuffer(source))
|
|
83
|
+
return false;
|
|
84
|
+
return URL_PREFIXES.some((prefix) => source.startsWith(prefix));
|
|
85
|
+
}
|
|
86
|
+
// -------------------------------------------------------------------------
|
|
87
|
+
// load
|
|
88
|
+
// -------------------------------------------------------------------------
|
|
89
|
+
/**
|
|
90
|
+
* Fetch `source` over HTTP/HTTPS and return a {@link LoadedDocument}.
|
|
91
|
+
*
|
|
92
|
+
* The response body is buffered in memory and then handed to the appropriate
|
|
93
|
+
* sub-loader according to the `Content-Type` header:
|
|
94
|
+
*
|
|
95
|
+
* - `text/html` → fetched as text, passed to the HTML loader as a `Buffer`.
|
|
96
|
+
* - `application/pdf` → fetched as bytes, passed to the PDF loader as a
|
|
97
|
+
* `Buffer`.
|
|
98
|
+
* - Anything else → returned as plain text with format `'text'` and
|
|
99
|
+
* `source` metadata set to the URL.
|
|
100
|
+
*
|
|
101
|
+
* @param source - HTTP/HTTPS URL string.
|
|
102
|
+
* @param options - Optional load hints forwarded to the delegated loader.
|
|
103
|
+
* @returns A promise resolving to the {@link LoadedDocument}.
|
|
104
|
+
*
|
|
105
|
+
* @throws {Error} When `source` is a `Buffer` (URLs must be strings).
|
|
106
|
+
* @throws {Error} When the HTTP request fails (network error or non-2xx
|
|
107
|
+
* status).
|
|
108
|
+
*/
|
|
109
|
+
async load(source, options) {
|
|
110
|
+
if (Buffer.isBuffer(source)) {
|
|
111
|
+
throw new Error('UrlLoader: source must be a URL string, not a Buffer.');
|
|
112
|
+
}
|
|
113
|
+
const url = source;
|
|
114
|
+
// Fetch the remote resource.
|
|
115
|
+
const response = await fetch(url);
|
|
116
|
+
if (!response.ok) {
|
|
117
|
+
throw new Error(`UrlLoader: HTTP ${response.status} ${response.statusText} for URL "${url}".`);
|
|
118
|
+
}
|
|
119
|
+
// Determine content type from the response header, stripping parameters
|
|
120
|
+
// such as `; charset=utf-8`.
|
|
121
|
+
const contentTypeHeader = response.headers.get('content-type') ?? '';
|
|
122
|
+
const contentType = contentTypeHeader.split(';')[0].trim().toLowerCase();
|
|
123
|
+
// ------------------------------------------------------------------
|
|
124
|
+
// Delegate based on content type.
|
|
125
|
+
// ------------------------------------------------------------------
|
|
126
|
+
if (contentType.includes('text/html')) {
|
|
127
|
+
// Fetch as text and pass as a UTF-8 Buffer to the HTML loader.
|
|
128
|
+
const text = await response.text();
|
|
129
|
+
const htmlBuffer = Buffer.from(text, 'utf8');
|
|
130
|
+
const htmlLoader = this.registry.getLoader('.html');
|
|
131
|
+
if (htmlLoader) {
|
|
132
|
+
const doc = await htmlLoader.load(htmlBuffer, options);
|
|
133
|
+
// Attach the URL as the source metadata since the loader receives a
|
|
134
|
+
// Buffer and cannot derive the origin URL itself.
|
|
135
|
+
return {
|
|
136
|
+
...doc,
|
|
137
|
+
metadata: { ...doc.metadata, source: url },
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
// Fallback: return raw HTML text if no HTML loader is registered.
|
|
141
|
+
return {
|
|
142
|
+
content: text,
|
|
143
|
+
metadata: { source: url, wordCount: text.trim().split(/\s+/).length },
|
|
144
|
+
format: 'html',
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
if (contentType.includes('application/pdf')) {
|
|
148
|
+
// Fetch as bytes and pass as a Buffer to the PDF loader.
|
|
149
|
+
const bytes = await response.arrayBuffer();
|
|
150
|
+
const pdfBuffer = Buffer.from(bytes);
|
|
151
|
+
const pdfLoader = this.registry.getLoader('.pdf');
|
|
152
|
+
if (pdfLoader) {
|
|
153
|
+
const doc = await pdfLoader.load(pdfBuffer, options);
|
|
154
|
+
return {
|
|
155
|
+
...doc,
|
|
156
|
+
metadata: { ...doc.metadata, source: url },
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
// Fallback: cannot parse PDF without a loader.
|
|
160
|
+
throw new Error(`UrlLoader: received application/pdf from "${url}" but no PDF loader is registered.`);
|
|
161
|
+
}
|
|
162
|
+
// Default: treat the response body as plain UTF-8 text.
|
|
163
|
+
const text = await response.text();
|
|
164
|
+
return {
|
|
165
|
+
content: text,
|
|
166
|
+
metadata: {
|
|
167
|
+
source: url,
|
|
168
|
+
wordCount: text.trim() === '' ? 0 : text.trim().split(/\s+/).length,
|
|
169
|
+
},
|
|
170
|
+
format: 'text',
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
//# sourceMappingURL=UrlLoader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"UrlLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/UrlLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAMH,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,uDAAuD;AACvD,MAAM,YAAY,GAAG,CAAC,SAAS,EAAE,UAAU,CAAU,CAAC;AAEtD,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,OAAO,SAAS;IASpB;;;OAGG;IACH,YAA6B,QAAwB;QAAxB,aAAQ,GAAR,QAAQ,CAAgB;QAZrD;;;;;WAKG;QACM,wBAAmB,GAAa,EAAE,CAAC;IAMY,CAAC;IAEzD,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E;;;;;;;OAOG;IACH,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,KAAK,CAAC;QAC1C,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAClE,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E;;;;;;;;;;;;;;;;;;;OAmBG;IACH,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,OAAqB;QACvD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,CAAC;QAEnB,6BAA6B;QAC7B,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAElC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CACb,mBAAmB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,aAAa,GAAG,IAAI,CAC9E,CAAC;QACJ,CAAC;QAED,wEAAwE;QACxE,6BAA6B;QAC7B,MAAM,iBAAiB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QACrE,MAAM,WAAW,GAAG,iBAAiB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAEzE,qEAAqE;QACrE,kCAAkC;QAClC,qEAAqE;QAErE,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACtC,+DAA+D;YAC/D,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAE7C,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;YACpD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;gBACvD,oEAAoE;gBACpE,kDAAkD;gBAClD,OAAO;oBACL,GAAG,GAAG;oBACN,QAAQ,EAAE,EAAE,GAAG,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE;iBAC3C,CAAC;YACJ,CAAC;YAED,kEAAkE;YAClE,OAAO;gBACL,OAAO,EAAE,IAAI;gBACb,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,CAAC,IAAI,EAAE
,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE;gBACrE,MAAM,EAAE,MAAM;aACf,CAAC;QACJ,CAAC;QAED,IAAI,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC5C,yDAAyD;YACzD,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAErC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAClD,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;gBACrD,OAAO;oBACL,GAAG,GAAG;oBACN,QAAQ,EAAE,EAAE,GAAG,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE;iBAC3C,CAAC;YACJ,CAAC;YAED,+CAA+C;YAC/C,MAAM,IAAI,KAAK,CACb,6CAA6C,GAAG,oCAAoC,CACrF,CAAC;QACJ,CAAC;QAED,wDAAwD;QACxD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE;gBACR,MAAM,EAAE,GAAG;gBACX,SAAS,EAAE,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;aACpE;YACD,MAAM,EAAE,MAAM;SACf,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview ChatGPT export importer for AgentOS memory brain.
|
|
3
|
+
*
|
|
4
|
+
* Parses the `conversations.json` file produced by ChatGPT's "Export data"
|
|
5
|
+
* feature and imports each conversation into the target `SqliteBrain`.
|
|
6
|
+
*
|
|
7
|
+
* ## Import strategy
|
|
8
|
+
*
|
|
9
|
+
* For each conversation object in `conversations.json`:
|
|
10
|
+
* 1. A row is inserted into the `conversations` table (deduped by title + created_at).
|
|
11
|
+
* 2. Every user/assistant message pair is encoded as an **episodic memory trace**
|
|
12
|
+
* whose content is `"[user]: {user_text}\n[assistant]: {assistant_text}"`.
|
|
13
|
+
* This preserves conversational context in a single retrievable unit.
|
|
14
|
+
* 3. System messages and tool messages are skipped (they are not episodic memories).
|
|
15
|
+
*
|
|
16
|
+
* ## ChatGPT export format
|
|
17
|
+
* ```json
|
|
18
|
+
* [
|
|
19
|
+
* {
|
|
20
|
+
* "title": "Conversation title",
|
|
21
|
+
* "create_time": 1711234567.89,
|
|
22
|
+
* "mapping": {
|
|
23
|
+
* "node-id": {
|
|
24
|
+
* "message": {
|
|
25
|
+
* "author": { "role": "user" },
|
|
26
|
+
* "content": { "parts": ["Hello!"] }
|
|
27
|
+
* },
|
|
28
|
+
* "children": ["next-node-id"]
|
|
29
|
+
* }
|
|
30
|
+
* }
|
|
31
|
+
* }
|
|
32
|
+
* ]
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* @module memory/io/ChatGptImporter
|
|
36
|
+
*/
|
|
37
|
+
import type { ImportResult } from '../facade/types.js';
|
|
38
|
+
import type { SqliteBrain } from '../store/SqliteBrain.js';
|
|
39
|
+
/**
|
|
40
|
+
* Imports a ChatGPT `conversations.json` export into a `SqliteBrain`.
|
|
41
|
+
*
|
|
42
|
+
* **Usage:**
|
|
43
|
+
* ```ts
|
|
44
|
+
* const importer = new ChatGptImporter(brain);
|
|
45
|
+
* const result = await importer.import('/path/to/conversations.json');
|
|
46
|
+
* ```
|
|
47
|
+
*/
|
|
48
|
+
export declare class ChatGptImporter {
    /** The `SqliteBrain` that receives the imported data. */
    private readonly brain;
    /**
     * Create an importer bound to a single brain.
     *
     * @param brain - Destination `SqliteBrain` for all imported data.
     */
    constructor(brain: SqliteBrain);
    /**
     * Read a ChatGPT `conversations.json` export and import every
     * conversation it contains, together with its message pairs.
     *
     * @param sourcePath - Absolute path of the `conversations.json` file.
     * @returns An `ImportResult` reporting how many traces were imported, how
     *   many duplicates were skipped, and the error messages collected for
     *   individual items.
     */
    import(sourcePath: string): Promise<ImportResult>;
    /**
     * Handle one conversation object from the export.
     *
     * Inserts a `conversations` row, then walks the message mapping in tree
     * order (BFS) and turns adjacent user/assistant messages into episodic
     * memory traces.
     *
     * @param convo - The raw ChatGPT conversation object.
     * @param result - Result accumulator, mutated in place.
     */
    private _importConversation;
    /**
     * Store one episodic memory trace built from a message pair.
     *
     * Duplicates are detected via the SHA-256 of the combined `content`
     * string.
     *
     * @param content - Content string of the form `[user]:...\n[assistant]:...`.
     * @param createdAt - Unix timestamp (ms) of the user message.
     * @param conversationId - ID of the owning conversation row.
     * @param result - Result accumulator, mutated in place.
     */
    private _insertEpisodicTrace;
}
|
|
85
|
+
//# sourceMappingURL=ChatGptImporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ChatGptImporter.d.ts","sourceRoot":"","sources":["../../../src/memory/io/ChatGptImporter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAKH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AA4C3D;;;;;;;;GAQG;AACH,qBAAa,eAAe;IAId,OAAO,CAAC,QAAQ,CAAC,KAAK;IAHlC;;OAEG;gBAC0B,KAAK,EAAE,WAAW;IAM/C;;;;;;OAMG;IACG,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAyCvD;;;;;;;;;OASG;IACH,OAAO,CAAC,mBAAmB;IAuG3B;;;;;;;;;OASG;IACH,OAAO,CAAC,oBAAoB;CA4C7B"}
|