@framers/agentos 0.1.101 → 0.1.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/README.md +16 -0
  2. package/dist/memory/config.d.ts +39 -0
  3. package/dist/memory/config.d.ts.map +1 -1
  4. package/dist/memory/config.js.map +1 -1
  5. package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
  6. package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
  7. package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
  8. package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
  9. package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
  10. package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
  11. package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
  12. package/dist/memory/consolidation/index.d.ts +8 -0
  13. package/dist/memory/consolidation/index.d.ts.map +1 -0
  14. package/dist/memory/consolidation/index.js +7 -0
  15. package/dist/memory/consolidation/index.js.map +1 -0
  16. package/dist/memory/decay/DecayModel.d.ts +33 -0
  17. package/dist/memory/decay/DecayModel.d.ts.map +1 -1
  18. package/dist/memory/decay/DecayModel.js +31 -0
  19. package/dist/memory/decay/DecayModel.js.map +1 -1
  20. package/dist/memory/facade/Memory.d.ts +228 -0
  21. package/dist/memory/facade/Memory.d.ts.map +1 -0
  22. package/dist/memory/facade/Memory.js +823 -0
  23. package/dist/memory/facade/Memory.js.map +1 -0
  24. package/dist/memory/facade/index.d.ts +13 -0
  25. package/dist/memory/facade/index.d.ts.map +1 -0
  26. package/dist/memory/facade/index.js +11 -0
  27. package/dist/memory/facade/index.js.map +1 -0
  28. package/dist/memory/facade/types.d.ts +606 -0
  29. package/dist/memory/facade/types.d.ts.map +1 -0
  30. package/dist/memory/facade/types.js +11 -0
  31. package/dist/memory/facade/types.js.map +1 -0
  32. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
  33. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
  34. package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
  35. package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
  36. package/dist/memory/feedback/index.d.ts +13 -0
  37. package/dist/memory/feedback/index.d.ts.map +1 -0
  38. package/dist/memory/feedback/index.js +12 -0
  39. package/dist/memory/feedback/index.js.map +1 -0
  40. package/dist/memory/index.d.ts +22 -0
  41. package/dist/memory/index.d.ts.map +1 -1
  42. package/dist/memory/index.js +24 -0
  43. package/dist/memory/index.js.map +1 -1
  44. package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
  45. package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
  46. package/dist/memory/ingestion/ChunkingEngine.js +508 -0
  47. package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
  48. package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
  49. package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
  50. package/dist/memory/ingestion/DoclingLoader.js +228 -0
  51. package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
  52. package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
  53. package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
  54. package/dist/memory/ingestion/DocxLoader.js +111 -0
  55. package/dist/memory/ingestion/DocxLoader.js.map +1 -0
  56. package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
  57. package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
  58. package/dist/memory/ingestion/FolderScanner.js +127 -0
  59. package/dist/memory/ingestion/FolderScanner.js.map +1 -0
  60. package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
  61. package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
  62. package/dist/memory/ingestion/HtmlLoader.js +202 -0
  63. package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
  64. package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
  65. package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
  66. package/dist/memory/ingestion/IDocumentLoader.js +11 -0
  67. package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
  68. package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
  69. package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
  70. package/dist/memory/ingestion/LoaderRegistry.js +229 -0
  71. package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
  72. package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
  73. package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
  74. package/dist/memory/ingestion/MarkdownLoader.js +169 -0
  75. package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
  76. package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
  77. package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
  78. package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
  79. package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
  80. package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
  81. package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
  82. package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
  83. package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
  84. package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
  85. package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
  86. package/dist/memory/ingestion/PdfLoader.js +179 -0
  87. package/dist/memory/ingestion/PdfLoader.js.map +1 -0
  88. package/dist/memory/ingestion/TextLoader.d.ts +66 -0
  89. package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
  90. package/dist/memory/ingestion/TextLoader.js +207 -0
  91. package/dist/memory/ingestion/TextLoader.js.map +1 -0
  92. package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
  93. package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
  94. package/dist/memory/ingestion/UrlLoader.js +174 -0
  95. package/dist/memory/ingestion/UrlLoader.js.map +1 -0
  96. package/dist/memory/io/ChatGptImporter.d.ts +85 -0
  97. package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
  98. package/dist/memory/io/ChatGptImporter.js +231 -0
  99. package/dist/memory/io/ChatGptImporter.js.map +1 -0
  100. package/dist/memory/io/JsonExporter.d.ts +67 -0
  101. package/dist/memory/io/JsonExporter.d.ts.map +1 -0
  102. package/dist/memory/io/JsonExporter.js +132 -0
  103. package/dist/memory/io/JsonExporter.js.map +1 -0
  104. package/dist/memory/io/JsonImporter.d.ts +84 -0
  105. package/dist/memory/io/JsonImporter.d.ts.map +1 -0
  106. package/dist/memory/io/JsonImporter.js +234 -0
  107. package/dist/memory/io/JsonImporter.js.map +1 -0
  108. package/dist/memory/io/MarkdownExporter.d.ts +95 -0
  109. package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
  110. package/dist/memory/io/MarkdownExporter.js +130 -0
  111. package/dist/memory/io/MarkdownExporter.js.map +1 -0
  112. package/dist/memory/io/MarkdownImporter.d.ts +84 -0
  113. package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
  114. package/dist/memory/io/MarkdownImporter.js +166 -0
  115. package/dist/memory/io/MarkdownImporter.js.map +1 -0
  116. package/dist/memory/io/ObsidianExporter.d.ts +80 -0
  117. package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
  118. package/dist/memory/io/ObsidianExporter.js +127 -0
  119. package/dist/memory/io/ObsidianExporter.js.map +1 -0
  120. package/dist/memory/io/ObsidianImporter.d.ts +93 -0
  121. package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
  122. package/dist/memory/io/ObsidianImporter.js +221 -0
  123. package/dist/memory/io/ObsidianImporter.js.map +1 -0
  124. package/dist/memory/io/SqliteExporter.d.ts +47 -0
  125. package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
  126. package/dist/memory/io/SqliteExporter.js +56 -0
  127. package/dist/memory/io/SqliteExporter.js.map +1 -0
  128. package/dist/memory/io/SqliteImporter.d.ts +82 -0
  129. package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
  130. package/dist/memory/io/SqliteImporter.js +232 -0
  131. package/dist/memory/io/SqliteImporter.js.map +1 -0
  132. package/dist/memory/io/index.d.ts +31 -0
  133. package/dist/memory/io/index.d.ts.map +1 -0
  134. package/dist/memory/io/index.js +31 -0
  135. package/dist/memory/io/index.js.map +1 -0
  136. package/dist/memory/store/SqliteBrain.d.ts +125 -0
  137. package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
  138. package/dist/memory/store/SqliteBrain.js +407 -0
  139. package/dist/memory/store/SqliteBrain.js.map +1 -0
  140. package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
  141. package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
  142. package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
  143. package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
  144. package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
  145. package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
  146. package/dist/memory/store/SqliteMemoryGraph.js +637 -0
  147. package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
  148. package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
  149. package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
  150. package/dist/memory/tools/MemoryAddTool.js +131 -0
  151. package/dist/memory/tools/MemoryAddTool.js.map +1 -0
  152. package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
  153. package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
  154. package/dist/memory/tools/MemoryDeleteTool.js +96 -0
  155. package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
  156. package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
  157. package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
  158. package/dist/memory/tools/MemoryMergeTool.js +164 -0
  159. package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
  160. package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
  161. package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
  162. package/dist/memory/tools/MemoryReflectTool.js +102 -0
  163. package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
  164. package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
  165. package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
  166. package/dist/memory/tools/MemorySearchTool.js +162 -0
  167. package/dist/memory/tools/MemorySearchTool.js.map +1 -0
  168. package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
  169. package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
  170. package/dist/memory/tools/MemoryUpdateTool.js +125 -0
  171. package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
  172. package/dist/memory/tools/index.d.ts +32 -0
  173. package/dist/memory/tools/index.d.ts.map +1 -0
  174. package/dist/memory/tools/index.js +26 -0
  175. package/dist/memory/tools/index.js.map +1 -0
  176. package/package.json +6 -1
@@ -0,0 +1,49 @@
1
+ /**
2
+ * @fileoverview HtmlLoader — basic HTML-to-text document loader.
3
+ *
4
+ * Converts `.html` and `.htm` files into plain text using lightweight regex
5
+ * transformations. This is intentionally a *simple* loader — it covers the
6
+ * common case of stripping tag soup and decoding standard HTML entities. For
7
+ * complex documents (nested frames, JavaScript-rendered content) a headless
8
+ * browser or DOM-parsing library would be more appropriate.
9
+ *
10
+ * Supported extensions: `.html`, `.htm`
11
+ *
12
+ * @module memory/ingestion/HtmlLoader
13
+ */
14
+ import type { IDocumentLoader } from './IDocumentLoader.js';
15
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
16
+ /**
17
+ * Basic document loader for HTML (`.html`, `.htm`) files.
18
+ *
19
+ * ### Text extraction strategy
20
+ * 1. `<script>` and `<style>` blocks are removed entirely.
21
+ * 2. Block-level elements (`<p>`, `<div>`, `<h1>`–`<h6>`, etc.) are replaced
22
+ * with newline characters to preserve paragraph structure.
23
+ * 3. All remaining HTML tags are stripped.
24
+ * 4. A common subset of HTML entities is decoded.
25
+ * 5. Excessive whitespace is collapsed.
26
+ *
27
+ * ### Metadata
28
+ * - `title` — extracted from the `<title>` element when present.
29
+ * - `wordCount` — approximate count of words in the extracted text.
30
+ * - `source` — absolute file path (when loaded from disk).
31
+ *
32
+ * @implements {IDocumentLoader}
33
+ *
34
+ * @example
35
+ * ```ts
36
+ * const loader = new HtmlLoader();
37
+ * const doc = await loader.load('/public/index.html');
38
+ * console.log(doc.metadata.title); // e.g. 'Welcome to AgentOS'
39
+ * ```
40
+ */
41
+ export declare class HtmlLoader implements IDocumentLoader {
42
+ /** Extensions claimed by this loader: a per-instance copy of `['.html', '.htm']`. */
43
+ readonly supportedExtensions: string[];
44
+ /** Returns `true` only for a string path whose lower-cased extension is `.html` or `.htm`; a `Buffer` source always yields `false`. */
45
+ canLoad(source: string | Buffer): boolean;
46
+ /** Reads the file at `source` (string) or decodes `source` (Buffer) as UTF-8, converts the markup to plain text and resolves with `content`, `metadata` (`title?`, `wordCount`, `source?`) and `format: 'html'`. `_options` is accepted for interface compatibility and is not read. */
47
+ load(source: string | Buffer, _options?: LoadOptions): Promise<LoadedDocument>;
48
+ }
49
+ //# sourceMappingURL=HtmlLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"HtmlLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/HtmlLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAoB,MAAM,oBAAoB,CAAC;AAmIxF;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,qBAAa,UAAW,YAAW,eAAe;IAChD,kBAAkB;IAClB,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAA6B;IAMnE,kBAAkB;IAClB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IAWzC,kBAAkB;IACZ,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;CAiCrF"}
@@ -0,0 +1,202 @@
1
+ /**
2
+ * @fileoverview HtmlLoader — basic HTML-to-text document loader.
3
+ *
4
+ * Converts `.html` and `.htm` files into plain text using lightweight regex
5
+ * transformations. This is intentionally a *simple* loader — it covers the
6
+ * common case of stripping tag soup and decoding standard HTML entities. For
7
+ * complex documents (nested frames, JavaScript-rendered content) a headless
8
+ * browser or DOM-parsing library would be more appropriate.
9
+ *
10
+ * Supported extensions: `.html`, `.htm`
11
+ *
12
+ * @module memory/ingestion/HtmlLoader
13
+ */
14
+ import fs from 'node:fs/promises';
15
+ import path from 'node:path';
16
+ // ---------------------------------------------------------------------------
17
+ // Constants
18
+ // ---------------------------------------------------------------------------
19
+ /** Extensions handled by this loader, each with a leading dot and in lower case (`canLoad` compares them against the lower-cased result of `extOf`). */
20
+ const SUPPORTED_EXTENSIONS = ['.html', '.htm'];
21
+ // ---------------------------------------------------------------------------
22
+ // Regex helpers
23
+ // ---------------------------------------------------------------------------
24
/**
 * Strip all HTML/XML markup from a string, leaving only text.
 *
 * Purely textual: no DOM is built. The passes run in this order:
 * 1. HTML comments, `<script>` blocks and `<style>` blocks are dropped
 *    together with their content, each replaced by a single space. They are
 *    matched in ONE alternation so that whichever construct opens first in the
 *    input wins (a `<script>` mentioned inside a comment, or a `<!--` inside a
 *    script, cannot swallow unrelated text).
 * 2. Opening/closing block-level tags become `\n` so paragraph structure
 *    survives. The tag name is followed by a word boundary, so `<progress>`,
 *    `<picture>`, `<link>` or `<track>` are not mistaken for `<p>`, `<li>` or
 *    `<tr>`; `<pre>` is listed explicitly.
 * 3. `<br>` and `<hr>` (with or without attributes / self-closing slash)
 *    become `\n`.
 * 4. Every remaining tag, doctype or processing instruction is removed. A `<`
 *    that is not followed by a letter, `/`, `!` or `?` is ordinary text
 *    (e.g. `1 < 2`) and is left alone.
 *
 * Entities are NOT decoded and whitespace is NOT normalised here.
 *
 * @param html - Raw HTML string.
 * @returns The input with markup removed and block boundaries turned into newlines.
 */
function stripTags(html) {
    // Pass 1: constructs whose *content* must not reach the output.
    // `\s*` before the closing `>` accepts `</script >`, which browsers treat as a valid end tag.
    let text = html.replace(/<!--[\s\S]*?-->|<script\b[^>]*>[\s\S]*?<\/script\s*>|<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, ' ');
    // Pass 2: block-level elements mark paragraph boundaries.
    text = text.replace(/<\/?(?:p|div|section|article|header|footer|h[1-6]|li|tr|blockquote|pre)\b[^>]*>/gi, '\n');
    // Pass 3: explicit line/thematic breaks, attributes allowed (`<br class="x">`).
    text = text.replace(/<(?:br|hr)\b[^>]*>/gi, '\n');
    // Pass 4: anything else that looks like a tag.
    text = text.replace(/<\/?[a-z!?][^>]*>/gi, '');
    return text;
}
47
/**
 * Lookup table for the named character references this loader understands.
 * Keys are lower-case entity names without the surrounding `&` and `;`.
 */
const NAMED_ENTITIES = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
    ['mdash', '\u2014'],
    ['ndash', '\u2013'],
    ['hellip', '\u2026'],
    ['lsquo', '\u2018'],
    ['rsquo', '\u2019'],
    ['ldquo', '\u201C'],
    ['rdquo', '\u201D'],
    ['copy', '\u00A9'],
    ['reg', '\u00AE'],
    ['trade', '\u2122'],
]);
/**
 * Decode a small but common set of named and numeric HTML entities.
 *
 * All references are resolved in a SINGLE left-to-right pass, so text that a
 * reference decodes to is never decoded again: `&amp;lt;` yields the literal
 * text `&lt;`, not `<`.
 *
 * - Named references are matched case-insensitively against a fixed subset
 *   (`amp`, `lt`, `gt`, `quot`, `apos`, `nbsp`, dashes, quotes, `copy`, `reg`,
 *   `trade`, `hellip`). Unknown names are left untouched.
 * - Decimal (`&#8212;`) and hexadecimal (`&#x2014;`) references are converted
 *   with `String.fromCodePoint`. A value above U+10FFFF is not a code point
 *   and would make `fromCodePoint` throw, so such a reference is left
 *   untouched instead of aborting the whole document load.
 *
 * @param text - Text containing HTML entity references.
 * @returns `text` with every recognised reference replaced by its character.
 */
function decodeEntities(text) {
    return text.replace(/&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi, (reference, decimal, hex, name) => {
        if (name !== undefined) {
            const key = name.toLowerCase();
            return NAMED_ENTITIES.has(key) ? NAMED_ENTITIES.get(key) : reference;
        }
        const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex, 16);
        // Guard the only failure mode of fromCodePoint for non-negative integers.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
    });
}
80
/**
 * Pull the document title out of raw HTML.
 *
 * Only the first `<title>…</title>` pair is considered. Its inner text is
 * passed through `decodeEntities` and trimmed; a missing element or a title
 * that is empty after trimming both produce `undefined`.
 *
 * @param html - Raw HTML string.
 * @returns The decoded, trimmed title, or `undefined` when there is none.
 */
function extractTitle(html) {
    const titlePattern = /<title[^>]*>([\s\S]*?)<\/title>/i;
    const found = titlePattern.exec(html);
    if (found === null) {
        return undefined;
    }
    const [, inner] = found;
    const cleaned = decodeEntities(inner).trim();
    // An empty title carries no information, so report it as absent.
    return cleaned === '' ? undefined : cleaned;
}
94
/**
 * Collapse repeated whitespace and blank lines in extracted text.
 *
 * Steps, in order:
 * 1. `\r\n` and lone `\r` are converted to `\n`, so Windows/old-Mac sources
 *    are handled like Unix ones.
 * 2. Runs of spaces/tabs are collapsed to a single space.
 * 3. A space directly before or after a newline is removed. Without this,
 *    whitespace-only lines (the usual residue of indented HTML once tags are
 *    stripped) would keep consecutive newlines apart and defeat step 4.
 * 4. Runs of three or more newlines are folded to two (one blank line =
 *    paragraph boundary).
 * 5. Leading and trailing whitespace is trimmed.
 *
 * @param text - Text with raw whitespace inherited from the HTML source.
 * @returns The text with normalised line endings and collapsed whitespace.
 */
function normaliseWhitespace(text) {
    return text
        .replace(/\r\n?/g, '\n') // Unify line endings.
        .replace(/[ \t]+/g, ' ') // Collapse horizontal runs.
        .replace(/ ?\n ?/g, '\n') // Drop spaces hugging a line break.
        .replace(/\n{3,}/g, '\n\n') // Collapse blank-line runs.
        .trim();
}
109
/**
 * Count whitespace-separated tokens in a string.
 *
 * The text is trimmed first; an empty result counts as zero words, otherwise
 * the trimmed text is split on runs of whitespace.
 *
 * @param text - Plain text string.
 * @returns Approximate number of words.
 */
function wordCount(text) {
    const trimmed = text.trim();
    if (trimmed === '') {
        return 0;
    }
    const tokens = trimmed.split(/\s+/);
    return tokens.length;
}
117
/**
 * Extension of a path, normalised to lower case.
 *
 * Delegates to `path.extname`, so the result includes the leading dot and is
 * the empty string when the path has no extension.
 *
 * @param filePath - File path string.
 * @returns Lower-cased extension such as `.html`, or `''`.
 */
function extOf(filePath) {
    const extension = path.extname(filePath);
    return extension.toLowerCase();
}
125
+ // ---------------------------------------------------------------------------
126
+ // HtmlLoader
127
+ // ---------------------------------------------------------------------------
128
/**
 * Document loader that turns HTML (`.html`, `.htm`) into plain text.
 *
 * ### Pipeline
 * 1. The title is read from the first `<title>` element of the raw markup.
 * 2. `stripTags` removes markup (script/style content included) and turns
 *    block boundaries into newlines.
 * 3. `decodeEntities` resolves a common subset of HTML entities.
 * 4. `normaliseWhitespace` collapses excess whitespace.
 *
 * ### Metadata
 * - `title` — present only when a non-empty `<title>` was found.
 * - `wordCount` — approximate word count of the extracted text.
 * - `source` — the path string passed to `load()`; absent for Buffer input.
 *
 * @implements {IDocumentLoader}
 *
 * @example
 * ```ts
 * const loader = new HtmlLoader();
 * const doc = await loader.load('/public/index.html');
 * console.log(doc.metadata.title); // e.g. 'Welcome to AgentOS'
 * ```
 */
export class HtmlLoader {
    constructor() {
        /** A fresh copy of the module-level extension list. */
        this.supportedExtensions = SUPPORTED_EXTENSIONS.slice();
    }
    /**
     * Extension-based capability check.
     *
     * @param source - File path or raw bytes.
     * @returns `true` for a path ending in a supported extension; always
     *   `false` for a Buffer.
     */
    canLoad(source) {
        return !Buffer.isBuffer(source) && SUPPORTED_EXTENSIONS.includes(extOf(source));
    }
    /**
     * Load an HTML document and convert it to plain text.
     *
     * @param source - Path of the file to read, or a Buffer holding the
     *   markup. Either way the bytes are decoded as UTF-8.
     * @param _options - Unused by this loader.
     * @returns The extracted text, its metadata, and `format: 'html'`.
     * @throws Whatever `fs.readFile` rejects with when the path cannot be read.
     */
    async load(source, _options) {
        const fromMemory = Buffer.isBuffer(source);
        // Only a path-based load has a `source` to report in the metadata.
        const resolvedPath = fromMemory ? undefined : source;
        const bytes = fromMemory ? source : await fs.readFile(resolvedPath);
        const html = bytes.toString('utf8');
        // The title must be captured while the markup is still intact.
        const title = extractTitle(html);
        const content = normaliseWhitespace(decodeEntities(stripTags(html)));
        // Optional keys are omitted entirely rather than set to `undefined`.
        const metadata = Object.assign({}, title !== undefined ? { title } : {}, { wordCount: wordCount(content) }, resolvedPath ? { source: resolvedPath } : {});
        return { content, metadata, format: 'html' };
    }
}
202
+ //# sourceMappingURL=HtmlLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"HtmlLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/HtmlLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,kEAAkE;AAClE,MAAM,oBAAoB,GAAG,CAAC,OAAO,EAAE,MAAM,CAAU,CAAC;AAExD,8EAA8E;AAC9E,gBAAgB;AAChB,8EAA8E;AAE9E;;;;;;;GAOG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,wEAAwE;IACxE,sDAAsD;IACtD,IAAI,IAAI,GAAG,IAAI;SACZ,OAAO,CAAC,qCAAqC,EAAE,GAAG,CAAC;SACnD,OAAO,CAAC,mCAAmC,EAAE,GAAG,CAAC,CAAC;IAErD,2EAA2E;IAC3E,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,2EAA2E,EAAE,IAAI,CAAC,CAAC;IAEvG,2CAA2C;IAC3C,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;IAC1C,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;IAE1C,wBAAwB;IACxB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI;QACT,iDAAiD;SAChD,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,YAAY,EAAE,GAAG,CAAC;SAC1B,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC;SAC9B,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC;SAC9B,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC;SAC9B,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC;SAC9B,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;QAC1B,gDAAgD;SAC/C,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAY,EAAE,EAAE,CACxC,MAAM,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CACzC;QACD,uDAAuD;SACtD,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,GAAW,EAAE,EAAE,CAC/C,MAAM,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CACxC,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACH,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,KAAK,GAAG,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5D,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,oEAAoE;IACpE,OAAO,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,IAA
I,SAAS,CAAC;AACtD,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,IAAI;SACR,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAU,4BAA4B;SAC7D,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAO,4BAA4B;SAC7D,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AAClE,CAAC;AAED;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,OAAO,UAAU;IAAvB;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IAoDrE,CAAC;IAlDC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,IAAY,CAAC;QACjB,IAAI,YAAgC,CAAC;QAErC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,YAAY,GAAG,MAAM,CAAC;YACtB,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YAC9C,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChC,CAAC;QAED,gDAAgD;QAChD,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAEjC,uCAAuC;QACvC,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAE7C,8BAA8B;QAC9B,MAAM,IAAI,GAAqB;YAC7B,GAAG,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,SAAS,EAAE,SAAS,CAAC,OAAO,CAAC;YAC7B,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,MAAM;SACf,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * @fileoverview IDocumentLoader — contract for all document loading strategies.
3
+ *
4
+ * Every loader targets one or more file extensions and exposes a uniform
5
+ * `load()` primitive that normalises diverse document formats into a single
6
+ * {@link LoadedDocument} shape ready for chunking and indexing.
7
+ *
8
+ * @module memory/ingestion/IDocumentLoader
9
+ */
10
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
11
+ /**
12
+ * Unified interface for all document loaders in the AgentOS ingestion
13
+ * pipeline.
14
+ *
15
+ * Implementations handle a specific set of file extensions and are
16
+ * responsible for:
17
+ * 1. Reading raw bytes from a file path or in-memory `Buffer`.
18
+ * 2. Extracting plain-text `content` and structured `metadata`.
19
+ * 3. Returning a {@link LoadedDocument} ready for downstream chunking.
20
+ *
21
+ * @example
22
+ * ```ts
23
+ * const loader: IDocumentLoader = new MarkdownLoader();
24
+ * if (loader.canLoad('README.md')) {
25
+ * const doc = await loader.load('README.md');
26
+ * console.log(doc.metadata.title);
27
+ * }
28
+ * ```
29
+ */
30
+ export interface IDocumentLoader {
31
+ /**
32
+ * File extensions this loader handles, each with a leading dot.
33
+ *
34
+ * Used by {@link LoaderRegistry} to route file paths to the correct loader.
35
+ *
36
+ * @example `['.md', '.mdx']`
37
+ */
38
+ readonly supportedExtensions: string[];
39
+ /**
40
+ * Parses `source` and returns a normalised {@link LoadedDocument}.
41
+ *
42
+ * When `source` is a `string` the loader treats it as an absolute (or
43
+ * resolvable) file path and reads the file from disk. When `source` is a
44
+ * `Buffer` the loader parses the bytes directly and derives as much
45
+ * metadata as possible from the buffer content alone.
46
+ *
47
+ * @param source - Absolute file path OR raw document bytes.
48
+ * @param options - Optional hints such as a format override.
49
+ * @returns A promise resolving to the fully-populated {@link LoadedDocument}.
50
+ * @throws When the file cannot be read or the format is not parsable.
51
+ */
52
+ load(source: string | Buffer, options?: LoadOptions): Promise<LoadedDocument>;
53
+ /**
54
+ * Returns `true` when this loader is capable of handling `source`.
55
+ *
56
+ * For string sources the check is purely extension-based. For `Buffer`
57
+ * sources the loader may inspect magic bytes when relevant, or simply return `false` (as `HtmlLoader` does).
58
+ *
59
+ * @param source - Absolute file path or raw bytes.
60
+ */
61
+ canLoad(source: string | Buffer): boolean;
62
+ }
63
+ //# sourceMappingURL=IDocumentLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IDocumentLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/IDocumentLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEtE;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;;OAMG;IACH,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAEvC;;;;;;;;;;;;OAYG;IACH,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAE9E;;;;;;;OAOG;IACH,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;CAC3C"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @fileoverview IDocumentLoader — contract for all document loading strategies.
3
+ *
4
+ * Every loader targets one or more file extensions and exposes a uniform
5
+ * `load()` primitive that normalises diverse document formats into a single
6
+ * {@link LoadedDocument} shape ready for chunking and indexing.
7
+ *
8
+ * @module memory/ingestion/IDocumentLoader
9
+ */
10
+ export {};
11
+ //# sourceMappingURL=IDocumentLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IDocumentLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/IDocumentLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG"}
@@ -0,0 +1,140 @@
1
+ /**
2
+ * @fileoverview LoaderRegistry — extension-to-loader routing table.
3
+ *
4
+ * The registry maintains a map of file extensions to {@link IDocumentLoader}
5
+ * implementations and provides a convenience `loadFile()` method that
6
+ * auto-detects the format from a file path before delegating to the
7
+ * appropriate loader.
8
+ *
9
+ * On construction the registry pre-registers five built-in loaders:
10
+ * {@link TextLoader}, {@link MarkdownLoader}, {@link HtmlLoader},
11
+ * {@link PdfLoader}, and {@link DocxLoader}. In addition, the optional
12
+ * {@link OcrPdfLoader} and {@link DoclingLoader} are registered when their
13
+ * respective factories return non-null values (i.e. when `tesseract.js` and
14
+ * `python3 -m docling` are available in the environment).
15
+ *
16
+ * Additional loaders can be added at runtime via {@link LoaderRegistry.register}.
17
+ *
18
+ * @module memory/ingestion/LoaderRegistry
19
+ */
20
+ import type { IDocumentLoader } from './IDocumentLoader.js';
21
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
22
+ /**
23
+ * Central registry mapping file extensions to {@link IDocumentLoader}
24
+ * implementations.
25
+ *
26
+ * ### Built-in loaders (registered automatically)
27
+ * | Extensions | Loader |
28
+ * |----------------------------------------------------|-----------------------|
29
+ * | `.txt`, `.csv`, `.tsv`, `.json`, `.yaml`, `.yml` | {@link TextLoader} |
30
+ * | `.md`, `.mdx` | {@link MarkdownLoader} |
31
+ * | `.html`, `.htm` | {@link HtmlLoader} |
32
+ * | `.pdf` | {@link PdfLoader} |
33
+ * | `.docx` | {@link DocxLoader} |
34
+ *
35
+ * ### Conditional loaders (registered when available)
36
+ * | Condition | Loader |
37
+ * |-------------------------------|-------------------------------------|
38
+ * | `tesseract.js` installed | {@link OcrPdfLoader} (overrides PDF) |
39
+ * | `python3 -m docling` available | {@link DoclingLoader} (overrides PDF + DOCX) |
40
+ *
41
+ * ### Registering a custom loader
42
+ * ```ts
43
+ * const registry = new LoaderRegistry();
44
+ * registry.register(new PdfLoader());
45
+ * const doc = await registry.loadFile('/reports/q3.pdf');
46
+ * ```
47
+ *
48
+ * ### Using loadFile
49
+ * ```ts
50
+ * const registry = new LoaderRegistry();
51
+ * const doc = await registry.loadFile('/notes/meeting.md');
52
+ * console.log(doc.metadata.title);
53
+ * ```
54
+ */
55
+ export declare class LoaderRegistry {
56
+ /**
57
+ * Internal map from lower-cased extension (with dot) to the loader
58
+ * responsible for that extension.
59
+ *
60
+ * When multiple loaders claim the same extension the last one registered
61
+ * wins (newest-registration-wins semantics), allowing callers to override
62
+ * built-in loaders.
63
+ */
64
+ private readonly _loaders;
65
+ /**
66
+ * Creates a new registry pre-populated with the built-in loaders.
67
+ *
68
+ * Loader registration order determines conflict resolution: later
69
+ * registrations override earlier ones for the same extension.
70
+ *
71
+ * Registration order:
72
+ * 1. {@link TextLoader}, {@link MarkdownLoader}, {@link HtmlLoader} — core text formats.
73
+ * 2. {@link PdfLoader} (with injected OCR + Docling loaders) — PDF extraction.
74
+ * 3. {@link DocxLoader} — DOCX extraction.
75
+ * 4. Optional: an {@link OcrPdfLoader} override when `tesseract.js` is installed.
76
+ * 5. Optional: a {@link DoclingLoader} override when Python Docling is available.
77
+ * DoclingLoader supports both `.pdf` and `.docx`, so it supersedes both
78
+ * PdfLoader and DocxLoader when present.
79
+ */
80
+ constructor();
81
+ /**
82
+ * Register a loader for all extensions it declares.
83
+ *
84
+ * If a previously registered loader already handles one of those extensions,
85
+ * it is replaced. This makes it trivial to swap in a higher-fidelity
86
+ * implementation for any format.
87
+ *
88
+ * @param loader - The loader instance to register.
89
+ *
90
+ * @example
91
+ * ```ts
92
+ * registry.register(new PdfLoader());
93
+ * ```
94
+ */
95
+ register(loader: IDocumentLoader): void;
96
+ /**
97
+ * Retrieve the loader registered for `extensionOrPath`.
98
+ *
99
+ * Both bare extensions (`.md`, `md`) and full file paths
100
+ * (`/docs/guide.md`) are accepted.
101
+ *
102
+ * @param extensionOrPath - File extension or full path.
103
+ * @returns The matching {@link IDocumentLoader}, or `undefined` when no
104
+ * loader is registered for the detected extension.
105
+ *
106
+ * @example
107
+ * ```ts
108
+ * const loader = registry.getLoader('.md');
109
+ * const loader2 = registry.getLoader('README.md');
110
+ * ```
111
+ */
112
+ getLoader(extensionOrPath: string): IDocumentLoader | undefined;
113
+ /**
114
+ * Return a sorted array of all extensions currently registered.
115
+ *
116
+ * Each extension is returned with a leading dot in lower-case, e.g.
117
+ * `['.csv', '.htm', '.html', '.json', '.md', …]`.
118
+ *
119
+ * @returns Sorted array of registered extension strings.
120
+ */
121
+ getSupportedExtensions(): string[];
122
+ /**
123
+ * Convenience method: detect format from `filePath`, find the matching
124
+ * loader, and delegate to its `load()` method.
125
+ *
126
+ * @param filePath - Absolute (or resolvable relative) file path.
127
+ * @param options - Optional load hints forwarded to the loader.
128
+ * @returns A promise resolving to the {@link LoadedDocument}.
129
+ *
130
+ * @throws {Error} When no loader is registered for the file's extension.
131
+ * @throws {Error} When the underlying loader's `load()` throws.
132
+ *
133
+ * @example
134
+ * ```ts
135
+ * const doc = await registry.loadFile('/notes/architecture.md');
136
+ * ```
137
+ */
138
+ loadFile(filePath: string, options?: LoadOptions): Promise<LoadedDocument>;
139
+ }
140
+ //# sourceMappingURL=LoaderRegistry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LoaderRegistry.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/LoaderRegistry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAGH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAkDtE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,qBAAa,cAAc;IACzB;;;;;;;OAOG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA2C;IAEpE;;;;;;;;;;;;;;OAcG;;IA6BH;;;;;;;;;;;;;OAaG;IACH,QAAQ,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI;IAUvC;;;;;;;;;;;;;;;OAeG;IACH,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,eAAe,GAAG,SAAS;IAS/D;;;;;;;OAOG;IACH,sBAAsB,IAAI,MAAM,EAAE;IAQlC;;;;;;;;;;;;;;;OAeG;IACG,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;CAajF"}