@framers/agentos 0.1.101 → 0.1.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/README.md +16 -0
  2. package/dist/memory/config.d.ts +39 -0
  3. package/dist/memory/config.d.ts.map +1 -1
  4. package/dist/memory/config.js.map +1 -1
  5. package/dist/memory/consolidation/ConsolidationLoop.d.ts +177 -0
  6. package/dist/memory/consolidation/ConsolidationLoop.d.ts.map +1 -0
  7. package/dist/memory/consolidation/ConsolidationLoop.js +517 -0
  8. package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -0
  9. package/dist/memory/consolidation/ConsolidationPipeline.d.ts.map +1 -1
  10. package/dist/memory/consolidation/ConsolidationPipeline.js +7 -0
  11. package/dist/memory/consolidation/ConsolidationPipeline.js.map +1 -1
  12. package/dist/memory/consolidation/index.d.ts +8 -0
  13. package/dist/memory/consolidation/index.d.ts.map +1 -0
  14. package/dist/memory/consolidation/index.js +7 -0
  15. package/dist/memory/consolidation/index.js.map +1 -0
  16. package/dist/memory/decay/DecayModel.d.ts +33 -0
  17. package/dist/memory/decay/DecayModel.d.ts.map +1 -1
  18. package/dist/memory/decay/DecayModel.js +31 -0
  19. package/dist/memory/decay/DecayModel.js.map +1 -1
  20. package/dist/memory/facade/Memory.d.ts +228 -0
  21. package/dist/memory/facade/Memory.d.ts.map +1 -0
  22. package/dist/memory/facade/Memory.js +823 -0
  23. package/dist/memory/facade/Memory.js.map +1 -0
  24. package/dist/memory/facade/index.d.ts +13 -0
  25. package/dist/memory/facade/index.d.ts.map +1 -0
  26. package/dist/memory/facade/index.js +11 -0
  27. package/dist/memory/facade/index.js.map +1 -0
  28. package/dist/memory/facade/types.d.ts +606 -0
  29. package/dist/memory/facade/types.d.ts.map +1 -0
  30. package/dist/memory/facade/types.js +11 -0
  31. package/dist/memory/facade/types.js.map +1 -0
  32. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts +132 -0
  33. package/dist/memory/feedback/RetrievalFeedbackSignal.d.ts.map +1 -0
  34. package/dist/memory/feedback/RetrievalFeedbackSignal.js +178 -0
  35. package/dist/memory/feedback/RetrievalFeedbackSignal.js.map +1 -0
  36. package/dist/memory/feedback/index.d.ts +13 -0
  37. package/dist/memory/feedback/index.d.ts.map +1 -0
  38. package/dist/memory/feedback/index.js +12 -0
  39. package/dist/memory/feedback/index.js.map +1 -0
  40. package/dist/memory/index.d.ts +22 -0
  41. package/dist/memory/index.d.ts.map +1 -1
  42. package/dist/memory/index.js +24 -0
  43. package/dist/memory/index.js.map +1 -1
  44. package/dist/memory/ingestion/ChunkingEngine.d.ts +143 -0
  45. package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -0
  46. package/dist/memory/ingestion/ChunkingEngine.js +508 -0
  47. package/dist/memory/ingestion/ChunkingEngine.js.map +1 -0
  48. package/dist/memory/ingestion/DoclingLoader.d.ts +44 -0
  49. package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -0
  50. package/dist/memory/ingestion/DoclingLoader.js +228 -0
  51. package/dist/memory/ingestion/DoclingLoader.js.map +1 -0
  52. package/dist/memory/ingestion/DocxLoader.d.ts +37 -0
  53. package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -0
  54. package/dist/memory/ingestion/DocxLoader.js +111 -0
  55. package/dist/memory/ingestion/DocxLoader.js.map +1 -0
  56. package/dist/memory/ingestion/FolderScanner.d.ts +116 -0
  57. package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -0
  58. package/dist/memory/ingestion/FolderScanner.js +127 -0
  59. package/dist/memory/ingestion/FolderScanner.js.map +1 -0
  60. package/dist/memory/ingestion/HtmlLoader.d.ts +49 -0
  61. package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -0
  62. package/dist/memory/ingestion/HtmlLoader.js +202 -0
  63. package/dist/memory/ingestion/HtmlLoader.js.map +1 -0
  64. package/dist/memory/ingestion/IDocumentLoader.d.ts +63 -0
  65. package/dist/memory/ingestion/IDocumentLoader.d.ts.map +1 -0
  66. package/dist/memory/ingestion/IDocumentLoader.js +11 -0
  67. package/dist/memory/ingestion/IDocumentLoader.js.map +1 -0
  68. package/dist/memory/ingestion/LoaderRegistry.d.ts +140 -0
  69. package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -0
  70. package/dist/memory/ingestion/LoaderRegistry.js +229 -0
  71. package/dist/memory/ingestion/LoaderRegistry.js.map +1 -0
  72. package/dist/memory/ingestion/MarkdownLoader.d.ts +50 -0
  73. package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -0
  74. package/dist/memory/ingestion/MarkdownLoader.js +169 -0
  75. package/dist/memory/ingestion/MarkdownLoader.js.map +1 -0
  76. package/dist/memory/ingestion/MultimodalAggregator.d.ts +88 -0
  77. package/dist/memory/ingestion/MultimodalAggregator.d.ts.map +1 -0
  78. package/dist/memory/ingestion/MultimodalAggregator.js +96 -0
  79. package/dist/memory/ingestion/MultimodalAggregator.js.map +1 -0
  80. package/dist/memory/ingestion/OcrPdfLoader.d.ts +41 -0
  81. package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -0
  82. package/dist/memory/ingestion/OcrPdfLoader.js +149 -0
  83. package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -0
  84. package/dist/memory/ingestion/PdfLoader.d.ts +78 -0
  85. package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -0
  86. package/dist/memory/ingestion/PdfLoader.js +179 -0
  87. package/dist/memory/ingestion/PdfLoader.js.map +1 -0
  88. package/dist/memory/ingestion/TextLoader.d.ts +66 -0
  89. package/dist/memory/ingestion/TextLoader.d.ts.map +1 -0
  90. package/dist/memory/ingestion/TextLoader.js +207 -0
  91. package/dist/memory/ingestion/TextLoader.js.map +1 -0
  92. package/dist/memory/ingestion/UrlLoader.d.ts +95 -0
  93. package/dist/memory/ingestion/UrlLoader.d.ts.map +1 -0
  94. package/dist/memory/ingestion/UrlLoader.js +174 -0
  95. package/dist/memory/ingestion/UrlLoader.js.map +1 -0
  96. package/dist/memory/io/ChatGptImporter.d.ts +85 -0
  97. package/dist/memory/io/ChatGptImporter.d.ts.map +1 -0
  98. package/dist/memory/io/ChatGptImporter.js +231 -0
  99. package/dist/memory/io/ChatGptImporter.js.map +1 -0
  100. package/dist/memory/io/JsonExporter.d.ts +67 -0
  101. package/dist/memory/io/JsonExporter.d.ts.map +1 -0
  102. package/dist/memory/io/JsonExporter.js +132 -0
  103. package/dist/memory/io/JsonExporter.js.map +1 -0
  104. package/dist/memory/io/JsonImporter.d.ts +84 -0
  105. package/dist/memory/io/JsonImporter.d.ts.map +1 -0
  106. package/dist/memory/io/JsonImporter.js +234 -0
  107. package/dist/memory/io/JsonImporter.js.map +1 -0
  108. package/dist/memory/io/MarkdownExporter.d.ts +95 -0
  109. package/dist/memory/io/MarkdownExporter.d.ts.map +1 -0
  110. package/dist/memory/io/MarkdownExporter.js +130 -0
  111. package/dist/memory/io/MarkdownExporter.js.map +1 -0
  112. package/dist/memory/io/MarkdownImporter.d.ts +84 -0
  113. package/dist/memory/io/MarkdownImporter.d.ts.map +1 -0
  114. package/dist/memory/io/MarkdownImporter.js +166 -0
  115. package/dist/memory/io/MarkdownImporter.js.map +1 -0
  116. package/dist/memory/io/ObsidianExporter.d.ts +80 -0
  117. package/dist/memory/io/ObsidianExporter.d.ts.map +1 -0
  118. package/dist/memory/io/ObsidianExporter.js +127 -0
  119. package/dist/memory/io/ObsidianExporter.js.map +1 -0
  120. package/dist/memory/io/ObsidianImporter.d.ts +93 -0
  121. package/dist/memory/io/ObsidianImporter.d.ts.map +1 -0
  122. package/dist/memory/io/ObsidianImporter.js +221 -0
  123. package/dist/memory/io/ObsidianImporter.js.map +1 -0
  124. package/dist/memory/io/SqliteExporter.d.ts +47 -0
  125. package/dist/memory/io/SqliteExporter.d.ts.map +1 -0
  126. package/dist/memory/io/SqliteExporter.js +56 -0
  127. package/dist/memory/io/SqliteExporter.js.map +1 -0
  128. package/dist/memory/io/SqliteImporter.d.ts +82 -0
  129. package/dist/memory/io/SqliteImporter.d.ts.map +1 -0
  130. package/dist/memory/io/SqliteImporter.js +232 -0
  131. package/dist/memory/io/SqliteImporter.js.map +1 -0
  132. package/dist/memory/io/index.d.ts +31 -0
  133. package/dist/memory/io/index.d.ts.map +1 -0
  134. package/dist/memory/io/index.js +31 -0
  135. package/dist/memory/io/index.js.map +1 -0
  136. package/dist/memory/store/SqliteBrain.d.ts +125 -0
  137. package/dist/memory/store/SqliteBrain.d.ts.map +1 -0
  138. package/dist/memory/store/SqliteBrain.js +407 -0
  139. package/dist/memory/store/SqliteBrain.js.map +1 -0
  140. package/dist/memory/store/SqliteKnowledgeGraph.d.ts +259 -0
  141. package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -0
  142. package/dist/memory/store/SqliteKnowledgeGraph.js +1062 -0
  143. package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -0
  144. package/dist/memory/store/SqliteMemoryGraph.d.ts +251 -0
  145. package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -0
  146. package/dist/memory/store/SqliteMemoryGraph.js +637 -0
  147. package/dist/memory/store/SqliteMemoryGraph.js.map +1 -0
  148. package/dist/memory/tools/MemoryAddTool.d.ts +98 -0
  149. package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -0
  150. package/dist/memory/tools/MemoryAddTool.js +131 -0
  151. package/dist/memory/tools/MemoryAddTool.js.map +1 -0
  152. package/dist/memory/tools/MemoryDeleteTool.d.ts +83 -0
  153. package/dist/memory/tools/MemoryDeleteTool.d.ts.map +1 -0
  154. package/dist/memory/tools/MemoryDeleteTool.js +96 -0
  155. package/dist/memory/tools/MemoryDeleteTool.js.map +1 -0
  156. package/dist/memory/tools/MemoryMergeTool.d.ts +95 -0
  157. package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -0
  158. package/dist/memory/tools/MemoryMergeTool.js +164 -0
  159. package/dist/memory/tools/MemoryMergeTool.js.map +1 -0
  160. package/dist/memory/tools/MemoryReflectTool.d.ts +86 -0
  161. package/dist/memory/tools/MemoryReflectTool.d.ts.map +1 -0
  162. package/dist/memory/tools/MemoryReflectTool.js +102 -0
  163. package/dist/memory/tools/MemoryReflectTool.js.map +1 -0
  164. package/dist/memory/tools/MemorySearchTool.d.ts +117 -0
  165. package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -0
  166. package/dist/memory/tools/MemorySearchTool.js +162 -0
  167. package/dist/memory/tools/MemorySearchTool.js.map +1 -0
  168. package/dist/memory/tools/MemoryUpdateTool.d.ts +92 -0
  169. package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -0
  170. package/dist/memory/tools/MemoryUpdateTool.js +125 -0
  171. package/dist/memory/tools/MemoryUpdateTool.js.map +1 -0
  172. package/dist/memory/tools/index.d.ts +32 -0
  173. package/dist/memory/tools/index.d.ts.map +1 -0
  174. package/dist/memory/tools/index.js +26 -0
  175. package/dist/memory/tools/index.js.map +1 -0
  176. package/package.json +6 -1
@@ -0,0 +1,41 @@
1
+ /**
2
+ * @fileoverview OcrPdfLoader — optional OCR-based PDF loader using Tesseract.js.
3
+ *
4
+ * This module provides a factory function {@link createOcrPdfLoader} that
5
+ * checks whether `tesseract.js` can be resolved at runtime (the package itself is only imported lazily inside `load()`). When the package is not
6
+ * installed the factory returns `null` gracefully so callers can treat OCR as
7
+ * fully opt-in without any hard dependency.
8
+ *
9
+ * ### Opting in
10
+ * ```sh
11
+ * pnpm add tesseract.js
12
+ * ```
13
+ *
14
+ * Once installed, pass the result of {@link createOcrPdfLoader} to
15
+ * {@link PdfLoader}'s constructor as the `ocrLoader` argument.
16
+ *
17
+ * @module memory/ingestion/OcrPdfLoader
18
+ */
19
+ import type { IDocumentLoader } from './IDocumentLoader.js';
20
+ /**
21
+ * Checks whether `tesseract.js` is available in the current environment and,
22
+ * if so, returns a new {@link OcrPdfLoader} instance; otherwise returns `null`.
23
+ *
24
+ * The check is performed by attempting to resolve the package path using
25
+ * Node's `createRequire`. The package is only resolved here, not imported,
26
+ * which keeps this factory synchronous.
27
+ *
28
+ * ### Usage
29
+ * ```ts
30
+ * import { createOcrPdfLoader } from './OcrPdfLoader.js';
31
+ * import { PdfLoader } from './PdfLoader.js';
32
+ *
33
+ * const ocrLoader = createOcrPdfLoader();
34
+ * const loader = new PdfLoader(ocrLoader);
35
+ * ```
36
+ *
37
+ * @returns An `OcrPdfLoader` instance when tesseract.js can be resolved, or
38
+ * `null` when resolution fails.
39
+ */
40
+ export declare function createOcrPdfLoader(): IDocumentLoader | null;
41
+ //# sourceMappingURL=OcrPdfLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"OcrPdfLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/OcrPdfLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAKH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAgH5D;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,kBAAkB,IAAI,eAAe,GAAG,IAAI,CAY3D"}
@@ -0,0 +1,149 @@
1
+ /**
2
+ * @fileoverview OcrPdfLoader — optional OCR-based PDF loader using Tesseract.js.
3
+ *
4
+ * This module provides a factory function {@link createOcrPdfLoader} that
5
+ * checks whether `tesseract.js` can be resolved at runtime (the package itself is only imported lazily inside `load()`). When the package is not
6
+ * installed the factory returns `null` gracefully so callers can treat OCR as
7
+ * fully opt-in without any hard dependency.
8
+ *
9
+ * ### Opting in
10
+ * ```sh
11
+ * pnpm add tesseract.js
12
+ * ```
13
+ *
14
+ * Once installed, pass the result of {@link createOcrPdfLoader} to
15
+ * {@link PdfLoader}'s constructor as the `ocrLoader` argument.
16
+ *
17
+ * @module memory/ingestion/OcrPdfLoader
18
+ */
19
+ import path from 'node:path';
20
+ import fs from 'node:fs/promises';
21
+ import { createRequire } from 'node:module';
22
+ // ---------------------------------------------------------------------------
23
+ // Constants
24
+ // ---------------------------------------------------------------------------
25
+ /** File extensions (lower-case, with leading dot) accepted by this loader for path sources. */
26
+ const SUPPORTED_EXTENSIONS = ['.pdf'];
27
+ // ---------------------------------------------------------------------------
28
+ // Helpers
29
+ // ---------------------------------------------------------------------------
30
/**
 * Extracts the extension of a path, normalised to lower case.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The extension including its leading dot, or an empty string when
 *   the path has no extension.
 */
function extOf(filePath) {
    const rawExtension = path.extname(filePath);
    return rawExtension.toLowerCase();
}
38
+ // ---------------------------------------------------------------------------
39
+ // OcrPdfLoader (internal class)
40
+ // ---------------------------------------------------------------------------
41
/**
 * Internal loader implementation. Consumers should use {@link createOcrPdfLoader}
 * rather than instantiating this class directly, as the factory performs the
 * availability check and returns `null` when tesseract.js is absent.
 *
 * @implements {IDocumentLoader}
 */
class OcrPdfLoader {
    constructor() {
        /** Extensions accepted for path sources (a fresh copy per instance). */
        this.supportedExtensions = [...SUPPORTED_EXTENSIONS];
    }
    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Reports whether this loader can handle `source`.
     *
     * @param source - A file path, or a Buffer holding the raw document bytes.
     * @returns `true` for a Buffer that starts with the signature bytes `%PDF`,
     *   or for a path whose extension is `.pdf` (case-insensitive).
     */
    canLoad(source) {
        if (Buffer.isBuffer(source)) {
            // Only the first four signature bytes ("%PDF") are compared.
            return source.length >= 4 &&
                source[0] === 0x25 && source[1] === 0x50 &&
                source[2] === 0x44 && source[3] === 0x46;
        }
        return SUPPORTED_EXTENSIONS.includes(extOf(source));
    }
    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Runs Tesseract.js text recognition over the document bytes.
     *
     * @param source - A file path (read from disk) or a Buffer of raw bytes.
     * @param _options - Accepted for interface compatibility; currently unused.
     * @returns A document with the recognised text as `content`, `format: 'pdf'`,
     *   and `metadata.source` set when a path was supplied.
     * @throws Error when tesseract.js cannot be imported (the underlying failure
     *   is attached as `cause`) or does not expose `recognize()`. File-read and
     *   recognition errors propagate unchanged.
     */
    async load(source, _options) {
        let buffer;
        let resolvedPath;
        if (Buffer.isBuffer(source)) {
            buffer = source;
        }
        else {
            resolvedPath = source;
            buffer = await fs.readFile(resolvedPath);
        }
        // Import tesseract.js lazily so it stays an optional peer dependency.
        // The namespace is left untyped because its type declarations are not
        // guaranteed to be installed.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        let tesseractModule;
        try {
            // eslint-disable-next-line @typescript-eslint/ban-ts-comment
            // @ts-expect-error — optional peer dependency; types not guaranteed to be installed
            tesseractModule = await import('tesseract.js');
        }
        catch (importError) {
            // Keep the friendly message, but attach the original failure so that
            // problems other than "package missing" remain diagnosable.
            throw Object.assign(new Error('OcrPdfLoader: tesseract.js is not installed. ' +
                'Run `pnpm add tesseract.js` (or the equivalent for your package manager) ' +
                'to enable OCR-based PDF extraction.'), { cause: importError });
        }
        // When a CommonJS package is imported from ESM, its API may only be
        // reachable through the default export, so accept either shape.
        const Tesseract = typeof tesseractModule.recognize === 'function'
            ? tesseractModule
            : tesseractModule.default;
        if (!Tesseract || typeof Tesseract.recognize !== 'function') {
            throw new Error('OcrPdfLoader: the installed tesseract.js does not expose a recognize() function.');
        }
        // NOTE(review): the raw PDF bytes are handed to Tesseract as-is. The
        // tesseract.js documentation appears to list image inputs only, so PDF
        // pages may need to be rasterised first — confirm this path works for
        // real scanned PDFs.
        const { data: { text }, } = await Tesseract.recognize(buffer, 'eng', {
            // Silence progress logging.
            logger: () => { },
        });
        const meta = {
            ...(resolvedPath ? { source: resolvedPath } : {}),
        };
        return {
            content: text,
            metadata: meta,
            format: 'pdf',
        };
    }
}
112
+ // ---------------------------------------------------------------------------
113
+ // Factory
114
+ // ---------------------------------------------------------------------------
115
/**
 * Factory for the OCR-backed PDF loader.
 *
 * Probes for `tesseract.js` by resolving (not importing) the package through a
 * `require` function built with Node's `createRequire`, so the call stays
 * synchronous. Any failure during the probe is treated as "OCR unavailable".
 *
 * ### Usage
 * ```ts
 * import { createOcrPdfLoader } from './OcrPdfLoader.js';
 * import { PdfLoader } from './PdfLoader.js';
 *
 * const ocrLoader = createOcrPdfLoader();
 * const loader = new PdfLoader(ocrLoader);
 * ```
 *
 * @returns A new `OcrPdfLoader` when `tesseract.js` resolves, otherwise `null`.
 */
export function createOcrPdfLoader() {
    let loader = null;
    try {
        // Resolution throws when the package cannot be found from this module.
        const packageResolver = createRequire(import.meta.url);
        packageResolver.resolve('tesseract.js');
        loader = new OcrPdfLoader();
    }
    catch {
        // Probe failed — report OCR as unavailable.
        loader = null;
    }
    return loader;
}
149
+ //# sourceMappingURL=OcrPdfLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"OcrPdfLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/OcrPdfLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAI5C,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,sCAAsC;AACtC,MAAM,oBAAoB,GAAG,CAAC,MAAM,CAAU,CAAC;AAE/C,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E;;;;;;GAMG;AACH,MAAM,YAAY;IAAlB;QACE,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;IAsErE,CAAC;IApEC,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,6CAA6C;YAC7C,OAAO,MAAM,CAAC,MAAM,IAAI,CAAC;gBACvB,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI;gBACxC,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;QAC7C,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC,CAAC;IACvF,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,QAAsB;QACxD,IAAI,MAAc,CAAC;QACnB,IAAI,YAAgC,CAAC;QAErC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,YAAY,GAAG,MAAM,CAAC;YACtB,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;QAC3C,CAAC;QAED,wEAAwE;QACxE,sEAAsE;QACtE,uEAAuE;QACvE,yEAAyE;QACzE,8DAA8D;QAC9D,IAAI,SAAc,CAAC;QACnB,IAAI,CAAC;YACH,6DAA6D;YAC7D,oFAAoF;YACpF,SAAS,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,gDAAgD;gBAChD,2EAA2E;gBAC3E,qCAAqC,CACtC,CAAC;QACJ,CAAC;QAED,oCAAoC;QACpC,oEAAoE;QACpE,2EAA2E;QAC3E,MAAM,EACJ,IAAI,EAAE,EAAE,IAAI,EAAE,GACf,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE;YAC3C,MAAM,EAAE,GAAG,EAAE,GAAkC,CAAC;SACjD,CAAC,CAAC;QAEH,MAAM,IAAI,GAAqB;YAC7B,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,
QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;CACF;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,kBAAkB;IAChC,IAAI,CAAC;QACH,0EAA0E;QAC1E,4EAA4E;QAC5E,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAChC,oDAAoD;QACpD,OAAO,IAAI,YAAY,EAAE,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,8CAA8C;QAC9C,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * @fileoverview PdfLoader — loads `.pdf` documents using `unpdf`.
3
+ *
4
+ * Implements a tiered extraction strategy:
5
+ * 1. **unpdf** (Tier 1, always available) — pure-JS text extraction via
6
+ * `getDocumentProxy` + `extractText`. Fast and dependency-free.
7
+ * 2. **OcrPdfLoader** (Tier 2, opt-in) — Tesseract.js OCR, engaged when the
8
+ * unpdf extraction produces sparse text (< 50 chars per page on average).
9
+ * 3. **DoclingLoader** (Tier 3, opt-in) — Python `docling` subprocess; when a
10
+ * Docling loader is injected it takes precedence over both unpdf and OCR.
11
+ *
12
+ * Both fallback loaders are optional and injected via constructor parameters;
13
+ * callers supply them by calling {@link createOcrPdfLoader} and
14
+ * {@link createDoclingLoader} and checking for non-null values.
15
+ *
16
+ * @module memory/ingestion/PdfLoader
17
+ */
18
+ import type { IDocumentLoader } from './IDocumentLoader.js';
19
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
20
+ /**
21
+ * Document loader for PDF files.
22
+ *
23
+ * ### Extraction tiers
24
+ * 1. **unpdf** — the primary extraction engine whenever no Docling loader is
25
+ * supplied. Extracts the PDF text layer via the `unpdf` package.
26
+ * 2. **OcrPdfLoader** (optional) — supplied at construction time and engaged
27
+ * automatically when unpdf yields sparse text (< 50 chars per page on
28
+ * average), indicating a scanned document.
29
+ * 3. **DoclingLoader** (optional) — when provided, takes precedence over both
30
+ * unpdf and OCR, yielding the highest-fidelity extraction at the cost of
31
+ * requiring a Python runtime.
32
+ *
33
+ * @implements {IDocumentLoader}
34
+ *
35
+ * @example
36
+ * ```ts
37
+ * const ocrLoader = createOcrPdfLoader(); // null if tesseract.js absent
38
+ * const doclingLoader = createDoclingLoader(); // null if docling absent
39
+ * const pdfLoader = new PdfLoader(ocrLoader, doclingLoader);
40
+ * const doc = await pdfLoader.load('/reports/q3.pdf');
41
+ * ```
42
+ */
43
+ export declare class PdfLoader implements IDocumentLoader {
44
+ /** File extensions handled by this loader (`.pdf`). */
45
+ readonly supportedExtensions: string[];
46
+ /**
47
+ * Optional OCR fallback loader, engaged when primary extraction is sparse.
48
+ * Pass `null` to disable OCR fallback.
49
+ */
50
+ private readonly _ocrLoader;
51
+ /**
52
+ * Optional Docling loader that, when present, takes precedence over the
53
+ * entire unpdf + OCR pipeline.
54
+ * Pass `null` to disable Docling.
55
+ */
56
+ private readonly _doclingLoader;
57
+ /**
58
+ * Creates a new PdfLoader.
59
+ *
60
+ * @param ocrLoader - Optional OCR fallback (e.g. from {@link createOcrPdfLoader}); defaults to `null`.
61
+ * @param doclingLoader - Optional Docling loader (e.g. from {@link createDoclingLoader}); defaults to `null`.
62
+ */
63
+ constructor(ocrLoader?: IDocumentLoader | null, doclingLoader?: IDocumentLoader | null);
64
+ /** Returns `true` for a Buffer starting with the bytes `%PDF`, or a path whose extension is `.pdf` (case-insensitive). */
65
+ canLoad(source: string | Buffer): boolean;
66
+ /** Delegates to the Docling loader when one was supplied; otherwise extracts with unpdf and hands sparse results to the OCR loader when one is available. */
67
+ load(source: string | Buffer, options?: LoadOptions): Promise<LoadedDocument>;
68
+ /**
69
+ * Extract text from a PDF buffer using the `unpdf` package.
70
+ *
71
+ * Returns the concatenated page text, the page count, and an optional title
72
+ * string extracted from the PDF metadata dictionary when available.
73
+ *
74
+ * @param buffer - Raw PDF bytes.
75
+ */
76
+ private _extractWithUnpdf;
77
+ }
78
+ //# sourceMappingURL=PdfLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PdfLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/PdfLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAoB,MAAM,oBAAoB,CAAC;AA+CxF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,SAAU,YAAW,eAAe;IAC/C,kBAAkB;IAClB,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAA6B;IAEnE;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAyB;IAEpD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAyB;IAExD;;;;;OAKG;gBAED,SAAS,GAAE,eAAe,GAAG,IAAW,EACxC,aAAa,GAAE,eAAe,GAAG,IAAW;IAU9C,kBAAkB;IAClB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IAazC,kBAAkB;IACZ,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;IA2CnF;;;;;;;OAOG;YACW,iBAAiB;CA+BhC"}
@@ -0,0 +1,179 @@
1
+ /**
2
+ * @fileoverview PdfLoader — loads `.pdf` documents using `unpdf`.
3
+ *
4
+ * Implements a tiered extraction strategy:
5
+ * 1. **unpdf** (Tier 1, always available) — pure-JS text extraction via
6
+ * `getDocumentProxy` + `extractText`. Fast and dependency-free.
7
+ * 2. **OcrPdfLoader** (Tier 2, opt-in) — Tesseract.js OCR, engaged when the
8
+ * unpdf extraction produces sparse text (< 50 chars per page on average).
9
+ * 3. **DoclingLoader** (Tier 3, opt-in) — Python `docling` subprocess; when a
10
+ * Docling loader is injected it takes precedence over both unpdf and OCR.
11
+ *
12
+ * Both fallback loaders are optional and injected via constructor parameters;
13
+ * callers supply them by calling {@link createOcrPdfLoader} and
14
+ * {@link createDoclingLoader} and checking for non-null values.
15
+ *
16
+ * @module memory/ingestion/PdfLoader
17
+ */
18
+ import fs from 'node:fs/promises';
19
+ import path from 'node:path';
20
+ // ---------------------------------------------------------------------------
21
+ // Constants
22
+ // ---------------------------------------------------------------------------
23
+ /** Extensions handled by this loader, each lower-case with a leading dot. */
24
+ const SUPPORTED_EXTENSIONS = ['.pdf'];
25
+ /**
26
+ * Minimum average character count per page. An extraction averaging strictly
27
+ * fewer characters than this is considered sparse, which triggers the fallback.
28
+ */
29
+ const SPARSE_THRESHOLD_CHARS_PER_PAGE = 50;
30
+ // ---------------------------------------------------------------------------
31
+ // Helpers
32
+ // ---------------------------------------------------------------------------
33
/**
 * Extracts the extension of a path, normalised to lower case.
 *
 * @param filePath - Absolute or relative file path.
 * @returns The extension including its leading dot, or an empty string when
 *   the path has no extension.
 */
function extOf(filePath) {
    const rawExtension = path.extname(filePath);
    return rawExtension.toLowerCase();
}
41
/**
 * Determine whether extracted text is considered sparse given the page count.
 *
 * Sparse text typically indicates a scanned PDF where textual content was not
 * embedded during creation and OCR is required.
 *
 * @param text - Full extracted text.
 * @param pageCount - Number of pages in the document.
 * @param threshold - Minimum average characters per page; defaults to
 *   `SPARSE_THRESHOLD_CHARS_PER_PAGE`.
 * @returns `true` when the average characters per page is strictly below
 *   `threshold`; always `false` when `pageCount` is zero or negative, since no
 *   average can be computed.
 */
function isSparse(text, pageCount, threshold = SPARSE_THRESHOLD_CHARS_PER_PAGE) {
    if (pageCount <= 0)
        return false;
    const avgCharsPerPage = text.length / pageCount;
    return avgCharsPerPage < threshold;
}
56
+ // ---------------------------------------------------------------------------
57
+ // PdfLoader
58
+ // ---------------------------------------------------------------------------
59
/**
 * Document loader for PDF files.
 *
 * ### Extraction order
 * 1. **DoclingLoader** (optional) — when supplied, every `load()` call is
 *    delegated to it and neither unpdf nor OCR runs.
 * 2. **unpdf** — otherwise the primary engine; extracts the PDF text layer.
 * 3. **OcrPdfLoader** (optional) — engaged when unpdf yields sparse text
 *    (< 50 chars per page on average), indicating a scanned document.
 *
 * @implements {IDocumentLoader}
 *
 * @example
 * ```ts
 * const ocrLoader = createOcrPdfLoader(); // null if tesseract.js absent
 * const doclingLoader = createDoclingLoader(); // null if docling absent
 * const pdfLoader = new PdfLoader(ocrLoader, doclingLoader);
 * const doc = await pdfLoader.load('/reports/q3.pdf');
 * ```
 */
export class PdfLoader {
    /**
     * Creates a new PdfLoader.
     *
     * @param ocrLoader - Optional OCR fallback (e.g. from {@link createOcrPdfLoader}).
     * @param doclingLoader - Optional Docling loader (e.g. from {@link createDoclingLoader}).
     */
    constructor(ocrLoader = null, doclingLoader = null) {
        /** Extensions accepted for path sources (a fresh copy per instance). */
        this.supportedExtensions = [...SUPPORTED_EXTENSIONS];
        this._ocrLoader = ocrLoader;
        this._doclingLoader = doclingLoader;
    }
    // -------------------------------------------------------------------------
    // canLoad
    // -------------------------------------------------------------------------
    /**
     * Reports whether this loader can handle `source`.
     *
     * @param source - A file path, or a Buffer holding the raw document bytes.
     * @returns `true` for a Buffer that starts with the signature bytes `%PDF`,
     *   or for a path whose extension is `.pdf` (case-insensitive).
     */
    canLoad(source) {
        if (Buffer.isBuffer(source)) {
            // Only the first four signature bytes ("%PDF") are compared.
            return source.length >= 4 && source[0] === 0x25 && source[1] === 0x50 &&
                source[2] === 0x44 && source[3] === 0x46;
        }
        return SUPPORTED_EXTENSIONS.includes(extOf(source));
    }
    // -------------------------------------------------------------------------
    // load
    // -------------------------------------------------------------------------
    /**
     * Loads a PDF from a path or Buffer.
     *
     * @param source - A file path (read from disk) or a Buffer of raw PDF bytes.
     * @param options - Passed through unchanged to the Docling / OCR loaders.
     * @returns The loaded document. When the OCR fallback is used, its result is
     *   returned with the `pageCount`, `title` and `source` obtained during the
     *   unpdf pass filled in for any metadata key the OCR loader did not set.
     */
    async load(source, options) {
        // Prefer Docling when available — highest fidelity.
        if (this._doclingLoader !== null) {
            return this._doclingLoader.load(source, options);
        }
        // Read bytes from disk if a path was supplied.
        let buffer;
        let resolvedPath;
        if (Buffer.isBuffer(source)) {
            buffer = source;
        }
        else {
            resolvedPath = source;
            buffer = await fs.readFile(resolvedPath);
        }
        // Primary extraction via unpdf.
        const { content, pageCount, title } = await this._extractWithUnpdf(buffer);
        const meta = {
            pageCount,
            ...(title ? { title } : {}),
            ...(resolvedPath ? { source: resolvedPath } : {}),
        };
        // Sparse text suggests a scanned document: let the OCR loader produce the
        // content, but keep the metadata already extracted instead of dropping it.
        // Keys set by the OCR loader take precedence.
        if (isSparse(content, pageCount) && this._ocrLoader !== null) {
            const ocrDocument = await this._ocrLoader.load(source, options);
            return {
                ...ocrDocument,
                metadata: { ...meta, ...ocrDocument.metadata },
            };
        }
        return {
            content,
            metadata: meta,
            format: 'pdf',
        };
    }
    // -------------------------------------------------------------------------
    // Private: unpdf extraction
    // -------------------------------------------------------------------------
    /**
     * Extract text from a PDF buffer using the `unpdf` package.
     *
     * Returns the concatenated page text, the page count, and an optional title
     * string extracted from the PDF metadata dictionary when available. The
     * document proxy is released before returning, whether or not extraction
     * succeeded.
     *
     * @param buffer - Raw PDF bytes.
     */
    async _extractWithUnpdf(buffer) {
        // unpdf is imported lazily, only when a PDF is actually parsed.
        const { getDocumentProxy, extractText } = await import('unpdf');
        // getDocumentProxy accepts a Uint8Array — wrap the Node Buffer.
        const doc = await getDocumentProxy(new Uint8Array(buffer));
        try {
            // Extract all pages at once.
            const { text } = await extractText(doc, { mergePages: true });
            // Attempt to read the Title field from the PDF info dictionary.
            let title;
            try {
                const metadata = await doc.getMetadata();
                const info = metadata?.info;
                if (info && typeof info['Title'] === 'string' && info['Title'].trim()) {
                    title = info['Title'].trim();
                }
            }
            catch {
                // Metadata access is optional — swallow errors silently.
            }
            const pageCount = doc.numPages ?? 0;
            return { content: text, pageCount, title };
        }
        finally {
            // Release the resources held by the document proxy. Cleanup is best
            // effort and must never mask an extraction error.
            if (typeof doc.destroy === 'function') {
                try {
                    await doc.destroy();
                }
                catch {
                    // Ignore cleanup failures.
                }
            }
        }
    }
}
179
+ //# sourceMappingURL=PdfLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PdfLoader.js","sourceRoot":"","sources":["../../../src/memory/ingestion/PdfLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,kEAAkE;AAClE,MAAM,oBAAoB,GAAG,CAAC,MAAM,CAAU,CAAC;AAE/C;;;GAGG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,KAAK,CAAC,QAAgB;IAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;AAC9C,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,QAAQ,CAAC,IAAY,EAAE,SAAiB;IAC/C,IAAI,SAAS,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACjC,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;IAChD,OAAO,eAAe,GAAG,+BAA+B,CAAC;AAC3D,CAAC;AAED,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,OAAO,SAAS;IAiBpB;;;;;OAKG;IACH,YACE,YAAoC,IAAI,EACxC,gBAAwC,IAAI;QAxB9C,kBAAkB;QACT,wBAAmB,GAAa,CAAC,GAAG,oBAAoB,CAAC,CAAC;QAyBjE,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;IACtC,CAAC;IAED,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E,kBAAkB;IAClB,OAAO,CAAC,MAAuB;QAC7B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,6CAA6C;YAC7C,OAAO,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI;gBACnE,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;QAC7C,CAAC;QACD,OAAQ,oBAA0C,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC,CAAC;IACvF,CAAC;IAED,4EAA4E;IAC5E,OAAO;IACP,4EAA4E;IAE5E,kBAAkB;IAClB,KAAK,CAAC,IAAI,CAAC,MAAuB,EAAE,OAAqB;QACvD,oDAAoD;QACpD,IAAI,IAAI,CAAC,cAAc,KAAK,IAAI,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnD,CAAC;QAED,+CAA+C;QAC/C,IAAI,MAAc,CAAC;QACnB,IAAI,YAAgC,CAAC;QAErC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5B,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,YAAY,GAAG,MAAM,CAAC;YACtB,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;QAC3C,CAAC;QAED,gCAAgC;QAChC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAE3E,0EAA0E;QAC1E,6DAA6D;QAC7D,IAAI
,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;YAC7D,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC;QAED,MAAM,IAAI,GAAqB;YAC7B,SAAS;YACT,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAClD,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,4BAA4B;IAC5B,4EAA4E;IAE5E;;;;;;;OAOG;IACK,KAAK,CAAC,iBAAiB,CAAC,MAAc;QAK5C,yEAAyE;QACzE,wDAAwD;QACxD,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QAEhE,gEAAgE;QAChE,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QAE3D,6BAA6B;QAC7B,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAE9D,gEAAgE;QAChE,IAAI,KAAyB,CAAC;QAC9B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,QAAQ,EAAE,IAA2C,CAAC;YACnE,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;gBACtE,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;YAC/B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC;QAEpC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IAC7C,CAAC;CACF"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * @fileoverview TextLoader — loads plain-text, CSV/TSV, JSON, and YAML files.
3
+ *
4
+ * This is the most general-purpose loader in the AgentOS ingestion pipeline.
5
+ * It handles six extensions that all share the same fundamental operation:
6
+ * read raw text and attach lightweight metadata derived from the file content
7
+ * and extension.
8
+ *
9
+ * Supported extensions: `.txt`, `.csv`, `.tsv`, `.json`, `.yaml`, `.yml`
10
+ *
11
+ * @module memory/ingestion/TextLoader
12
+ */
13
+ import type { IDocumentLoader } from './IDocumentLoader.js';
14
+ import type { LoadOptions, LoadedDocument } from '../facade/types.js';
15
+ /**
16
+ * Loader for plain-text, CSV, TSV, JSON, and YAML files.
17
+ *
18
+ * The loader performs minimal transformation:
19
+ * - **`.json`** — re-serialises with pretty-printing so stored content is
20
+ * consistently formatted. NOTE(review): behaviour on invalid JSON is not stated here — confirm in the implementation.
21
+ * - **`.yaml` / `.yml`** — the `yaml` package is used to parse and re-dump
22
+ * for consistent formatting; falls back to raw text on parse error.
23
+ * - All other extensions — content is returned as-is.
24
+ *
25
+ * Metadata includes the approximate `wordCount` and a `format` label derived
26
+ * from the file extension.
27
+ *
28
+ * @implements {IDocumentLoader}
29
+ *
30
+ * @example
31
+ * ```ts
32
+ * const loader = new TextLoader();
33
+ * const doc = await loader.load('/data/notes.txt');
34
+ * console.log(doc.metadata.wordCount); // e.g. 312
35
+ * ```
36
+ */
37
+ export declare class TextLoader implements IDocumentLoader {
38
+ /** Extensions this loader declares support for (the module overview lists `.txt`, `.csv`, `.tsv`, `.json`, `.yaml`, `.yml`). @inheritdoc */
39
+ readonly supportedExtensions: string[];
40
+ /** Reports whether this loader accepts `source`, given as a string or a Buffer; the decision logic is in the implementation, not this declaration. @inheritdoc */
41
+ canLoad(source: string | Buffer): boolean;
42
+ /** Loads `source` and resolves to a `LoadedDocument`. NOTE(review): the underscore on `_options` usually marks an unused parameter — confirm in the implementation. @inheritdoc */
43
+ load(source: string | Buffer, _options?: LoadOptions): Promise<LoadedDocument>;
44
+ /**
45
+ * Normalises raw file content based on the detected extension.
46
+ *
47
+ * - JSON files are pretty-printed.
48
+ * - YAML files are parsed and re-dumped for consistent formatting.
49
+ * - All other formats are returned unchanged.
50
+ * @param raw - Raw UTF-8 string read from the source.
51
+ * @param ext - Lower-cased extension with leading dot.
52
+ * @returns The normalised content (identical to `raw` for formats other than JSON and YAML).
53
+ */
54
+ private _normalise;
55
+ /**
56
+ * Parse and re-serialise YAML content for consistent formatting.
57
+ *
58
+ * Uses the `yaml` package that is already a production dependency of the
59
+ * `@framers/agentos` package. Falls back to the original raw string on
60
+ * any parse error so the loader never throws on malformed YAML.
61
+ * @param raw - Raw YAML string.
62
+ * @returns The re-serialised YAML, or `raw` itself when parsing fails.
63
+ */
64
+ private _prettyYaml;
65
+ }
66
+ //# sourceMappingURL=TextLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TextLoader.d.ts","sourceRoot":"","sources":["../../../src/memory/ingestion/TextLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAoB,MAAM,oBAAoB,CAAC;AAsFxF;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,UAAW,YAAW,eAAe;IAChD,kBAAkB;IAClB,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAA6B;IAMnE,kBAAkB;IAClB,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO;IAazC,kBAAkB;IACZ,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;IAsCpF;;;;;;;;;OASG;IACH,OAAO,CAAC,UAAU;IAelB;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;CAQpB"}