@agentionai/agents 0.11.0 → 0.12.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ "use strict";
2
// Interop helper (appears to be the standard TypeScript-emitted `__createBinding`): exposes
// property `k` of module object `m` on `o` under the name `k2`.
// An existing `this.__createBinding` is reused when present; otherwise the implementation is
// chosen by whether `Object.create` is available.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k; // the alias defaults to the source key
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no own descriptor, an accessor on a module not flagged __esModule, or a writable/configurable data property
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // substitute a getter that reads m[k] on each access
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // fallback when Object.create is missing: copy the current value
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Interop helper: stores `v` as the `default` property of `o`.
// An existing `this.__setModuleDefault` is reused when present. With `Object.create` available the
// property is defined through `Object.defineProperty` as enumerable; otherwise it is plainly assigned.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Interop helper: converts a `require()` result into a namespace-style object.
// An existing `this.__importStar` is reused when present; otherwise an IIFE builds the function.
// - A truthy `mod` flagged `__esModule` is returned unchanged.
// - Anything else yields a fresh object carrying every own key of `mod` except "default"
//   (bound through `__createBinding`), with `default` set to `mod` itself.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) { // on first call, replaces itself with the key-listing function chosen below
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; // fallback: own enumerable keys collected via for-in
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // "default" is skipped here and set separately below
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.UnstructuredLocalParser = void 0;
37
+ const DocumentParser_1 = require("./DocumentParser");
38
+ /**
39
+ * Document parser that uses the **local** (open-source Python) version of
40
+ * Unstructured via the `@epilogo/unstructured-io-node` npm bridge.
41
+ *
42
+ * The bridge spawns a Python virtual environment and calls the Python
43
+ * `unstructured` library directly — no API key required, but Python 3.8+
44
+ * and system dependencies (poppler, tesseract, etc.) must be available.
45
+ *
46
+ * **Peer dependency:** `@epilogo/unstructured-io-node`
47
+ *
48
+ * @example
49
+ * ```typescript
50
+ * import { UnstructuredLocalParser } from "@agentionai/agents/parsers/unstructured-local";
51
+ *
52
+ * const parser = new UnstructuredLocalParser();
53
+ * const doc = await parser.parse("/path/to/report.pdf", {
54
+ * strategy: "hi_res",
55
+ * languages: ["eng"],
56
+ * });
57
+ * console.log(doc.elements?.length, "elements");
58
+ *
59
+ * // Use with IngestionPipeline
60
+ * await pipeline.ingestFile("/path/to/report.pdf", parser);
61
+ * ```
62
+ */
63
class UnstructuredLocalParser extends DocumentParser_1.DocumentParser {
    constructor() {
        super(...arguments);
        /** Identifier of this parser implementation. */
        this.name = "unstructured-local";
    }
    /**
     * Parse a file using the local Python Unstructured library.
     *
     * The optional peer dependency `@epilogo/unstructured-io-node` is loaded
     * lazily on each call, and `ensureEnvironmentSetup()` is awaited before
     * every partition request.
     * NOTE(review): the bridge presumably makes setup a no-op once the Python
     * environment exists, so only the first run is slow — confirm against the
     * bridge's documentation.
     *
     * @param filePath - Path to the document to parse
     * @param options - Strategy, languages, and any other unstructured kwargs.
     *   `strategy` defaults to `"auto"`; `languages` is forwarded only when
     *   provided. A `filename` key inside `options` is ignored so that it
     *   cannot override `filePath`.
     * @returns An object with `text` (built by `elementsToText`) and the
     *   normalized `elements`.
     * @throws {Error} When the bridge package cannot be loaded (the underlying
     *   failure is attached as `cause`) or does not export `UnstructuredIO`.
     * @throws {TypeError} When `partition()` resolves to a non-nullish value
     *   that is not an array.
     */
    async parse(filePath, options) {
        const pkg = "@epilogo/unstructured-io-node";
        let bridge;
        try {
            // The module name goes through a variable so the optional peer
            // dependency is only resolved at call time.
            bridge = await Promise.resolve(`${pkg}`).then(s => __importStar(require(s)));
        }
        catch (err) {
            // Keep the original failure reachable: the package may be installed
            // but fail while loading, in which case "install it" alone misleads.
            throw new Error("UnstructuredLocalParser requires '@epilogo/unstructured-io-node'. " +
                "Install it with: npm install @epilogo/unstructured-io-node", { cause: err });
        }
        const UnstructuredIO = bridge.UnstructuredIO;
        if (UnstructuredIO == null) {
            throw new Error("UnstructuredLocalParser could not find the 'UnstructuredIO' export in " +
                "'@epilogo/unstructured-io-node'.");
        }
        await UnstructuredIO.ensureEnvironmentSetup();
        // `filename` is pulled out of the pass-through options (and discarded)
        // so the explicit `filePath` argument always decides which file is read.
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        const { strategy, languages, filename: ignoredFilename, ...rest } = options ?? {};
        const rawElements = await UnstructuredIO.partition({
            filename: filePath,
            strategy: strategy ?? "auto",
            ...(languages ? { languages } : {}),
            ...rest,
        });
        const elements = this.mapRawElements(rawElements);
        return {
            text: this.elementsToText(elements),
            elements,
        };
    }
    /**
     * Normalize the raw partition output into `{ type, text, metadata }` records.
     *
     * - `type` falls back to `"unknown"` and `text` to `""` when not strings.
     * - `metadata` is kept only when it is a non-null, non-array object.
     * - A nullish `raw` yields an empty list; null or non-object entries are
     *   mapped to an "unknown" element with empty text instead of throwing.
     *
     * @param raw - Value resolved by `UnstructuredIO.partition()`
     * @returns Array of normalized elements, one per raw entry
     * @throws {TypeError} When `raw` is neither nullish nor an array
     */
    mapRawElements(raw) {
        if (raw == null) {
            return [];
        }
        if (!Array.isArray(raw)) {
            throw new TypeError("UnstructuredLocalParser expected partition() to return an array of elements, " +
                `received ${typeof raw}.`);
        }
        return raw.map((el) => {
            // Guard against null/primitive entries so property reads cannot throw.
            const e = el != null && typeof el === "object" ? el : {};
            return {
                type: typeof e["type"] === "string" ? e["type"] : "unknown",
                text: typeof e["text"] === "string" ? e["text"] : "",
                metadata: e["metadata"] != null &&
                    typeof e["metadata"] === "object" &&
                    !Array.isArray(e["metadata"])
                    ? e["metadata"]
                    : undefined,
            };
        });
    }
}
117
+ exports.UnstructuredLocalParser = UnstructuredLocalParser;
118
+ //# sourceMappingURL=UnstructuredLocalParser.js.map
@@ -0,0 +1,3 @@
1
+ export type { ParsedElement, ParsedDocument, ParseOptions } from "./types";
2
+ export { DocumentParser } from "./DocumentParser";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,6 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DocumentParser = void 0;
4
+ var DocumentParser_1 = require("./DocumentParser");
5
+ Object.defineProperty(exports, "DocumentParser", { enumerable: true, get: function () { return DocumentParser_1.DocumentParser; } });
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,50 @@
1
+ /**
2
+ * A single structured element extracted from a document.
3
+ * Matches the element format returned by Unstructured and similar parsers.
4
+ */
5
+ export interface ParsedElement {
6
+ /**
7
+ * Element type — e.g. "Title", "NarrativeText", "Table", "Image",
8
+ * "ListItem", "Header", "Footer", "Document", etc. Declared as a plain `string`, so values are not limited to this list.
9
+ */
10
+ type: string;
11
+ /** Text content of this element (required) */
12
+ text: string;
13
+ /**
14
+ * Parser-provided metadata — e.g. page_number, coordinates, languages,
15
+ * file_directory, filename, filetype, etc. Optional; values are typed as `unknown`.
16
+ */
17
+ metadata?: Record<string, unknown>;
18
+ }
19
+ /**
20
+ * The result of parsing a document file.
21
+ */
22
+ export interface ParsedDocument {
23
+ /** Full plain-text content. NOTE(review): intended to be the elements joined by double newlines; the joining is up to the parser and is not enforced by this type. */
24
+ text: string;
25
+ /**
26
+ * Structured elements if the parser supports them.
27
+ * Absent when the parser only returns plain text.
28
+ */
29
+ elements?: ParsedElement[];
30
+ /** File-level metadata from the parser, when available (optional; values are typed as `unknown`) */
31
+ metadata?: Record<string, unknown>;
32
+ }
33
+ /**
34
+ * Options shared across all document parsers.
35
+ */
36
+ export interface ParseOptions {
37
+ /**
38
+ * Parsing strategy.
39
+ * - `"auto"`: Let the parser decide (default)
40
+ * - `"fast"`: Text extraction only, no OCR
41
+ * - `"hi_res"`: High-resolution layout analysis with OCR
42
+ * - `"ocr_only"`: Force OCR on every page
43
+ */
44
+ strategy?: "auto" | "fast" | "hi_res" | "ocr_only";
45
+ /** Languages to use for OCR, as three-letter language codes (e.g. `["eng", "fra"]`), not two-letter ISO 639-1 codes */
46
+ languages?: string[];
47
+ /** Pass-through options specific to the underlying parser; any additional key is accepted and typed as `unknown` */
48
+ [key: string]: unknown;
49
+ }
50
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agentionai/agents",
3
3
  "author": "Laurent Zuijdwijk",
4
- "version": "0.11.0",
4
+ "version": "0.12.0-beta",
5
5
  "description": "Agent Library",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
@@ -30,6 +30,10 @@
30
30
  "types": "./dist/gemini.d.ts",
31
31
  "default": "./dist/gemini.js"
32
32
  },
33
+ "./ollama": {
34
+ "types": "./dist/agents/ollama/OllamaAgent.d.ts",
35
+ "default": "./dist/agents/ollama/OllamaAgent.js"
36
+ },
33
37
  "./embeddings": {
34
38
  "types": "./dist/embeddings/index.d.ts",
35
39
  "default": "./dist/embeddings/index.js"
@@ -61,6 +65,26 @@
61
65
  "./history/plugins": {
62
66
  "types": "./dist/history/plugins/index.d.ts",
63
67
  "default": "./dist/history/plugins/index.js"
68
+ },
69
+ "./parsers": {
70
+ "types": "./dist/parsers/index.d.ts",
71
+ "default": "./dist/parsers/index.js"
72
+ },
73
+ "./parsers/unstructured-local": {
74
+ "types": "./dist/parsers/UnstructuredLocalParser.d.ts",
75
+ "default": "./dist/parsers/UnstructuredLocalParser.js"
76
+ },
77
+ "./parsers/unstructured-api": {
78
+ "types": "./dist/parsers/UnstructuredAPIParser.d.ts",
79
+ "default": "./dist/parsers/UnstructuredAPIParser.js"
80
+ },
81
+ "./parsers/llamaindex": {
82
+ "types": "./dist/parsers/LlamaIndexParser.d.ts",
83
+ "default": "./dist/parsers/LlamaIndexParser.js"
84
+ },
85
+ "./parsers/ollama-ocr": {
86
+ "types": "./dist/parsers/OllamaOCRParser.d.ts",
87
+ "default": "./dist/parsers/OllamaOCRParser.js"
64
88
  }
65
89
  },
66
90
  "files": [
@@ -141,7 +165,12 @@
141
165
  "@modelcontextprotocol/sdk": "^1.26.0",
142
166
  "apache-arrow": "^18.0.0",
143
167
  "openai": "^6.16.0",
144
- "voyageai": "^0.0.3"
168
+ "voyageai": "^0.0.3",
169
+ "@epilogo/unstructured-io-node": "*",
170
+ "unstructured-client": "*",
171
+ "llamaindex": "*",
172
+ "@llamaindex/readers": "*",
173
+ "pdf-to-img": "*"
145
174
  },
146
175
  "peerDependenciesMeta": {
147
176
  "@lancedb/lancedb": {
@@ -170,6 +199,21 @@
170
199
  },
171
200
  "@opensearch-project/opensearch": {
172
201
  "optional": true
202
+ },
203
+ "@epilogo/unstructured-io-node": {
204
+ "optional": true
205
+ },
206
+ "unstructured-client": {
207
+ "optional": true
208
+ },
209
+ "llamaindex": {
210
+ "optional": true
211
+ },
212
+ "@llamaindex/readers": {
213
+ "optional": true
214
+ },
215
+ "pdf-to-img": {
216
+ "optional": true
173
217
  }
174
218
  },
175
219
  "dependencies": {