@agentionai/agents 0.12.0-beta → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist/agents/Agent.d.ts +9 -3
  2. package/dist/agents/Agent.js +4 -0
  3. package/dist/agents/AgentConfig.d.ts +12 -2
  4. package/dist/agents/model-types.d.ts +7 -1
  5. package/dist/agents/ollama/OllamaAgent.d.ts +69 -0
  6. package/dist/agents/ollama/OllamaAgent.js +304 -0
  7. package/dist/chunkers/index.d.ts +0 -1
  8. package/dist/chunkers/index.js +1 -3
  9. package/dist/history/transformers.d.ts +36 -0
  10. package/dist/history/transformers.js +78 -1
  11. package/dist/history/types.d.ts +8 -1
  12. package/dist/index.d.ts +2 -1
  13. package/dist/index.js +4 -1
  14. package/dist/ingestion/IngestionPipeline.d.ts +1 -73
  15. package/dist/ingestion/IngestionPipeline.js +1 -110
  16. package/dist/ollama.d.ts +4 -0
  17. package/dist/ollama.js +24 -0
  18. package/dist/viz/types.d.ts +1 -1
  19. package/package.json +6 -42
  20. package/dist/chunkers/ElementChunker.d.ts +0 -100
  21. package/dist/chunkers/ElementChunker.js +0 -242
  22. package/dist/parsers/DocumentParser.d.ts +0 -36
  23. package/dist/parsers/DocumentParser.js +0 -35
  24. package/dist/parsers/LlamaIndexParser.d.ts +0 -58
  25. package/dist/parsers/LlamaIndexParser.js +0 -71
  26. package/dist/parsers/OllamaOCRParser.d.ts +0 -98
  27. package/dist/parsers/OllamaOCRParser.js +0 -203
  28. package/dist/parsers/UnstructuredAPIParser.d.ts +0 -57
  29. package/dist/parsers/UnstructuredAPIParser.js +0 -131
  30. package/dist/parsers/UnstructuredLocalParser.d.ts +0 -42
  31. package/dist/parsers/UnstructuredLocalParser.js +0 -118
  32. package/dist/parsers/index.d.ts +0 -3
  33. package/dist/parsers/index.js +0 -6
  34. package/dist/parsers/types.d.ts +0 -50
  35. package/dist/parsers/types.js +0 -3
@@ -1,203 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.OllamaOCRParser = void 0;
37
- const fs = __importStar(require("fs"));
38
- const path = __importStar(require("path"));
39
- const DocumentParser_1 = require("./DocumentParser");
40
- /** Supported image extensions for direct OCR (no conversion needed). */
41
- const IMAGE_EXTS = new Set([".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"]);
42
- /**
43
- * Document parser that uses a locally-running **Ollama** vision model (e.g. `glm-ocr`)
44
- * to perform OCR on image files and PDF documents.
45
- *
46
- * **Supported file types:**
47
- * - Images: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.bmp` — no extra dependencies
48
- * - PDF: requires the optional peer dependency `pdf-to-img` (`npm install pdf-to-img`)
49
- *
50
- * **Ollama must be running** with the model pulled:
51
- * ```bash
52
- * ollama pull glm-ocr
53
- * ollama serve # if not already running
54
- * ```
55
- *
56
- * @example
57
- * ```typescript
58
- * import { OllamaOCRParser } from "@agentionai/agents/parsers/ollama-ocr";
59
- *
60
- * const parser = new OllamaOCRParser({
61
- * model: "glm-ocr",
62
- * pdfScale: 1.5,
63
- * onProgress: (page, total) => console.log(`OCR page ${page}/${total}...`),
64
- * });
65
- *
66
- * // Parse an image
67
- * const doc = await parser.parse("/path/to/scan.png");
68
- *
69
- * // Parse a PDF (requires: npm install pdf-to-img)
70
- * const pdf = await parser.parse("/path/to/report.pdf");
71
- *
72
- * // Use with IngestionPipeline
73
- * await pipeline.ingestFile("/path/to/scan.png", parser);
74
- * ```
75
- */
76
- class OllamaOCRParser extends DocumentParser_1.DocumentParser {
77
- constructor(config = {}) {
78
- super();
79
- this.name = "ollama-ocr";
80
- this.model = config.model ?? "glm-ocr";
81
- this.baseUrl = (config.baseUrl ?? "http://localhost:11434").replace(/\/$/, "");
82
- this.prompt =
83
- config.prompt ??
84
- "Extract and transcribe all text from this image. Preserve the original structure, headings, and formatting as much as possible. Output only the extracted text.";
85
- this.pdfScale = config.pdfScale ?? 2.0;
86
- this.concurrency = config.concurrency ?? 3;
87
- this.onProgress = config.onProgress;
88
- }
89
- /**
90
- * Parse a document file using Ollama OCR.
91
- *
92
- * @param filePath - Path to the image or PDF file
93
- * @param options - Optional hints (unused by this parser; provided for interface compatibility)
94
- */
95
- async parse(filePath, _options) {
96
- const ext = path.extname(filePath).toLowerCase();
97
- if (IMAGE_EXTS.has(ext)) {
98
- return this.parseImageFile(filePath);
99
- }
100
- if (ext === ".pdf") {
101
- return this.parsePdf(filePath);
102
- }
103
- throw new Error(`OllamaOCRParser: unsupported file type "${ext}". ` +
104
- `Supported: ${[...IMAGE_EXTS].join(", ")}, .pdf`);
105
- }
106
- // ---------------------------------------------------------------------------
107
- // Private helpers
108
- // ---------------------------------------------------------------------------
109
- async parseImageFile(filePath) {
110
- const base64 = fs.readFileSync(filePath).toString("base64");
111
- const text = await this.runOCR(base64);
112
- const element = {
113
- type: "NarrativeText",
114
- text,
115
- metadata: { source: filePath },
116
- };
117
- return { text, elements: [element], metadata: { filePath, pages: 1 } };
118
- }
119
- async parsePdf(filePath) {
120
- // Dynamically load pdf-to-img — optional peer dep, no system deps required
121
- let pdfFn;
122
- try {
123
- // Resolve the module from process.cwd() so that peer deps installed in
124
- // the consuming project (not this library's own node_modules) are found.
125
- const { createRequire } = await Promise.resolve().then(() => __importStar(require("module")));
126
- const { pathToFileURL } = await Promise.resolve().then(() => __importStar(require("url")));
127
- const requireFromCwd = createRequire(path.resolve(process.cwd(), "__placeholder__.js"));
128
- const resolved = requireFromCwd.resolve("pdf-to-img");
129
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
130
- ({ pdf: pdfFn } = await Promise.resolve(`${pathToFileURL(resolved).href}`).then(s => __importStar(require(s))));
131
- }
132
- catch (err) {
133
- const message = err instanceof Error ? err.message : String(err);
134
- if (message.includes("Cannot find module") ||
135
- message.includes("MODULE_NOT_FOUND") ||
136
- message.includes("ERR_MODULE_NOT_FOUND")) {
137
- throw new Error("OllamaOCRParser: PDF parsing requires 'pdf-to-img'. " +
138
- "Install it with: npm install pdf-to-img");
139
- }
140
- throw err;
141
- }
142
- const doc = await pdfFn(filePath, { scale: this.pdfScale });
143
- const total = doc.length;
144
- // Step 1: render all pages to buffers (fast — pure JS, no network)
145
- const pageBuffers = [];
146
- for await (const buf of doc) {
147
- pageBuffers.push(buf);
148
- }
149
- // Step 2: OCR pages in parallel using a worker-pool with concurrency limit
150
- const elements = new Array(total);
151
- let completed = 0;
152
- const queue = pageBuffers.map((buf, i) => async () => {
153
- const text = await this.runOCR(buf.toString("base64"));
154
- elements[i] = {
155
- type: "NarrativeText",
156
- text,
157
- metadata: { page_number: i + 1, source: filePath },
158
- };
159
- this.onProgress?.(++completed, total);
160
- });
161
- const workers = Array.from({ length: Math.min(this.concurrency, total) }, async () => {
162
- while (queue.length > 0) {
163
- await queue.shift()();
164
- }
165
- });
166
- await Promise.all(workers);
167
- return {
168
- text: this.elementsToText(elements),
169
- elements,
170
- metadata: { filePath, pages: total },
171
- };
172
- }
173
- /**
174
- * Send a base64-encoded image to the Ollama chat API and return the extracted text.
175
- */
176
- async runOCR(base64Image) {
177
- const url = `${this.baseUrl}/api/chat`;
178
- const response = await fetch(url, {
179
- method: "POST",
180
- headers: { "Content-Type": "application/json" },
181
- body: JSON.stringify({
182
- model: this.model,
183
- messages: [
184
- {
185
- role: "user",
186
- content: this.prompt,
187
- images: [base64Image],
188
- },
189
- ],
190
- stream: false,
191
- }),
192
- });
193
- if (!response.ok) {
194
- const body = await response.text().catch(() => "");
195
- throw new Error(`OllamaOCRParser: Ollama API error ${response.status} ${response.statusText}` +
196
- (body ? `\n${body}` : ""));
197
- }
198
- const data = (await response.json());
199
- return data.message?.content?.trim() ?? "";
200
- }
201
- }
202
- exports.OllamaOCRParser = OllamaOCRParser;
203
- //# sourceMappingURL=OllamaOCRParser.js.map
@@ -1,57 +0,0 @@
1
- import { DocumentParser } from "./DocumentParser";
2
- import { ParsedDocument, ParseOptions } from "./types";
3
- /**
4
- * Configuration for {@link UnstructuredAPIParser}.
5
- */
6
- export interface UnstructuredAPIParserConfig {
7
- /**
8
- * API key for the Unstructured hosted service.
9
- * Not required when `serverUrl` points to a self-hosted instance
10
- * that does not enforce authentication.
11
- */
12
- apiKey?: string;
13
- /**
14
- * Base URL of the Unstructured API.
15
- * Defaults to the official hosted endpoint when an `apiKey` is provided.
16
- * Set to your own host when running the open-source API server locally:
17
- * ```
18
- * docker run -p 8000:8000 downloads.unstructured.io/unstructured-io/unstructured-api:latest
19
- * serverUrl: "http://localhost:8000"
20
- * ```
21
- */
22
- serverUrl?: string;
23
- }
24
- /**
25
- * Document parser backed by the **Unstructured REST API** — either the
26
- * official hosted service or a self-hosted open-source API server.
27
- *
28
- * Uses the official `unstructured-client` npm package under the hood.
29
- *
30
- * **Peer dependency:** `unstructured-client`
31
- *
32
- * @example
33
- * ```typescript
34
- * // Hosted service
35
- * const parser = new UnstructuredAPIParser({ apiKey: process.env.UNSTRUCTURED_API_KEY });
36
- *
37
- * // Self-hosted (no auth required)
38
- * const parser = new UnstructuredAPIParser({ serverUrl: "http://localhost:8000" });
39
- *
40
- * const doc = await parser.parse("/path/to/report.pdf", { strategy: "hi_res" });
41
- * await pipeline.ingestFile("/path/to/report.pdf", parser);
42
- * ```
43
- */
44
- export declare class UnstructuredAPIParser extends DocumentParser {
45
- private readonly config;
46
- readonly name = "unstructured-api";
47
- constructor(config?: UnstructuredAPIParserConfig);
48
- /**
49
- * Parse a file via the Unstructured API.
50
- *
51
- * @param filePath - Path to the document to parse (read from disk)
52
- * @param options - Strategy, languages, and any other partition parameters
53
- */
54
- parse(filePath: string, options?: ParseOptions): Promise<ParsedDocument>;
55
- private mapRawElements;
56
- }
57
- //# sourceMappingURL=UnstructuredAPIParser.d.ts.map
@@ -1,131 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.UnstructuredAPIParser = void 0;
37
- const DocumentParser_1 = require("./DocumentParser");
38
- const STRATEGY_MAP = {
39
- auto: "auto",
40
- fast: "fast",
41
- hi_res: "hi_res",
42
- ocr_only: "ocr_only",
43
- };
44
- /**
45
- * Document parser backed by the **Unstructured REST API** — either the
46
- * official hosted service or a self-hosted open-source API server.
47
- *
48
- * Uses the official `unstructured-client` npm package under the hood.
49
- *
50
- * **Peer dependency:** `unstructured-client`
51
- *
52
- * @example
53
- * ```typescript
54
- * // Hosted service
55
- * const parser = new UnstructuredAPIParser({ apiKey: process.env.UNSTRUCTURED_API_KEY });
56
- *
57
- * // Self-hosted (no auth required)
58
- * const parser = new UnstructuredAPIParser({ serverUrl: "http://localhost:8000" });
59
- *
60
- * const doc = await parser.parse("/path/to/report.pdf", { strategy: "hi_res" });
61
- * await pipeline.ingestFile("/path/to/report.pdf", parser);
62
- * ```
63
- */
64
- class UnstructuredAPIParser extends DocumentParser_1.DocumentParser {
65
- constructor(config = {}) {
66
- super();
67
- this.config = config;
68
- this.name = "unstructured-api";
69
- }
70
- /**
71
- * Parse a file via the Unstructured API.
72
- *
73
- * @param filePath - Path to the document to parse (read from disk)
74
- * @param options - Strategy, languages, and any other partition parameters
75
- */
76
- async parse(filePath, options) {
77
- const pkg = "unstructured-client";
78
- let UnstructuredClient;
79
- try {
80
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
81
- ({ UnstructuredClient } = await Promise.resolve(`${pkg}`).then(s => __importStar(require(s))));
82
- }
83
- catch {
84
- throw new Error("UnstructuredAPIParser requires 'unstructured-client'. " +
85
- "Install it with: npm install unstructured-client");
86
- }
87
- const fs = await Promise.resolve().then(() => __importStar(require("fs")));
88
- const path = await Promise.resolve().then(() => __importStar(require("path")));
89
- const clientConfig = {};
90
- if (this.config.apiKey) {
91
- clientConfig["security"] = { apiKeyAuth: this.config.apiKey };
92
- }
93
- if (this.config.serverUrl) {
94
- clientConfig["serverURL"] = this.config.serverUrl;
95
- }
96
- const client = new UnstructuredClient(clientConfig);
97
- const fileContent = fs.readFileSync(filePath);
98
- const fileName = path.basename(filePath);
99
- const { strategy, languages, ...rest } = options ?? {};
100
- const res = await client.general.partition({
101
- partitionParameters: {
102
- files: { content: fileContent, fileName },
103
- strategy: STRATEGY_MAP[strategy ?? "auto"] ?? "auto",
104
- ...(languages ? { languages } : {}),
105
- ...rest,
106
- },
107
- });
108
- const rawElements = res.elements ?? [];
109
- const elements = this.mapRawElements(rawElements);
110
- return {
111
- text: this.elementsToText(elements),
112
- elements,
113
- };
114
- }
115
- mapRawElements(raw) {
116
- return raw.map((el) => {
117
- const e = el;
118
- return {
119
- type: typeof e["type"] === "string" ? e["type"] : "unknown",
120
- text: typeof e["text"] === "string" ? e["text"] : "",
121
- metadata: e["metadata"] != null &&
122
- typeof e["metadata"] === "object" &&
123
- !Array.isArray(e["metadata"])
124
- ? e["metadata"]
125
- : undefined,
126
- };
127
- });
128
- }
129
- }
130
- exports.UnstructuredAPIParser = UnstructuredAPIParser;
131
- //# sourceMappingURL=UnstructuredAPIParser.js.map
@@ -1,42 +0,0 @@
1
- import { DocumentParser } from "./DocumentParser";
2
- import { ParsedDocument, ParseOptions } from "./types";
3
- /**
4
- * Document parser that uses the **local** (open-source Python) version of
5
- * Unstructured via the `@epilogo/unstructured-io-node` npm bridge.
6
- *
7
- * The bridge spawns a Python virtual environment and calls the Python
8
- * `unstructured` library directly — no API key required, but Python 3.8+
9
- * and system dependencies (poppler, tesseract, etc.) must be available.
10
- *
11
- * **Peer dependency:** `@epilogo/unstructured-io-node`
12
- *
13
- * @example
14
- * ```typescript
15
- * import { UnstructuredLocalParser } from "@agentionai/agents/parsers";
16
- *
17
- * const parser = new UnstructuredLocalParser();
18
- * const doc = await parser.parse("/path/to/report.pdf", {
19
- * strategy: "hi_res",
20
- * languages: ["eng"],
21
- * });
22
- * console.log(doc.elements?.length, "elements");
23
- *
24
- * // Use with IngestionPipeline
25
- * await pipeline.ingestFile("/path/to/report.pdf", parser);
26
- * ```
27
- */
28
- export declare class UnstructuredLocalParser extends DocumentParser {
29
- readonly name = "unstructured-local";
30
- /**
31
- * Parse a file using the local Python Unstructured library.
32
- *
33
- * On first call, `ensureEnvironmentSetup()` is invoked to download the
34
- * Python venv if it does not already exist (one-time, slow operation).
35
- *
36
- * @param filePath - Path to the document to parse
37
- * @param options - Strategy, languages, and any other unstructured kwargs
38
- */
39
- parse(filePath: string, options?: ParseOptions): Promise<ParsedDocument>;
40
- private mapRawElements;
41
- }
42
- //# sourceMappingURL=UnstructuredLocalParser.d.ts.map
@@ -1,118 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.UnstructuredLocalParser = void 0;
37
- const DocumentParser_1 = require("./DocumentParser");
38
- /**
39
- * Document parser that uses the **local** (open-source Python) version of
40
- * Unstructured via the `@epilogo/unstructured-io-node` npm bridge.
41
- *
42
- * The bridge spawns a Python virtual environment and calls the Python
43
- * `unstructured` library directly — no API key required, but Python 3.8+
44
- * and system dependencies (poppler, tesseract, etc.) must be available.
45
- *
46
- * **Peer dependency:** `@epilogo/unstructured-io-node`
47
- *
48
- * @example
49
- * ```typescript
50
- * import { UnstructuredLocalParser } from "@agentionai/agents/parsers";
51
- *
52
- * const parser = new UnstructuredLocalParser();
53
- * const doc = await parser.parse("/path/to/report.pdf", {
54
- * strategy: "hi_res",
55
- * languages: ["eng"],
56
- * });
57
- * console.log(doc.elements?.length, "elements");
58
- *
59
- * // Use with IngestionPipeline
60
- * await pipeline.ingestFile("/path/to/report.pdf", parser);
61
- * ```
62
- */
63
- class UnstructuredLocalParser extends DocumentParser_1.DocumentParser {
64
- constructor() {
65
- super(...arguments);
66
- this.name = "unstructured-local";
67
- }
68
- /**
69
- * Parse a file using the local Python Unstructured library.
70
- *
71
- * On first call, `ensureEnvironmentSetup()` is invoked to download the
72
- * Python venv if it does not already exist (one-time, slow operation).
73
- *
74
- * @param filePath - Path to the document to parse
75
- * @param options - Strategy, languages, and any other unstructured kwargs
76
- */
77
- async parse(filePath, options) {
78
- const pkg = "@epilogo/unstructured-io-node";
79
- let UnstructuredIO;
80
- try {
81
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
82
- ({ UnstructuredIO } = await Promise.resolve(`${pkg}`).then(s => __importStar(require(s))));
83
- }
84
- catch {
85
- throw new Error("UnstructuredLocalParser requires '@epilogo/unstructured-io-node'. " +
86
- "Install it with: npm install @epilogo/unstructured-io-node");
87
- }
88
- await UnstructuredIO.ensureEnvironmentSetup();
89
- const { strategy, languages, ...rest } = options ?? {};
90
- const rawElements = await UnstructuredIO.partition({
91
- filename: filePath,
92
- strategy: strategy ?? "auto",
93
- ...(languages ? { languages } : {}),
94
- ...rest,
95
- });
96
- const elements = this.mapRawElements(rawElements);
97
- return {
98
- text: this.elementsToText(elements),
99
- elements,
100
- };
101
- }
102
- mapRawElements(raw) {
103
- return raw.map((el) => {
104
- const e = el;
105
- return {
106
- type: typeof e["type"] === "string" ? e["type"] : "unknown",
107
- text: typeof e["text"] === "string" ? e["text"] : "",
108
- metadata: e["metadata"] != null &&
109
- typeof e["metadata"] === "object" &&
110
- !Array.isArray(e["metadata"])
111
- ? e["metadata"]
112
- : undefined,
113
- };
114
- });
115
- }
116
- }
117
- exports.UnstructuredLocalParser = UnstructuredLocalParser;
118
- //# sourceMappingURL=UnstructuredLocalParser.js.map
@@ -1,3 +0,0 @@
1
- export type { ParsedElement, ParsedDocument, ParseOptions } from "./types";
2
- export { DocumentParser } from "./DocumentParser";
3
- //# sourceMappingURL=index.d.ts.map
@@ -1,6 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DocumentParser = void 0;
4
- var DocumentParser_1 = require("./DocumentParser");
5
- Object.defineProperty(exports, "DocumentParser", { enumerable: true, get: function () { return DocumentParser_1.DocumentParser; } });
6
- //# sourceMappingURL=index.js.map
@@ -1,50 +0,0 @@
1
- /**
2
- * A single structured element extracted from a document.
3
- * Matches the element format returned by Unstructured and similar parsers.
4
- */
5
- export interface ParsedElement {
6
- /**
7
- * Element type — e.g. "Title", "NarrativeText", "Table", "Image",
8
- * "ListItem", "Header", "Footer", "Document", etc.
9
- */
10
- type: string;
11
- /** Text content of this element */
12
- text: string;
13
- /**
14
- * Parser-provided metadata — e.g. page_number, coordinates, languages,
15
- * file_directory, filename, filetype, etc.
16
- */
17
- metadata?: Record<string, unknown>;
18
- }
19
- /**
20
- * The result of parsing a document file.
21
- */
22
- export interface ParsedDocument {
23
- /** Full plain-text content (elements joined by double newlines) */
24
- text: string;
25
- /**
26
- * Structured elements if the parser supports them.
27
- * Absent when the parser only returns plain text.
28
- */
29
- elements?: ParsedElement[];
30
- /** File-level metadata from the parser, when available */
31
- metadata?: Record<string, unknown>;
32
- }
33
- /**
34
- * Options shared across all document parsers.
35
- */
36
- export interface ParseOptions {
37
- /**
38
- * Parsing strategy.
39
- * - `"auto"`: Let the parser decide (default)
40
- * - `"fast"`: Text extraction only, no OCR
41
- * - `"hi_res"`: High-resolution layout analysis with OCR
42
- * - `"ocr_only"`: Force OCR on every page
43
- */
44
- strategy?: "auto" | "fast" | "hi_res" | "ocr_only";
45
- /** Languages to use for OCR (ISO 639-1 codes, e.g. `["eng", "fra"]`) */
46
- languages?: string[];
47
- /** Pass-through options specific to the underlying parser */
48
- [key: string]: unknown;
49
- }
50
- //# sourceMappingURL=types.d.ts.map
@@ -1,3 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- //# sourceMappingURL=types.js.map