@minded-ai/mindedjs 3.1.14 → 3.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/internalTools/documentExtraction/documentExtraction.d.ts +28 -80
- package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
- package/dist/internalTools/documentExtraction/documentExtraction.js +39 -105
- package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
- package/dist/internalTools/documentExtraction/parseDocument.d.ts +66 -0
- package/dist/internalTools/documentExtraction/parseDocument.d.ts.map +1 -0
- package/dist/internalTools/documentExtraction/parseDocument.js +92 -0
- package/dist/internalTools/documentExtraction/parseDocument.js.map +1 -0
- package/dist/nodes/addAppToolNode.js +1 -1
- package/dist/nodes/addAppToolNode.js.map +1 -1
- package/dist/nodes/addToolNode.js +1 -1
- package/dist/nodes/addToolNode.js.map +1 -1
- package/dist/nodes/compilePrompt.d.ts +8 -0
- package/dist/nodes/compilePrompt.d.ts.map +1 -1
- package/dist/nodes/compilePrompt.js +56 -15
- package/dist/nodes/compilePrompt.js.map +1 -1
- package/dist/toolsLibrary/parseDocument.d.ts +6 -5
- package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
- package/dist/toolsLibrary/parseDocument.js +16 -8
- package/dist/toolsLibrary/parseDocument.js.map +1 -1
- package/docs/low-code-editor/nodes.md +101 -1
- package/docs/low-code-editor/tools.md +24 -3
- package/docs/tooling/document-processing.md +103 -19
- package/package.json +4 -2
- package/src/index.ts +2 -1
- package/src/internalTools/documentExtraction/documentExtraction.ts +51 -131
- package/src/internalTools/documentExtraction/parseDocument.ts +107 -0
- package/src/nodes/addAppToolNode.ts +2 -2
- package/src/nodes/addToolNode.ts +2 -2
- package/src/nodes/compilePrompt.ts +56 -16
- package/src/toolsLibrary/parseDocument.ts +17 -8
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Document Processing
|
|
2
2
|
|
|
3
|
-
Parse and extract data from images, PDFs, Word documents, spreadsheets, and more using AI-powered document processing. This tool handles both document parsing
|
|
3
|
+
Parse and extract data from images, PDFs, Word documents, spreadsheets, and more using AI-powered document processing. This tool handles both single and multiple document parsing with optional data extraction.
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
6
|
|
|
@@ -11,6 +11,7 @@ Parse and extract data from images, PDFs, Word documents, spreadsheets, and more
|
|
|
11
11
|
1. **Structured Extraction with Schema**: Extract data into a predefined Zod schema using AI
|
|
12
12
|
2. **Structured Extraction with Prompt**: Guide extraction using custom prompts
|
|
13
13
|
3. **Raw Text Extraction**: Parse document and extract plain text without AI processing
|
|
14
|
+
4. **Multiple Document Processing**: Process multiple documents at once - content is concatenated for extraction
|
|
14
15
|
|
|
15
16
|
## Processing Modes
|
|
16
17
|
|
|
@@ -29,7 +30,7 @@ Document processing includes built-in AI extraction - use the node's `prompt` an
|
|
|
29
30
|
|
|
30
31
|
**Available properties:**
|
|
31
32
|
|
|
32
|
-
- `parameters.documentSource` (string, required): URL or file path to
|
|
33
|
+
- `parameters.documentSource` (string or array, required): URL or file path to a single document, or array of URLs/file paths to process multiple documents. When an array is provided, documents are parsed and concatenated with double newlines.
|
|
33
34
|
- `parameters.returnStructuredOutput` (boolean, optional, default: `false`): Set to `true` to enable AI-powered extraction, `false` for raw text only. When `true`, requires either `prompt` or `outputSchema` (or both)
|
|
34
35
|
- `prompt` (string, optional): Instructions for AI-powered extraction. Ignored when `returnStructuredOutput` is `false`
|
|
35
36
|
- `outputSchema` (schema object, optional): Define the structure of extracted data for structured extraction. Ignored when `returnStructuredOutput` is `false`
|
|
@@ -99,15 +100,48 @@ nodes:
|
|
|
99
100
|
prompt: 'Extract all names and addresses from this document'
|
|
100
101
|
```
|
|
101
102
|
|
|
103
|
+
Processing multiple documents with structured extraction:
|
|
104
|
+
|
|
105
|
+
```yaml
|
|
106
|
+
name: Main flow
|
|
107
|
+
nodes:
|
|
108
|
+
- name: 'parse-multiple-invoices'
|
|
109
|
+
type: appTool
|
|
110
|
+
displayName: 'Parse Multiple Invoices'
|
|
111
|
+
actionKey: 'minded-parse-documents'
|
|
112
|
+
actionName: 'Parse Document'
|
|
113
|
+
appName: 'Minded'
|
|
114
|
+
parameters:
|
|
115
|
+
documentSource: '{state.memory.invoiceUrls}'
|
|
116
|
+
# It is also possible to pass a JSON array of different items, including items taken from other arrays:
|
|
117
|
+
# documentSource: '["{state.memory.invoiceFilePath}", "{state.memory.invoiceUrls[0]}"]'
|
|
118
|
+
returnStructuredOutput: true
|
|
119
|
+
prompt: 'Extract all invoice data from the provided documents'
|
|
120
|
+
outputSchema:
|
|
121
|
+
- name: invoices
|
|
122
|
+
type: array
|
|
123
|
+
description: Array of invoice data
|
|
124
|
+
items:
|
|
125
|
+
- name: invoiceNumber
|
|
126
|
+
type: string
|
|
127
|
+
description: Invoice number
|
|
128
|
+
- name: amount
|
|
129
|
+
type: number
|
|
130
|
+
description: Total amount
|
|
131
|
+
- name: date
|
|
132
|
+
type: string
|
|
133
|
+
description: Invoice date
|
|
134
|
+
```
|
|
135
|
+
|
|
102
136
|
## Programmatic Usage
|
|
103
137
|
|
|
104
138
|
The SDK provides three main functions for document processing:
|
|
105
139
|
|
|
106
|
-
1. **`parseDocumentAndExtractStructuredData`** - Parse and optionally extract structured data with AI
|
|
140
|
+
1. **`parseDocumentAndExtractStructuredData`** - Parse one or more documents and optionally extract structured data with AI
|
|
107
141
|
|
|
108
142
|
```typescript
|
|
109
|
-
parseDocumentAndExtractStructuredData<T>({
|
|
110
|
-
|
|
143
|
+
type parseDocumentAndExtractStructuredData = <T>(options: {
|
|
144
|
+
documentSources: string[], // Required: Array of URLs/file paths. The results of multiple documents are concatenated.
|
|
111
145
|
sessionId: string, // Required: Session identifier
|
|
112
146
|
returnStructuredOutput: boolean, // Required: Enable/disable AI extraction
|
|
113
147
|
llm?: BaseLanguageModel, // Optional: LLM instance (required when returnStructuredOutput is true)
|
|
@@ -115,22 +149,21 @@ The SDK provides three main functions for document processing:
|
|
|
115
149
|
outputSchemaPrompt?: string, // Optional: Instructions for extraction
|
|
116
150
|
processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
|
|
117
151
|
llamaCloudApiKey?: string // Optional: API key for local mode
|
|
118
|
-
})
|
|
119
|
-
rawContent?: string,
|
|
120
|
-
structuredContent?: T | string
|
|
121
|
-
metadata?: { fileSize?: number, fileType: string, processingTime: number, contentLength: number }
|
|
152
|
+
}) => Promise<{
|
|
153
|
+
rawContent?: string, // Concatenated content when multiple documents provided
|
|
154
|
+
structuredContent?: T | string // Extracted from concatenated content when multiple documents provided
|
|
122
155
|
}>
|
|
123
156
|
```
|
|
124
157
|
|
|
125
158
|
2. **`parseDocument`** - Parse document and extract raw text only
|
|
126
159
|
|
|
127
160
|
```typescript
|
|
128
|
-
parseDocument({
|
|
161
|
+
type parseDocument = (options: {
|
|
129
162
|
documentSource: string, // Required: URL or file path
|
|
130
163
|
sessionId: string, // Required: Session identifier
|
|
131
164
|
processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
|
|
132
165
|
llamaCloudApiKey?: string // Optional: API key for local mode
|
|
133
|
-
})
|
|
166
|
+
}) => Promise<{
|
|
134
167
|
rawContent?: string,
|
|
135
168
|
metadata?: { fileSize?: number, fileType: string, processingTime: number, contentLength: number }
|
|
136
169
|
}>
|
|
@@ -139,13 +172,13 @@ The SDK provides three main functions for document processing:
|
|
|
139
172
|
3. **`extractStructuredDataFromString`** - Extract structured data from already parsed text
|
|
140
173
|
|
|
141
174
|
```typescript
|
|
142
|
-
extractStructuredDataFromString<T>({
|
|
175
|
+
type extractStructuredDataFromString = <T>(options: {
|
|
143
176
|
content: string, // Required: Text content to extract from
|
|
144
177
|
llm: BaseLanguageModel, // Required: LLM instance
|
|
145
178
|
sessionId: string, // Required: Session identifier
|
|
146
179
|
schema?: ZodType<T>, // Optional: Zod schema for structured extraction
|
|
147
180
|
prompt?: string // Optional: Instructions for extraction
|
|
148
|
-
})
|
|
181
|
+
}) => Promise<T | string>
|
|
149
182
|
```
|
|
150
183
|
|
|
151
184
|
### Structured Extraction with Schema
|
|
@@ -176,7 +209,7 @@ const invoiceSchema = z.object({
|
|
|
176
209
|
});
|
|
177
210
|
|
|
178
211
|
const result = await parseDocumentAndExtractStructuredData({
|
|
179
|
-
|
|
212
|
+
documentSources: ['./invoice.pdf'],
|
|
180
213
|
sessionId: state.sessionId,
|
|
181
214
|
returnStructuredOutput: true,
|
|
182
215
|
llm: agent.llm,
|
|
@@ -186,7 +219,6 @@ const result = await parseDocumentAndExtractStructuredData({
|
|
|
186
219
|
|
|
187
220
|
console.log(result.structuredContent); // Typed data matching your schema
|
|
188
221
|
console.log(result.rawContent); // Original raw text
|
|
189
|
-
console.log(result.metadata); // Processing metadata
|
|
190
222
|
```
|
|
191
223
|
|
|
192
224
|
### Structured Extraction with Prompt Only
|
|
@@ -198,7 +230,7 @@ import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
|
|
|
198
230
|
|
|
199
231
|
// Contract analysis with prompt guidance
|
|
200
232
|
const result = await parseDocumentAndExtractStructuredData({
|
|
201
|
-
|
|
233
|
+
documentSources: ['./contract.pdf'],
|
|
202
234
|
sessionId: state.sessionId,
|
|
203
235
|
returnStructuredOutput: true,
|
|
204
236
|
llm: agent.llm,
|
|
@@ -226,14 +258,14 @@ console.log(result.metadata); // File size, type, processing time, content lengt
|
|
|
226
258
|
|
|
227
259
|
### Using URLs
|
|
228
260
|
|
|
229
|
-
All functions accept URLs in addition to file paths via the `documentSource` parameter.
|
|
261
|
+
All functions accept URLs in addition to file paths via the `documentSource` and `documentSources` parameters.
|
|
230
262
|
|
|
231
263
|
```typescript
|
|
232
264
|
import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
|
|
233
265
|
import { z } from 'zod';
|
|
234
266
|
|
|
235
267
|
const result = await parseDocumentAndExtractStructuredData({
|
|
236
|
-
|
|
268
|
+
documentSources: ['https://example.com/invoice.pdf'],
|
|
237
269
|
sessionId: state.sessionId,
|
|
238
270
|
returnStructuredOutput: true,
|
|
239
271
|
llm: agent.llm,
|
|
@@ -245,6 +277,58 @@ const result = await parseDocumentAndExtractStructuredData({
|
|
|
245
277
|
});
|
|
246
278
|
```
|
|
247
279
|
|
|
280
|
+
### Processing Multiple Documents
|
|
281
|
+
|
|
282
|
+
Process multiple documents by providing an array of URLs or file paths. Documents are parsed in parallel and their content is concatenated with double newlines before optional structured extraction.
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
|
|
286
|
+
import { z } from 'zod';
|
|
287
|
+
|
|
288
|
+
// Extract data from multiple invoices into an array
|
|
289
|
+
const invoiceSchema = z.array(
|
|
290
|
+
z.object({
|
|
291
|
+
invoiceNumber: z.string(),
|
|
292
|
+
vendor: z.string(),
|
|
293
|
+
amount: z.number(),
|
|
294
|
+
date: z.string(),
|
|
295
|
+
})
|
|
296
|
+
);
|
|
297
|
+
|
|
298
|
+
const result = await parseDocumentAndExtractStructuredData({
|
|
299
|
+
documentSources: [
|
|
300
|
+
'./invoice1.pdf',
|
|
301
|
+
'./invoice2.pdf',
|
|
302
|
+
'./invoice3.pdf',
|
|
303
|
+
],
|
|
304
|
+
sessionId: state.sessionId,
|
|
305
|
+
returnStructuredOutput: true,
|
|
306
|
+
llm: agent.llm,
|
|
307
|
+
outputSchema: invoiceSchema,
|
|
308
|
+
outputSchemaPrompt: 'Extract invoice data from all provided invoices',
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
console.log(result.structuredContent); // Array of invoice data
|
|
312
|
+
console.log(result.rawContent); // Concatenated text from all invoices
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
You can also extract raw text from multiple documents without structured extraction:
|
|
316
|
+
|
|
317
|
+
```typescript
|
|
318
|
+
import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
|
|
319
|
+
|
|
320
|
+
const result = await parseDocumentAndExtractStructuredData({
|
|
321
|
+
documentSources: [
|
|
322
|
+
'https://example.com/doc1.pdf',
|
|
323
|
+
'https://example.com/doc2.pdf',
|
|
324
|
+
],
|
|
325
|
+
sessionId: state.sessionId,
|
|
326
|
+
returnStructuredOutput: false,
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
console.log(result.rawContent); // Concatenated text from both documents
|
|
330
|
+
```
|
|
331
|
+
|
|
248
332
|
### Identity Document Verification
|
|
249
333
|
|
|
250
334
|
Extract personal information from ID documents with structured validation.
|
|
@@ -263,7 +347,7 @@ const idSchema = z.object({
|
|
|
263
347
|
});
|
|
264
348
|
|
|
265
349
|
const result = await parseDocumentAndExtractStructuredData({
|
|
266
|
-
|
|
350
|
+
documentSources: ['./id-card.jpg'],
|
|
267
351
|
sessionId: state.sessionId,
|
|
268
352
|
returnStructuredOutput: true,
|
|
269
353
|
llm: agent.llm,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@minded-ai/mindedjs",
|
|
3
|
-
"version": "3.1.
|
|
3
|
+
"version": "3.1.15",
|
|
4
4
|
"description": "MindedJS is a TypeScript library for building agents.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"@types/aws-lambda": "^8.10.150",
|
|
34
34
|
"@types/chai": "^4.3.11",
|
|
35
35
|
"@types/ejs": "^3.1.5",
|
|
36
|
+
"@types/lodash": "^4.17.21",
|
|
36
37
|
"@types/mocha": "^10.0.6",
|
|
37
38
|
"@types/node": "^20.11.19",
|
|
38
39
|
"@types/sinon": "^17.0.4",
|
|
@@ -63,6 +64,7 @@
|
|
|
63
64
|
"flatted": "^3.3.3",
|
|
64
65
|
"js-yaml": "^4.1.0",
|
|
65
66
|
"langchain": "^0.3.25",
|
|
67
|
+
"lodash": "^4.17.21",
|
|
66
68
|
"pino": "^9.7.0",
|
|
67
69
|
"pino-pretty": "^13.0.0",
|
|
68
70
|
"socket.io-client": "^4.8.1",
|
|
@@ -74,4 +76,4 @@
|
|
|
74
76
|
"peerDependencies": {
|
|
75
77
|
"playwright": "^1.55.0"
|
|
76
78
|
}
|
|
77
|
-
}
|
|
79
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -71,7 +71,8 @@ export type { Tool, ToolExecuteInput } from './types/Tools.types';
|
|
|
71
71
|
export { ToolType, ProxyType } from './types/Tools.types';
|
|
72
72
|
|
|
73
73
|
// Document processing utilities
|
|
74
|
-
export { parseDocumentAndExtractStructuredData
|
|
74
|
+
export { parseDocumentAndExtractStructuredData } from './internalTools/documentExtraction/documentExtraction';
|
|
75
|
+
export { parseDocument, DocumentProcessingMode } from './internalTools/documentExtraction/parseDocument';
|
|
75
76
|
export { extractStructuredDataFromString } from './internalTools/documentExtraction/extractStructuredData';
|
|
76
77
|
export type {
|
|
77
78
|
DocumentProcessorConfig,
|
|
@@ -1,31 +1,20 @@
|
|
|
1
1
|
import { ZodType } from 'zod';
|
|
2
2
|
import { BaseLanguageModel } from '@langchain/core/language_models/base';
|
|
3
3
|
import { extractStructuredDataFromString } from './extractStructuredData';
|
|
4
|
-
import {
|
|
5
|
-
import { parseDocumentWithManagedService } from './parseDocumentManaged';
|
|
6
|
-
import { parseDocumentWithLocalService } from './parseDocumentLocal';
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Document processing mode
|
|
10
|
-
*/
|
|
11
|
-
export enum DocumentProcessingMode {
|
|
12
|
-
/** Process documents using Minded cloud service (default) */
|
|
13
|
-
MANAGED = 'managed',
|
|
14
|
-
/** Process documents locally using LlamaCloud */
|
|
15
|
-
LOCAL = 'local',
|
|
16
|
-
}
|
|
4
|
+
import { DocumentProcessingMode, parseDocument } from './parseDocument';
|
|
17
5
|
|
|
18
6
|
/**
|
|
19
7
|
* Parse document and extract structured data using AI.
|
|
20
8
|
*
|
|
21
|
-
* This function provides a flexible way to process documents with optional AI-powered extraction:
|
|
22
|
-
* - Raw text extraction: Parse document without LLM processing
|
|
9
|
+
* This function provides a flexible way to process one or more documents with optional AI-powered extraction:
|
|
10
|
+
* - Raw text extraction: Parse document(s) without LLM processing
|
|
23
11
|
* - Structured extraction with schema: Extract data matching a Zod schema
|
|
24
12
|
* - Structured extraction with prompt: Guide extraction using custom prompts
|
|
13
|
+
* - Multiple document support: Process multiple documents by passing an array - their content will be concatenated
|
|
25
14
|
* - Processing modes: Use DocumentProcessingMode.LOCAL (requires LlamaCloud API key) or DocumentProcessingMode.MANAGED (backend service)
|
|
26
15
|
*
|
|
27
16
|
* @param options - Document processing options
|
|
28
|
-
* @param options.
|
|
17
|
+
* @param options.documentSources - Array of URLs/file paths. When multiple documents are provided, they are parsed and concatenated with double newlines before optional structured extraction.
|
|
29
18
|
* @param options.processingMode - Document parsing mode: DocumentProcessingMode.MANAGED (default, backend service) or DocumentProcessingMode.LOCAL (requires llamaCloudApiKey)
|
|
30
19
|
* @param options.sessionId - Unique session identifier for logging and tracking
|
|
31
20
|
* @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
|
|
@@ -35,11 +24,10 @@ export enum DocumentProcessingMode {
|
|
|
35
24
|
* @param options.outputSchemaPrompt - Optional prompt to guide the llm how to extract the data
|
|
36
25
|
*
|
|
37
26
|
* @returns Promise resolving to an object containing:
|
|
38
|
-
* - rawContent: The raw extracted text from the document
|
|
39
|
-
* - structuredContent: AI-extracted structured data (if returnStructuredOutput is true)
|
|
40
|
-
* - metadata: Document metadata from processing
|
|
27
|
+
* - rawContent: The raw extracted text from the document. When multiple documents are provided, contains their concatenated content separated by double newlines.
|
|
28
|
+
* - structuredContent: AI-extracted structured data (only present if returnStructuredOutput is true). When multiple documents are provided, extracts from their concatenated content.
|
|
41
29
|
*
|
|
42
|
-
* @throws {Error} If
|
|
30
|
+
* @throws {Error} If documentSources is not provided
|
|
43
31
|
* @throws {Error} If returnStructuredOutput is true but llm is not provided
|
|
44
32
|
* @throws {Error} If document parsing or extraction fails
|
|
45
33
|
*
|
|
@@ -48,9 +36,9 @@ export enum DocumentProcessingMode {
|
|
|
48
36
|
* import { parseDocumentAndExtractStructuredData, DocumentProcessingMode } from '@minded-ai/mindedjs';
|
|
49
37
|
* import { z } from 'zod';
|
|
50
38
|
*
|
|
51
|
-
* // Parse document and extract structured data using a schema
|
|
39
|
+
* // Parse single document and extract structured data using a schema
|
|
52
40
|
* const result1 = await parseDocumentAndExtractStructuredData({
|
|
53
|
-
*
|
|
41
|
+
* documentSources: ['./invoice.pdf'],
|
|
54
42
|
* processingMode: DocumentProcessingMode.MANAGED,
|
|
55
43
|
* sessionId: state.sessionId,
|
|
56
44
|
* returnStructuredOutput: true,
|
|
@@ -62,9 +50,23 @@ export enum DocumentProcessingMode {
|
|
|
62
50
|
* }),
|
|
63
51
|
* });
|
|
64
52
|
*
|
|
65
|
-
* // Parse
|
|
53
|
+
* // Parse multiple documents and extract structured data from their combined content
|
|
54
|
+
* const result2 = await parseDocumentAndExtractStructuredData({
|
|
55
|
+
* documentSources: ['./invoice1.pdf', './invoice2.pdf', './invoice3.pdf'],
|
|
56
|
+
* processingMode: DocumentProcessingMode.MANAGED,
|
|
57
|
+
* sessionId: state.sessionId,
|
|
58
|
+
* returnStructuredOutput: true,
|
|
59
|
+
* llm: agent.llm,
|
|
60
|
+
* outputSchema: z.array(z.object({
|
|
61
|
+
* invoiceNumber: z.string(),
|
|
62
|
+
* totalAmount: z.number(),
|
|
63
|
+
* date: z.string(),
|
|
64
|
+
* })),
|
|
65
|
+
* });
|
|
66
|
+
*
|
|
67
|
+
* // Parse document without structured extraction
|
|
66
68
|
* const result3 = await parseDocumentAndExtractStructuredData({
|
|
67
|
-
*
|
|
69
|
+
* documentSources: ['./document.pdf'],
|
|
68
70
|
* processingMode: DocumentProcessingMode.MANAGED,
|
|
69
71
|
* sessionId: state.sessionId,
|
|
70
72
|
* returnStructuredOutput: false,
|
|
@@ -72,7 +74,7 @@ export enum DocumentProcessingMode {
|
|
|
72
74
|
* ```
|
|
73
75
|
*/
|
|
74
76
|
export async function parseDocumentAndExtractStructuredData<T extends Record<string, any>>({
|
|
75
|
-
|
|
77
|
+
documentSources,
|
|
76
78
|
processingMode,
|
|
77
79
|
sessionId,
|
|
78
80
|
llamaCloudApiKey,
|
|
@@ -81,7 +83,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
|
|
|
81
83
|
outputSchema,
|
|
82
84
|
outputSchemaPrompt,
|
|
83
85
|
}: {
|
|
84
|
-
|
|
86
|
+
documentSources: string[];
|
|
85
87
|
processingMode?: DocumentProcessingMode;
|
|
86
88
|
sessionId: string;
|
|
87
89
|
llamaCloudApiKey?: string;
|
|
@@ -92,15 +94,28 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
|
|
|
92
94
|
}): Promise<{
|
|
93
95
|
rawContent?: string;
|
|
94
96
|
structuredContent?: T | string;
|
|
95
|
-
metadata?: DocumentProcessResponse['metadata'];
|
|
96
97
|
}> {
|
|
97
|
-
// Parse document
|
|
98
|
-
const
|
|
99
|
-
documentSource
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
98
|
+
// Parse each document and concatenate results
|
|
99
|
+
const results = await Promise.all(
|
|
100
|
+
documentSources.map((documentSource) =>
|
|
101
|
+
parseDocument({
|
|
102
|
+
documentSource,
|
|
103
|
+
processingMode,
|
|
104
|
+
sessionId,
|
|
105
|
+
llamaCloudApiKey,
|
|
106
|
+
}),
|
|
107
|
+
),
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
// Concatenate the raw content of the parsed documents with two newlines
|
|
111
|
+
const concatenatedContent = results
|
|
112
|
+
.map((r) => r.rawContent)
|
|
113
|
+
.filter(Boolean)
|
|
114
|
+
.join('\n\n');
|
|
115
|
+
|
|
116
|
+
const result = {
|
|
117
|
+
rawContent: concatenatedContent,
|
|
118
|
+
};
|
|
104
119
|
|
|
105
120
|
if (!returnStructuredOutput || !result.rawContent) {
|
|
106
121
|
return result;
|
|
@@ -112,7 +127,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
|
|
|
112
127
|
|
|
113
128
|
// Extract structured data from the parsed document
|
|
114
129
|
const structuredContent = await extractStructuredDataFromString<T>({
|
|
115
|
-
content: result.rawContent
|
|
130
|
+
content: result.rawContent!,
|
|
116
131
|
llm,
|
|
117
132
|
schema: outputSchema,
|
|
118
133
|
prompt: outputSchemaPrompt,
|
|
@@ -120,102 +135,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
|
|
|
120
135
|
});
|
|
121
136
|
|
|
122
137
|
return {
|
|
123
|
-
|
|
138
|
+
rawContent: result.rawContent,
|
|
124
139
|
structuredContent,
|
|
125
140
|
};
|
|
126
141
|
}
|
|
127
|
-
|
|
128
|
-
/**
|
|
129
|
-
* Parse document and extract raw text content.
|
|
130
|
-
*
|
|
131
|
-
* This function processes various document types (PDFs, images, Word docs, etc.) and extracts
|
|
132
|
-
* raw text content using either local processing with LlamaCloud or managed backend service.
|
|
133
|
-
* Use this for raw text extraction without AI-powered data extraction.
|
|
134
|
-
*
|
|
135
|
-
* @param options - Document parsing options
|
|
136
|
-
* @param options.documentSource - URL or file path to the document
|
|
137
|
-
* @param options.processingMode - Parsing mode: DocumentProcessingMode.LOCAL (requires llamaCloudApiKey) or DocumentProcessingMode.MANAGED (backend service, default)
|
|
138
|
-
* @param options.sessionId - Unique session identifier for logging and tracking
|
|
139
|
-
* @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
|
|
140
|
-
*
|
|
141
|
-
* @returns Promise resolving to an object containing:
|
|
142
|
-
* - rawContent: The raw extracted text from the document
|
|
143
|
-
* - metadata: Document processing metadata (file size, type, processing time, content length)
|
|
144
|
-
*
|
|
145
|
-
* @throws {Error} If documentSource is not provided
|
|
146
|
-
* @throws {Error} If document processing fails
|
|
147
|
-
*
|
|
148
|
-
* @example
|
|
149
|
-
* ```typescript
|
|
150
|
-
* import { parseDocument, DocumentProcessingMode } from '@minded-ai/mindedjs';
|
|
151
|
-
*
|
|
152
|
-
* // Parse document using managed service
|
|
153
|
-
* const result1 = await parseDocument({
|
|
154
|
-
* documentSource: 'https://example.com/invoice.pdf',
|
|
155
|
-
* processingMode: DocumentProcessingMode.MANAGED,
|
|
156
|
-
* sessionId: state.sessionId,
|
|
157
|
-
* });
|
|
158
|
-
* // result1: { rawContent: "Invoice text...", metadata: {...} }
|
|
159
|
-
*
|
|
160
|
-
* // Parse local document using LlamaCloud
|
|
161
|
-
* const result2 = await parseDocument({
|
|
162
|
-
* documentSource: './contract.pdf',
|
|
163
|
-
* processingMode: DocumentProcessingMode.LOCAL,
|
|
164
|
-
* sessionId: state.sessionId,
|
|
165
|
-
* llamaCloudApiKey: process.env.LLAMA_CLOUD_API_KEY,
|
|
166
|
-
* });
|
|
167
|
-
* // result2: { rawContent: "Contract text...", metadata: {...} }
|
|
168
|
-
* ```
|
|
169
|
-
*/
|
|
170
|
-
export async function parseDocument({
|
|
171
|
-
documentSource,
|
|
172
|
-
processingMode = DocumentProcessingMode.MANAGED,
|
|
173
|
-
sessionId,
|
|
174
|
-
llamaCloudApiKey,
|
|
175
|
-
}: {
|
|
176
|
-
documentSource: string;
|
|
177
|
-
processingMode?: DocumentProcessingMode;
|
|
178
|
-
sessionId: string;
|
|
179
|
-
llamaCloudApiKey?: string;
|
|
180
|
-
}): Promise<{
|
|
181
|
-
rawContent?: string;
|
|
182
|
-
metadata?: {
|
|
183
|
-
fileSize?: number;
|
|
184
|
-
fileType: string;
|
|
185
|
-
processingTime: number;
|
|
186
|
-
contentLength: number;
|
|
187
|
-
};
|
|
188
|
-
}> {
|
|
189
|
-
if (!documentSource) {
|
|
190
|
-
throw new Error('documentSource is required - provide a URL or file path');
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
const isDocumentUrl = isUrl(documentSource);
|
|
194
|
-
|
|
195
|
-
if (processingMode === DocumentProcessingMode.MANAGED) {
|
|
196
|
-
return parseDocumentWithManagedService({
|
|
197
|
-
documentSource,
|
|
198
|
-
isDocumentUrl,
|
|
199
|
-
sessionId,
|
|
200
|
-
});
|
|
201
|
-
} else {
|
|
202
|
-
return parseDocumentWithLocalService({
|
|
203
|
-
documentSource,
|
|
204
|
-
isDocumentUrl,
|
|
205
|
-
sessionId,
|
|
206
|
-
llamaCloudApiKey: llamaCloudApiKey ?? process.env.LLAMA_CLOUD_API_KEY,
|
|
207
|
-
});
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
/**
|
|
212
|
-
* Check if a string is a URL
|
|
213
|
-
*/
|
|
214
|
-
function isUrl(source: string): boolean {
|
|
215
|
-
try {
|
|
216
|
-
const url = new URL(source);
|
|
217
|
-
return url.protocol === 'http:' || url.protocol === 'https:';
|
|
218
|
-
} catch {
|
|
219
|
-
return false;
|
|
220
|
-
}
|
|
221
|
-
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { parseDocumentWithManagedService } from './parseDocumentManaged';
|
|
2
|
+
import { parseDocumentWithLocalService } from './parseDocumentLocal';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Document processing mode
|
|
6
|
+
*/
|
|
7
|
+
export enum DocumentProcessingMode {
|
|
8
|
+
/** Process documents using Minded cloud service (default) */
|
|
9
|
+
MANAGED = 'managed',
|
|
10
|
+
/** Process documents locally using LlamaCloud */
|
|
11
|
+
LOCAL = 'local',
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Parse document and extract raw text content.
|
|
16
|
+
*
|
|
17
|
+
* This function processes various document types (PDFs, images, Word docs, etc.) and extracts
|
|
18
|
+
* raw text content using either local processing with LlamaCloud or managed backend service.
|
|
19
|
+
* Use this for raw text extraction without AI-powered data extraction.
|
|
20
|
+
*
|
|
21
|
+
* @param options - Document parsing options
|
|
22
|
+
* @param options.documentSource - URL or file path to the document
|
|
23
|
+
* @param options.processingMode - Parsing mode: DocumentProcessingMode.LOCAL (requires llamaCloudApiKey) or DocumentProcessingMode.MANAGED (backend service, default)
|
|
24
|
+
* @param options.sessionId - Unique session identifier for logging and tracking
|
|
25
|
+
* @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
|
|
26
|
+
*
|
|
27
|
+
* @returns Promise resolving to an object containing:
|
|
28
|
+
* - rawContent: The raw extracted text from the document
|
|
29
|
+
* - metadata: Document processing metadata (file size, type, processing time, content length)
|
|
30
|
+
*
|
|
31
|
+
* @throws {Error} If documentSource is not provided
|
|
32
|
+
* @throws {Error} If document processing fails
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```typescript
|
|
36
|
+
* import { parseDocument, DocumentProcessingMode } from '@minded-ai/mindedjs';
|
|
37
|
+
*
|
|
38
|
+
* // Parse document using managed service
|
|
39
|
+
* const result1 = await parseDocument({
|
|
40
|
+
* documentSource: 'https://example.com/invoice.pdf',
|
|
41
|
+
* processingMode: DocumentProcessingMode.MANAGED,
|
|
42
|
+
* sessionId: state.sessionId,
|
|
43
|
+
* });
|
|
44
|
+
* // result1: { rawContent: "Invoice text...", metadata: {...} }
|
|
45
|
+
*
|
|
46
|
+
* // Parse local document using LlamaCloud
|
|
47
|
+
* const result2 = await parseDocument({
|
|
48
|
+
* documentSource: './contract.pdf',
|
|
49
|
+
* processingMode: DocumentProcessingMode.LOCAL,
|
|
50
|
+
* sessionId: state.sessionId,
|
|
51
|
+
* llamaCloudApiKey: process.env.LLAMA_CLOUD_API_KEY,
|
|
52
|
+
* });
|
|
53
|
+
* // result2: { rawContent: "Contract text...", metadata: {...} }
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
export async function parseDocument({
|
|
57
|
+
documentSource,
|
|
58
|
+
processingMode = DocumentProcessingMode.MANAGED,
|
|
59
|
+
sessionId,
|
|
60
|
+
llamaCloudApiKey,
|
|
61
|
+
}: {
|
|
62
|
+
documentSource: string;
|
|
63
|
+
processingMode?: DocumentProcessingMode;
|
|
64
|
+
sessionId: string;
|
|
65
|
+
llamaCloudApiKey?: string;
|
|
66
|
+
}): Promise<{
|
|
67
|
+
rawContent?: string;
|
|
68
|
+
metadata?: {
|
|
69
|
+
fileSize?: number;
|
|
70
|
+
fileType: string;
|
|
71
|
+
processingTime: number;
|
|
72
|
+
contentLength: number;
|
|
73
|
+
};
|
|
74
|
+
}> {
|
|
75
|
+
if (!documentSource) {
|
|
76
|
+
throw new Error('documentSource is required - provide a URL or file path');
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const isDocumentUrl = isUrl(documentSource);
|
|
80
|
+
|
|
81
|
+
if (processingMode === DocumentProcessingMode.MANAGED) {
|
|
82
|
+
return parseDocumentWithManagedService({
|
|
83
|
+
documentSource,
|
|
84
|
+
isDocumentUrl,
|
|
85
|
+
sessionId,
|
|
86
|
+
});
|
|
87
|
+
} else {
|
|
88
|
+
return parseDocumentWithLocalService({
|
|
89
|
+
documentSource,
|
|
90
|
+
isDocumentUrl,
|
|
91
|
+
sessionId,
|
|
92
|
+
llamaCloudApiKey: llamaCloudApiKey ?? process.env.LLAMA_CLOUD_API_KEY,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Check if a string is a URL
|
|
99
|
+
*/
|
|
100
|
+
function isUrl(source: string): boolean {
|
|
101
|
+
try {
|
|
102
|
+
const url = new URL(source);
|
|
103
|
+
return url.protocol === 'http:' || url.protocol === 'https:';
|
|
104
|
+
} catch {
|
|
105
|
+
return false;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
@@ -11,7 +11,7 @@ import { logger } from '../utils/logger';
|
|
|
11
11
|
import { createHistoryStep } from '../utils/history';
|
|
12
12
|
import { DefaultTool, Tool } from '../types/Tools.types';
|
|
13
13
|
import { combinePlaybooks } from '../playbooks/playbooks';
|
|
14
|
-
import { compilePrompt } from './compilePrompt';
|
|
14
|
+
import { compileParameter, compilePrompt } from './compilePrompt';
|
|
15
15
|
import { ToolMessage } from '@langchain/core/messages';
|
|
16
16
|
import { AnalyticsEventName } from '../types/Analytics.types';
|
|
17
17
|
import { trackAnalyticsEvent } from '../internalTools/analytics';
|
|
@@ -57,7 +57,7 @@ export const addAppToolNode = async ({
|
|
|
57
57
|
if (value !== '') {
|
|
58
58
|
// If the value is a string, compile it to allow variable injection
|
|
59
59
|
if (typeof value === 'string') {
|
|
60
|
-
compiledParameters[key] =
|
|
60
|
+
compiledParameters[key] = compileParameter(value, compileContext);
|
|
61
61
|
} else {
|
|
62
62
|
compiledParameters[key] = value;
|
|
63
63
|
}
|
package/src/nodes/addToolNode.ts
CHANGED
|
@@ -10,7 +10,7 @@ import { Agent } from '../agent';
|
|
|
10
10
|
import { createHistoryStep } from '../utils/history';
|
|
11
11
|
import { HistoryStep } from '../types/Agent.types';
|
|
12
12
|
import { combinePlaybooks } from '../playbooks/playbooks';
|
|
13
|
-
import { compilePrompt } from './compilePrompt';
|
|
13
|
+
import { compileParameter, compilePrompt } from './compilePrompt';
|
|
14
14
|
import { AnalyticsEventName } from '../types/Analytics.types';
|
|
15
15
|
import { trackAnalyticsEvent } from '../internalTools/analytics';
|
|
16
16
|
import { z } from 'zod';
|
|
@@ -58,7 +58,7 @@ export const addToolNode = async ({
|
|
|
58
58
|
if (value !== '') {
|
|
59
59
|
// If the value is a string, compile it to allow variable injection
|
|
60
60
|
if (typeof value === 'string') {
|
|
61
|
-
compiledParameters[key] =
|
|
61
|
+
compiledParameters[key] = compileParameter(value, compileContext);
|
|
62
62
|
} else {
|
|
63
63
|
compiledParameters[key] = value;
|
|
64
64
|
}
|