@minded-ai/mindedjs 3.0.8-beta.12 → 3.1.9-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +2 -9
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/runCommand.d.ts +1 -1
- package/dist/cli/runCommand.d.ts.map +1 -1
- package/dist/cli/runCommand.js +31 -23
- package/dist/cli/runCommand.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -3
- package/dist/index.js.map +1 -1
- package/dist/internalTools/documentExtraction/documentExtraction.d.ts +112 -102
- package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
- package/dist/internalTools/documentExtraction/documentExtraction.js +146 -705
- package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
- package/dist/internalTools/documentExtraction/extractStructuredData.d.ts +57 -0
- package/dist/internalTools/documentExtraction/extractStructuredData.d.ts.map +1 -0
- package/dist/internalTools/documentExtraction/extractStructuredData.js +121 -0
- package/dist/internalTools/documentExtraction/extractStructuredData.js.map +1 -0
- package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts +16 -0
- package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts.map +1 -0
- package/dist/internalTools/documentExtraction/parseDocumentLocal.js +547 -0
- package/dist/internalTools/documentExtraction/parseDocumentLocal.js.map +1 -0
- package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts +13 -0
- package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts.map +1 -0
- package/dist/internalTools/documentExtraction/parseDocumentManaged.js +150 -0
- package/dist/internalTools/documentExtraction/parseDocumentManaged.js.map +1 -0
- package/dist/nodes/addAppToolNode.d.ts.map +1 -1
- package/dist/nodes/addAppToolNode.js +20 -1
- package/dist/nodes/addAppToolNode.js.map +1 -1
- package/dist/toolsLibrary/classifier.d.ts +2 -2
- package/dist/toolsLibrary/parseDocument.d.ts +11 -10
- package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
- package/dist/toolsLibrary/parseDocument.js +33 -189
- package/dist/toolsLibrary/parseDocument.js.map +1 -1
- package/dist/toolsLibrary/withBrowserSession.d.ts.map +1 -1
- package/dist/toolsLibrary/withBrowserSession.js +70 -2
- package/dist/toolsLibrary/withBrowserSession.js.map +1 -1
- package/dist/types/Flows.types.d.ts +1 -0
- package/dist/types/Flows.types.d.ts.map +1 -1
- package/dist/types/Flows.types.js.map +1 -1
- package/dist/utils/schemaUtils.js +1 -1
- package/dist/utils/schemaUtils.js.map +1 -1
- package/docs/tooling/document-processing.md +235 -174
- package/package.json +2 -1
- package/src/cli/index.ts +2 -10
- package/src/cli/runCommand.ts +31 -25
- package/src/index.ts +2 -1
- package/src/internalTools/documentExtraction/documentExtraction.ts +184 -767
- package/src/internalTools/documentExtraction/extractStructuredData.ts +140 -0
- package/src/internalTools/documentExtraction/parseDocumentLocal.ts +660 -0
- package/src/internalTools/documentExtraction/parseDocumentManaged.ts +152 -0
- package/src/nodes/addAppToolNode.ts +30 -7
- package/src/toolsLibrary/parseDocument.ts +38 -206
- package/src/toolsLibrary/withBrowserSession.ts +89 -4
- package/src/types/Flows.types.ts +1 -0
- package/src/utils/schemaUtils.ts +1 -1
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import fsp from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import {
|
|
4
|
+
DocumentProcessRequest,
|
|
5
|
+
DocumentProcessResponse,
|
|
6
|
+
FileUploadRequest,
|
|
7
|
+
FileUploadResponse,
|
|
8
|
+
FileUploadType,
|
|
9
|
+
mindedConnectionSocketMessageType,
|
|
10
|
+
} from '../../platform/mindedConnectionTypes';
|
|
11
|
+
import { logger } from '../../utils/logger';
|
|
12
|
+
import * as mindedConnection from '../../platform/mindedConnection';
|
|
13
|
+
|
|
14
|
+
/**
 * Parse a document through the managed backend service.
 *
 * When `isDocumentUrl` is true the source is handed to the backend as a URL. Otherwise
 * `documentSource` is treated as a local file path: the file is uploaded first and the
 * backend receives the resulting upload id instead.
 *
 * @param params.documentSource URL or local file path of the document to parse.
 * @param params.isDocumentUrl Whether `documentSource` is a URL rather than a file path.
 * @param params.sessionId Session id sent with the socket request and attached to every log entry.
 * @returns The raw content reported by the backend (an empty string when none is returned)
 *          together with the response metadata, if any.
 * @throws Error when the backend reports an unsuccessful response, or when the upload step fails.
 */
export async function parseDocumentWithManagedService({
  documentSource,
  isDocumentUrl,
  sessionId,
}: {
  isDocumentUrl: boolean;
  documentSource: string;
  sessionId: string;
}): Promise<{ rawContent: string; metadata?: DocumentProcessResponse['metadata'] }> {
  // 180000 ms is 3 minutes. Earlier comments described this as a 2-minute timeout; the
  // value itself is left untouched so runtime behaviour does not change.
  // NOTE(review): confirm whether 3 minutes (180000) or 2 minutes (120000) is intended.
  const documentProcessTimeoutMs = 180000;
  // Last awaitEmit argument. The intent is to avoid re-sending the request after a timeout
  // so the backend does not process the same document twice.
  // NOTE(review): presumably this is a max-attempts/max-retries count where 1 means a
  // single attempt - verify against awaitEmit's signature.
  const documentProcessMaxRetries = 1;

  logger.info({
    msg: 'Parsing document with managed service',
    sessionId,
    documentSource,
    sourceType: isDocumentUrl ? 'url' : 'path',
  });

  let uploadId: string | undefined;
  let documentUrl: string | undefined;

  if (isDocumentUrl) {
    documentUrl = documentSource;
  } else {
    // Local file: upload it first so the backend can reference it by upload id
    logger.info({
      msg: 'Uploading document for parsing',
      sessionId,
    });

    uploadId = await uploadDocumentForProcessing({
      sessionId,
      documentPath: documentSource,
    });
  }

  logger.info({
    msg: 'Requesting document parsing via socket',
    sessionId,
    documentSource,
    hasUrl: !!documentUrl,
    hasUploadId: !!uploadId,
  });

  // Exactly one of documentUrl / uploadId is set, depending on the source type
  const response = await mindedConnection.awaitEmit<DocumentProcessRequest, DocumentProcessResponse>(
    mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
    {
      type: mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
      sessionId,
      documentUrl,
      uploadId,
    },
    documentProcessTimeoutMs,
    documentProcessMaxRetries,
  );

  if (!response.success) {
    throw new Error(response.error || 'Failed to process document on backend');
  }

  logger.info({
    msg: 'Got document parsing response',
    sessionId,
  });

  return {
    rawContent: response.rawContent || '',
    metadata: response.metadata,
  };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Upload a local document so that it can be processed by the backend.
 *
 * Reads the file from disk, requests an upload URL over the minded connection and then
 * PUTs the file bytes to that URL.
 *
 * @param params.sessionId Session id sent with the upload request and attached to every log entry.
 * @param params.documentPath Path of the local file to upload.
 * @returns The upload id taken from the upload-URL response.
 * @throws Error when no usable upload URL/id is returned, or when the PUT request does not
 *         respond with an ok status.
 */
async function uploadDocumentForProcessing({ sessionId, documentPath }: { sessionId: string; documentPath: string }): Promise<string> {
  const contents = await fsp.readFile(documentPath);
  const fileName = path.basename(documentPath);
  const fileSize = contents.length;

  logger.info({ msg: 'Requesting upload URL for document', sessionId, fileName, fileSize });

  const uploadRequest: FileUploadRequest = {
    type: mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
    sessionId,
    fileName,
    fileSize,
    uploadType: FileUploadType.DOCUMENT_PROCESSING,
  };
  const urlResponse = await mindedConnection.awaitEmit<FileUploadRequest, FileUploadResponse>(
    mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
    uploadRequest,
    30000,
  );

  // A response is only usable when it is successful AND carries both the URL and the id
  const { uploadUrl, uploadId } = urlResponse;
  if (!(urlResponse.success && uploadUrl && uploadId)) {
    throw new Error(urlResponse.error || 'Failed to generate upload URL');
  }

  logger.info({
    msg: 'Received upload URL, uploading document',
    sessionId,
    uploadId,
    expiresIn: urlResponse.expiresIn,
  });

  // Send the server-side-encryption header only when the upload URL lists it among its
  // signed headers (X-Amz-SignedHeaders query parameter)
  const signedHeaders = new URL(uploadUrl).searchParams.get('X-Amz-SignedHeaders');
  const headers: Record<string, string> = signedHeaders?.includes('x-amz-server-side-encryption')
    ? { 'x-amz-server-side-encryption': 'aws:kms' }
    : {};

  const uploadResponse = await fetch(uploadUrl, {
    method: 'PUT',
    headers,
    body: new Uint8Array(contents),
  });

  if (uploadResponse.ok) {
    logger.info({ msg: 'Successfully uploaded document', sessionId, uploadId });
    return uploadId;
  }

  // Surface the status and response body so a failed upload can be diagnosed
  const errorText = await uploadResponse.text();
  throw new Error(`Failed to upload document: ${uploadResponse.status} - ${errorText}`);
}
|
|
@@ -94,12 +94,15 @@ export const addAppToolNode = async ({
|
|
|
94
94
|
logger.debug({
|
|
95
95
|
message: '[Node] Omitting parameters from system prompt due to length',
|
|
96
96
|
node: node.name,
|
|
97
|
-
parameterLengths: Object.entries(compiledParameters).reduce(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
97
|
+
parameterLengths: Object.entries(compiledParameters).reduce(
|
|
98
|
+
(acc, [key, value]) => {
|
|
99
|
+
if (typeof value === 'string') {
|
|
100
|
+
acc[key] = value.length;
|
|
101
|
+
}
|
|
102
|
+
return acc;
|
|
103
|
+
},
|
|
104
|
+
{} as Record<string, number>,
|
|
105
|
+
),
|
|
103
106
|
});
|
|
104
107
|
parametersString = '[Parameters omitted - one or more values exceed 1000 characters]';
|
|
105
108
|
} else {
|
|
@@ -145,12 +148,32 @@ export const addAppToolNode = async ({
|
|
|
145
148
|
if (toolCallMessage instanceof ToolMessage) {
|
|
146
149
|
state.messages.push(toolCallMessage);
|
|
147
150
|
}
|
|
151
|
+
|
|
152
|
+
// Normalise the tool output before it is recorded: decode string content as JSON when
// possible and, if the decoded value is an object carrying a `result` key, keep only that
// value. Any other content is kept unchanged.
let parsedContent: any;
if (toolCallMessage instanceof ToolMessage) {
  parsedContent = toolCallMessage.content;
  if (typeof toolCallMessage.content === 'string') {
    try {
      parsedContent = JSON.parse(toolCallMessage.content);
    } catch {
      // Content is not valid JSON - keep the original string
    }
  }
  // The `in` operator throws a TypeError on primitives and null, which is what remains
  // here when the content is a plain (non-JSON) string or JSON that decodes to a
  // number/string/boolean/null. Only look for `result` on real objects.
  if (typeof parsedContent === 'object' && parsedContent !== null && 'result' in parsedContent) {
    parsedContent = parsedContent.result;
  }
}
|
|
167
|
+
|
|
148
168
|
state.history.push(
|
|
149
169
|
createHistoryStep<AppActionInvocationHistoryStep>(state.history, {
|
|
150
170
|
type: NodeType.APP_TOOL,
|
|
151
171
|
nodeId: node.name,
|
|
152
172
|
nodeDisplayName: node.displayName!,
|
|
153
|
-
raw:
|
|
173
|
+
raw: {
|
|
174
|
+
...AIToolCallMessage.tool_calls[0],
|
|
175
|
+
result: parsedContent,
|
|
176
|
+
},
|
|
154
177
|
appName: node.appName,
|
|
155
178
|
messageIds: [AIToolCallMessage.tool_calls[0].id],
|
|
156
179
|
}),
|
|
@@ -1,109 +1,67 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { Tool } from '../types/Tools.types';
|
|
3
|
-
import {
|
|
3
|
+
import { parseDocumentAndExtractStructuredData, DocumentProcessingMode } from '../internalTools/documentExtraction/documentExtraction';
|
|
4
4
|
import { logger } from '../utils/logger';
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import {
|
|
8
|
-
FileUploadRequest,
|
|
9
|
-
FileUploadResponse,
|
|
10
|
-
FileUploadType,
|
|
11
|
-
DocumentProcessRequest,
|
|
12
|
-
DocumentProcessResponse,
|
|
13
|
-
mindedConnectionSocketMessageType,
|
|
14
|
-
} from '../platform/mindedConnectionTypes';
|
|
15
|
-
import * as mindedConnection from '../platform/mindedConnection';
|
|
5
|
+
import { NodeType } from '../types/Flows.types';
|
|
6
|
+
import { createZodSchemaFromFields } from '../utils/schemaUtils';
|
|
16
7
|
|
|
17
|
-
/**
|
|
18
|
-
* Check if a string is a URL
|
|
19
|
-
*/
|
|
20
|
-
function isUrl(source: string): boolean {
|
|
21
|
-
return source.startsWith('http://') || source.startsWith('https://');
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Schema for the tool
|
|
25
8
|
export const schema = z.object({
|
|
26
|
-
// Document source - can be a URL or file path (auto-detected)
|
|
27
9
|
documentSource: z.string().describe('URL or file path to the document to parse'),
|
|
28
|
-
|
|
29
|
-
// Processing parameters
|
|
30
|
-
extractRaw: z.boolean().optional().nullable().describe('Extract raw text without AI processing'),
|
|
31
|
-
schema: z.any().optional().nullable().describe('Zod schema for structured data extraction'),
|
|
32
|
-
systemPrompt: z.string().optional().nullable().describe('Prompt for guiding extraction'),
|
|
10
|
+
returnStructuredOutput: z.boolean().optional().default(false).describe('Whether to return structured output'),
|
|
33
11
|
});
|
|
34
12
|
|
|
35
|
-
|
|
13
|
+
/**
|
|
14
|
+
* Document parsing tool for flows.
|
|
15
|
+
* Parses documents and optionally extracts structured data using AI.
|
|
16
|
+
*
|
|
17
|
+
* - Raw mode: Returns plain text from document
|
|
18
|
+
* - Structured mode: Uses node's prompt and outputSchema for AI extraction
|
|
19
|
+
*/
|
|
20
|
+
const parseDocumentTool: Tool<typeof schema> = {
|
|
36
21
|
name: 'minded-parse-documents',
|
|
37
22
|
description:
|
|
38
23
|
'Parse and extract data from documents (PDFs, images, Word docs, etc.). Provide a URL or file path and optionally a schema or prompt for extraction. Includes built-in AI extraction - no separate extraction tool needed.',
|
|
39
24
|
input: schema,
|
|
40
25
|
isGlobal: false,
|
|
41
26
|
execute: async ({ input, state, agent }) => {
|
|
42
|
-
const combinedInput = input as z.infer<typeof schema>;
|
|
43
|
-
|
|
44
27
|
try {
|
|
45
|
-
if (!
|
|
28
|
+
if (!input.documentSource) {
|
|
46
29
|
throw new Error('documentSource is required - provide a URL or file path');
|
|
47
30
|
}
|
|
48
31
|
|
|
49
|
-
const
|
|
50
|
-
|
|
32
|
+
const node = await agent.getCurrentNode(state.sessionId);
|
|
33
|
+
const appToolNode = node?.type === NodeType.APP_TOOL ? node : undefined;
|
|
51
34
|
|
|
52
35
|
// Get processing mode from environment variable
|
|
53
|
-
const processingMode =
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
msg: 'Using local document processing',
|
|
72
|
-
sessionId: state.sessionId,
|
|
73
|
-
sourceType: isDocumentUrl ? 'url' : 'path',
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
const extractionOptions: Parameters<typeof extractFromDocument>[0] = isDocumentUrl
|
|
77
|
-
? { documentUrl: combinedInput.documentSource }
|
|
78
|
-
: { documentPath: combinedInput.documentSource };
|
|
79
|
-
|
|
80
|
-
const localResult = await extractFromDocument(extractionOptions);
|
|
81
|
-
rawContent = localResult.data as string;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
// Process with LLM if needed
|
|
85
|
-
let finalData: any = rawContent;
|
|
86
|
-
|
|
87
|
-
if (!combinedInput.extractRaw && agent.llm) {
|
|
88
|
-
const llmResult = await extractFromDocument({
|
|
89
|
-
documentContent: rawContent,
|
|
90
|
-
llm: agent.llm,
|
|
91
|
-
schema: combinedInput.schema,
|
|
92
|
-
systemPrompt: combinedInput.systemPrompt || undefined,
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
finalData = llmResult.data;
|
|
96
|
-
}
|
|
36
|
+
const processingMode =
|
|
37
|
+
process.env.DOCUMENT_PROCESSING_MODE === 'local' ? DocumentProcessingMode.LOCAL : DocumentProcessingMode.MANAGED;
|
|
38
|
+
const llamaCloudApiKey: string | undefined = process.env.LLAMA_CLOUD_API_KEY;
|
|
39
|
+
const outputSchema =
|
|
40
|
+
input.returnStructuredOutput && agent.llm && appToolNode && appToolNode.outputSchema?.length
|
|
41
|
+
? createZodSchemaFromFields(appToolNode.outputSchema)
|
|
42
|
+
: undefined;
|
|
43
|
+
|
|
44
|
+
const result = await parseDocumentAndExtractStructuredData({
|
|
45
|
+
documentSource: input.documentSource,
|
|
46
|
+
processingMode,
|
|
47
|
+
llamaCloudApiKey,
|
|
48
|
+
sessionId: state.sessionId,
|
|
49
|
+
llm: agent.llm,
|
|
50
|
+
returnStructuredOutput: input.returnStructuredOutput,
|
|
51
|
+
outputSchema: outputSchema,
|
|
52
|
+
outputSchemaPrompt: appToolNode?.prompt,
|
|
53
|
+
});
|
|
97
54
|
|
|
98
55
|
state.memory.lastParsedDocument = {
|
|
99
|
-
source:
|
|
56
|
+
source: input.documentSource,
|
|
100
57
|
extractedAt: new Date().toISOString(),
|
|
101
|
-
|
|
58
|
+
returnStructuredOutput: input.returnStructuredOutput,
|
|
102
59
|
processingMode,
|
|
60
|
+
structuredContent: result.structuredContent,
|
|
103
61
|
};
|
|
104
62
|
|
|
105
63
|
return {
|
|
106
|
-
result:
|
|
64
|
+
result: result.structuredContent ?? result.rawContent,
|
|
107
65
|
};
|
|
108
66
|
} catch (err) {
|
|
109
67
|
logger.error({
|
|
@@ -113,6 +71,7 @@ const parseDocumentTool: Tool<typeof schema, any> = {
|
|
|
113
71
|
});
|
|
114
72
|
|
|
115
73
|
state.memory.documentParsingError = err instanceof Error ? err.message : String(err);
|
|
74
|
+
|
|
116
75
|
return {
|
|
117
76
|
result: `Failed to parse document: ${err instanceof Error ? err.message : String(err)}`,
|
|
118
77
|
};
|
|
@@ -120,131 +79,4 @@ const parseDocumentTool: Tool<typeof schema, any> = {
|
|
|
120
79
|
},
|
|
121
80
|
};
|
|
122
81
|
|
|
123
|
-
/**
|
|
124
|
-
* Process document using managed backend service
|
|
125
|
-
*/
|
|
126
|
-
async function processManagedDocument(params: {
|
|
127
|
-
sessionId: string;
|
|
128
|
-
documentSource: string;
|
|
129
|
-
isUrl: boolean;
|
|
130
|
-
}): Promise<{ rawContent: string; metadata: any }> {
|
|
131
|
-
const { sessionId, documentSource, isUrl: isDocumentUrl } = params;
|
|
132
|
-
|
|
133
|
-
let uploadId: string | undefined;
|
|
134
|
-
let documentUrl: string | undefined;
|
|
135
|
-
|
|
136
|
-
if (isDocumentUrl) {
|
|
137
|
-
documentUrl = documentSource;
|
|
138
|
-
} else {
|
|
139
|
-
// Upload file for processing
|
|
140
|
-
logger.info({
|
|
141
|
-
msg: 'Uploading document for processing',
|
|
142
|
-
sessionId,
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
uploadId = await uploadDocumentForProcessing({
|
|
146
|
-
sessionId,
|
|
147
|
-
documentPath: documentSource,
|
|
148
|
-
});
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
logger.info({
|
|
152
|
-
msg: 'Requesting document processing via socket',
|
|
153
|
-
sessionId,
|
|
154
|
-
hasUrl: !!documentUrl,
|
|
155
|
-
hasUploadId: !!uploadId,
|
|
156
|
-
});
|
|
157
|
-
|
|
158
|
-
// Use 2-minute timeout and maxRetries=1 for document processing
|
|
159
|
-
// No retries on timeout to avoid duplicate processing
|
|
160
|
-
const response = await mindedConnection.awaitEmit<DocumentProcessRequest, DocumentProcessResponse>(
|
|
161
|
-
mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
|
|
162
|
-
{
|
|
163
|
-
type: mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
|
|
164
|
-
sessionId,
|
|
165
|
-
documentUrl,
|
|
166
|
-
uploadId,
|
|
167
|
-
},
|
|
168
|
-
180000, // 2 minutes timeout
|
|
169
|
-
1, // No retries on timeout to prevent duplicate processing
|
|
170
|
-
);
|
|
171
|
-
|
|
172
|
-
if (!response.success) {
|
|
173
|
-
throw new Error(response.error || 'Failed to process document on backend');
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
return {
|
|
177
|
-
rawContent: response.rawContent || '',
|
|
178
|
-
metadata: response.metadata,
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
/**
|
|
183
|
-
* Upload document for processing
|
|
184
|
-
*/
|
|
185
|
-
async function uploadDocumentForProcessing(params: { sessionId: string; documentPath: string }): Promise<string> {
|
|
186
|
-
const { sessionId, documentPath } = params;
|
|
187
|
-
|
|
188
|
-
const fileBuffer = fs.readFileSync(documentPath);
|
|
189
|
-
const originalFileName = path.basename(documentPath);
|
|
190
|
-
const fileSize = fileBuffer.length;
|
|
191
|
-
|
|
192
|
-
logger.info({
|
|
193
|
-
msg: 'Requesting upload URL for document',
|
|
194
|
-
sessionId,
|
|
195
|
-
fileName: originalFileName,
|
|
196
|
-
fileSize,
|
|
197
|
-
});
|
|
198
|
-
|
|
199
|
-
const urlResponse = await mindedConnection.awaitEmit<FileUploadRequest, FileUploadResponse>(
|
|
200
|
-
mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
|
|
201
|
-
{
|
|
202
|
-
type: mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
|
|
203
|
-
sessionId,
|
|
204
|
-
fileName: originalFileName,
|
|
205
|
-
fileSize,
|
|
206
|
-
uploadType: FileUploadType.DOCUMENT_PROCESSING,
|
|
207
|
-
},
|
|
208
|
-
30000,
|
|
209
|
-
);
|
|
210
|
-
|
|
211
|
-
if (!urlResponse.success || !urlResponse.uploadUrl || !urlResponse.uploadId) {
|
|
212
|
-
throw new Error(urlResponse.error || 'Failed to generate upload URL');
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
logger.info({
|
|
216
|
-
msg: 'Received upload URL, uploading document',
|
|
217
|
-
sessionId,
|
|
218
|
-
uploadId: urlResponse.uploadId,
|
|
219
|
-
expiresIn: urlResponse.expiresIn,
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
const urlObj = new URL(urlResponse.uploadUrl);
|
|
223
|
-
const signedHeaders = urlObj.searchParams.get('X-Amz-SignedHeaders');
|
|
224
|
-
const headers: Record<string, string> = {};
|
|
225
|
-
|
|
226
|
-
if (signedHeaders?.includes('x-amz-server-side-encryption')) {
|
|
227
|
-
headers['x-amz-server-side-encryption'] = 'aws:kms';
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
const uploadResponse = await fetch(urlResponse.uploadUrl, {
|
|
231
|
-
method: 'PUT',
|
|
232
|
-
headers,
|
|
233
|
-
body: new Uint8Array(fileBuffer),
|
|
234
|
-
});
|
|
235
|
-
|
|
236
|
-
if (!uploadResponse.ok) {
|
|
237
|
-
const errorText = await uploadResponse.text();
|
|
238
|
-
throw new Error(`Failed to upload document: ${uploadResponse.status} - ${errorText}`);
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
logger.info({
|
|
242
|
-
msg: 'Successfully uploaded document',
|
|
243
|
-
sessionId,
|
|
244
|
-
uploadId: urlResponse.uploadId,
|
|
245
|
-
});
|
|
246
|
-
|
|
247
|
-
return urlResponse.uploadId;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
82
|
export default parseDocumentTool;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Browser, BrowserContext, chromium, Page } from 'playwright';
|
|
1
|
+
import { Browser, BrowserContext, chromium, Locator, Page } from 'playwright';
|
|
2
2
|
import { BrowserTaskMode } from '../browserTask/types';
|
|
3
3
|
import { createBrowserSession, destroyBrowserSession } from '../browserTask/executeBrowserTask';
|
|
4
4
|
import { logger } from '../utils/logger';
|
|
@@ -170,6 +170,50 @@ class LogsCapture {
|
|
|
170
170
|
}
|
|
171
171
|
}
|
|
172
172
|
|
|
173
|
+
/**
 * Wrap a locator in a Proxy so that selected actions are logged and bracketed by screenshots.
 *
 * For each intercepted method the wrapper logs a description of the action, captures a
 * "before" screenshot of the page, runs the real locator method, captures an "after"
 * screenshot and finally returns the method's result. Every other member of the locator
 * is returned untouched.
 *
 * @param locator Locator to wrap.
 * @param page Page the screenshots are taken of.
 * @param screenshotCapture Used to capture the before/after screenshots.
 * @param logsCapture Receives the action description.
 * @param selector Selector the locator was created from, used only in the log text.
 * @returns A proxy that can be used in place of the locator.
 */
const createProxyLocator = (
  locator: Locator,
  page: Page,
  screenshotCapture: ScreenshotCapture,
  logsCapture: LogsCapture,
  selector?: string,
): Locator => {
  // Locator actions that get logging and before/after screenshots
  const interceptedActions = new Set<string>(['click', 'fill', 'type', 'selectOption', 'check', 'uncheck', 'setInputFiles']);

  return new Proxy(locator, {
    get(target, prop, receiver) {
      const member = Reflect.get(target, prop, receiver);

      // Anything that is not an intercepted action passes straight through
      if (typeof prop !== 'string' || !interceptedActions.has(prop)) {
        return member;
      }

      const actionName: string = prop;
      return async (...args: any[]) => {
        // Log first (this will trigger immediate upload), then take the "before" screenshot
        await logsCapture.addLog(formatLocatorActionLog(actionName, selector, args));
        await screenshotCapture.captureScreenshot(page, 'before');

        // Run the real method against the unwrapped locator
        const outcome = await Reflect.apply(member, target, args);

        await screenshotCapture.captureScreenshot(page, 'after');
        return outcome;
      };
    },
  });
};
|
|
216
|
+
|
|
173
217
|
/**
|
|
174
218
|
* Create a proxy page that intercepts actions and captures screenshots and logs
|
|
175
219
|
*/
|
|
@@ -179,13 +223,25 @@ const createProxyPage = (
|
|
|
179
223
|
logsCapture: LogsCapture,
|
|
180
224
|
): Page => {
|
|
181
225
|
// List of methods to intercept for screenshot capture and logging
|
|
182
|
-
const methodsToIntercept = ['click', 'fill', 'type', 'goto', 'selectOption', 'check', 'uncheck', 'setInputFiles'];
|
|
226
|
+
const methodsToIntercept = ['click', 'fill', 'type', 'goto', 'selectOption', 'check', 'uncheck', 'setInputFiles', 'waitForTimeout', 'evaluate'];
|
|
227
|
+
// Methods that return locators and need special handling
|
|
228
|
+
const locatorMethods = ['locator'];
|
|
183
229
|
|
|
184
230
|
const proxyPage = new Proxy(page, {
|
|
185
231
|
get(target, prop, receiver) {
|
|
186
232
|
const originalValue = Reflect.get(target, prop, receiver);
|
|
187
233
|
|
|
188
|
-
//
|
|
234
|
+
// Handle locator method - return a proxied locator
|
|
235
|
+
if (typeof prop === 'string' && locatorMethods.includes(prop)) {
|
|
236
|
+
return function (this: any, ...args: any[]) {
|
|
237
|
+
const locator = originalValue.apply(target, args);
|
|
238
|
+
// Extract selector from args if available (first argument is usually the selector)
|
|
239
|
+
const selector = args[0] && typeof args[0] === 'string' ? args[0] : undefined;
|
|
240
|
+
return createProxyLocator(locator, target, screenshotCapture, logsCapture, selector);
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// If it's one of the methods we want to intercept with screenshots
|
|
189
245
|
if (typeof prop === 'string' && methodsToIntercept.includes(prop)) {
|
|
190
246
|
return async function (this: any, ...args: any[]) {
|
|
191
247
|
// Log the action (this will trigger immediate upload)
|
|
@@ -216,7 +272,6 @@ const createProxyPage = (
|
|
|
216
272
|
* Format action logs for display
|
|
217
273
|
*/
|
|
218
274
|
const formatActionLog = (action: string, args: any[]): string => {
|
|
219
|
-
|
|
220
275
|
switch (action) {
|
|
221
276
|
case 'goto':
|
|
222
277
|
return `Navigate to: ${args[0]}`;
|
|
@@ -234,11 +289,41 @@ const formatActionLog = (action: string, args: any[]): string => {
|
|
|
234
289
|
return `Uncheck: ${args[0]}`;
|
|
235
290
|
case 'setInputFiles':
|
|
236
291
|
return `Set files for "${args[0]}"`;
|
|
292
|
+
case 'waitForTimeout':
|
|
293
|
+
return `Wait for timeout: ${args[0]}ms`;
|
|
294
|
+
case 'evaluate':
|
|
295
|
+
return `Evaluate: ${typeof args[0] === 'function' ? 'function' : String(args[0]).substring(0, 100)}`;
|
|
237
296
|
default:
|
|
238
297
|
return `${action}: ${JSON.stringify(args[0])}`;
|
|
239
298
|
}
|
|
240
299
|
};
|
|
241
300
|
|
|
301
|
+
/**
 * Build the human-readable log line for an action performed on a locator.
 *
 * @param action Name of the locator method being invoked.
 * @param selector Selector the locator was created from; when missing or empty the literal
 *                 text "locator" is used in its place.
 * @param args Arguments passed to the locator method; only the first one is ever shown.
 * @returns The log line describing the action.
 */
const formatLocatorActionLog = (action: string, selector: string | undefined, args: any[]): string => {
  const label = selector || 'locator';

  // Thunks keep the formatting lazy: JSON.stringify only runs for the actions that use it
  const describers = new Map<string, () => string>([
    ['click', () => `Click locator: ${label}`],
    ['fill', () => `Fill locator "${label}" with: ${args[0]}`],
    ['type', () => `Type in locator "${label}": ${args[0]}`],
    ['selectOption', () => `Select option in locator "${label}": ${JSON.stringify(args[0])}`],
    ['check', () => `Check locator: ${label}`],
    ['uncheck', () => `Uncheck locator: ${label}`],
    ['setInputFiles', () => `Set files for locator "${label}"`],
  ]);

  const describe = describers.get(action);
  // Unknown actions fall back to a generic line showing the first argument
  return describe ? describe() : `Locator ${action}: ${JSON.stringify(args[0])}`;
};
|
|
326
|
+
|
|
242
327
|
// Helper function to load cookies for a session
|
|
243
328
|
const loadCookiesForSession = async (context: BrowserContext, sessionId: string): Promise<void> => {
|
|
244
329
|
const stored = await cookieStore.load();
|
package/src/types/Flows.types.ts
CHANGED
package/src/utils/schemaUtils.ts
CHANGED