@minded-ai/mindedjs 3.0.8-beta.12 → 3.1.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/cli/index.js +2 -9
  2. package/dist/cli/index.js.map +1 -1
  3. package/dist/cli/runCommand.d.ts +1 -1
  4. package/dist/cli/runCommand.d.ts.map +1 -1
  5. package/dist/cli/runCommand.js +31 -23
  6. package/dist/cli/runCommand.js.map +1 -1
  7. package/dist/index.d.ts +2 -1
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +6 -3
  10. package/dist/index.js.map +1 -1
  11. package/dist/internalTools/documentExtraction/documentExtraction.d.ts +112 -102
  12. package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
  13. package/dist/internalTools/documentExtraction/documentExtraction.js +146 -705
  14. package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
  15. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts +57 -0
  16. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts.map +1 -0
  17. package/dist/internalTools/documentExtraction/extractStructuredData.js +121 -0
  18. package/dist/internalTools/documentExtraction/extractStructuredData.js.map +1 -0
  19. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts +16 -0
  20. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts.map +1 -0
  21. package/dist/internalTools/documentExtraction/parseDocumentLocal.js +547 -0
  22. package/dist/internalTools/documentExtraction/parseDocumentLocal.js.map +1 -0
  23. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts +13 -0
  24. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts.map +1 -0
  25. package/dist/internalTools/documentExtraction/parseDocumentManaged.js +150 -0
  26. package/dist/internalTools/documentExtraction/parseDocumentManaged.js.map +1 -0
  27. package/dist/nodes/addAppToolNode.d.ts.map +1 -1
  28. package/dist/nodes/addAppToolNode.js +20 -1
  29. package/dist/nodes/addAppToolNode.js.map +1 -1
  30. package/dist/toolsLibrary/classifier.d.ts +2 -2
  31. package/dist/toolsLibrary/parseDocument.d.ts +11 -10
  32. package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
  33. package/dist/toolsLibrary/parseDocument.js +33 -189
  34. package/dist/toolsLibrary/parseDocument.js.map +1 -1
  35. package/dist/toolsLibrary/withBrowserSession.d.ts.map +1 -1
  36. package/dist/toolsLibrary/withBrowserSession.js +70 -2
  37. package/dist/toolsLibrary/withBrowserSession.js.map +1 -1
  38. package/dist/types/Flows.types.d.ts +1 -0
  39. package/dist/types/Flows.types.d.ts.map +1 -1
  40. package/dist/types/Flows.types.js.map +1 -1
  41. package/dist/utils/schemaUtils.js +1 -1
  42. package/dist/utils/schemaUtils.js.map +1 -1
  43. package/docs/tooling/document-processing.md +235 -174
  44. package/package.json +2 -1
  45. package/src/cli/index.ts +2 -10
  46. package/src/cli/runCommand.ts +31 -25
  47. package/src/index.ts +2 -1
  48. package/src/internalTools/documentExtraction/documentExtraction.ts +184 -767
  49. package/src/internalTools/documentExtraction/extractStructuredData.ts +140 -0
  50. package/src/internalTools/documentExtraction/parseDocumentLocal.ts +660 -0
  51. package/src/internalTools/documentExtraction/parseDocumentManaged.ts +152 -0
  52. package/src/nodes/addAppToolNode.ts +30 -7
  53. package/src/toolsLibrary/parseDocument.ts +38 -206
  54. package/src/toolsLibrary/withBrowserSession.ts +89 -4
  55. package/src/types/Flows.types.ts +1 -0
  56. package/src/utils/schemaUtils.ts +1 -1
@@ -0,0 +1,152 @@
1
+ import fsp from 'fs/promises';
2
+ import path from 'path';
3
+ import {
4
+ DocumentProcessRequest,
5
+ DocumentProcessResponse,
6
+ FileUploadRequest,
7
+ FileUploadResponse,
8
+ FileUploadType,
9
+ mindedConnectionSocketMessageType,
10
+ } from '../../platform/mindedConnectionTypes';
11
+ import { logger } from '../../utils/logger';
12
+ import * as mindedConnection from '../../platform/mindedConnection';
13
+
14
+ /**
15
+ * Process document using managed backend service
16
+ */
17
+ export async function parseDocumentWithManagedService({
18
+ documentSource,
19
+ isDocumentUrl,
20
+ sessionId,
21
+ }: {
22
+ isDocumentUrl: boolean;
23
+ documentSource: string;
24
+ sessionId: string;
25
+ }): Promise<{ rawContent: string; metadata?: DocumentProcessResponse['metadata'] }> {
26
+ logger.info({
27
+ msg: 'Parsing document with managed service',
28
+ sessionId,
29
+ documentSource,
30
+ sourceType: isDocumentUrl ? 'url' : 'path',
31
+ });
32
+
33
+ let uploadId: string | undefined;
34
+ let documentUrl: string | undefined;
35
+
36
+ if (isDocumentUrl) {
37
+ documentUrl = documentSource;
38
+ } else {
39
+ // Upload file for processing
40
+ logger.info({
41
+ msg: 'Uploading document for parsing',
42
+ sessionId,
43
+ });
44
+
45
+ uploadId = await uploadDocumentForProcessing({
46
+ sessionId,
47
+ documentPath: documentSource,
48
+ });
49
+ }
50
+
51
+ logger.info({
52
+ msg: 'Requesting document parsing via socket',
53
+ sessionId,
54
+ documentSource,
55
+ hasUrl: !!documentUrl,
56
+ hasUploadId: !!uploadId,
57
+ });
58
+
59
+ // Use 3-minute timeout (180000 ms) and maxRetries=1 for document processing
60
+ // No retries on timeout to avoid duplicate processing
61
+ const response = await mindedConnection.awaitEmit<DocumentProcessRequest, DocumentProcessResponse>(
62
+ mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
63
+ {
64
+ type: mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
65
+ sessionId,
66
+ documentUrl,
67
+ uploadId,
68
+ },
69
+ 180000, // 3 minutes timeout (180000 ms)
70
+ 1, // No retries on timeout to prevent duplicate processing
71
+ );
72
+
73
+ if (!response.success) {
74
+ throw new Error(response.error || 'Failed to process document on backend');
75
+ }
76
+
77
+ logger.info({
78
+ msg: 'Got document parsing response',
79
+ sessionId,
80
+ });
81
+
82
+ return {
83
+ rawContent: response.rawContent || '',
84
+ metadata: response.metadata,
85
+ };
86
+ }
87
+
88
+ /**
89
+ * Upload document for processing
90
+ */
91
+ async function uploadDocumentForProcessing({ sessionId, documentPath }: { sessionId: string; documentPath: string }): Promise<string> {
92
+ const fileBuffer = await fsp.readFile(documentPath);
93
+ const originalFileName = path.basename(documentPath);
94
+ const fileSize = fileBuffer.length;
95
+
96
+ logger.info({
97
+ msg: 'Requesting upload URL for document',
98
+ sessionId,
99
+ fileName: originalFileName,
100
+ fileSize,
101
+ });
102
+
103
+ const urlResponse = await mindedConnection.awaitEmit<FileUploadRequest, FileUploadResponse>(
104
+ mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
105
+ {
106
+ type: mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
107
+ sessionId,
108
+ fileName: originalFileName,
109
+ fileSize,
110
+ uploadType: FileUploadType.DOCUMENT_PROCESSING,
111
+ },
112
+ 30000,
113
+ );
114
+
115
+ if (!urlResponse.success || !urlResponse.uploadUrl || !urlResponse.uploadId) {
116
+ throw new Error(urlResponse.error || 'Failed to generate upload URL');
117
+ }
118
+
119
+ logger.info({
120
+ msg: 'Received upload URL, uploading document',
121
+ sessionId,
122
+ uploadId: urlResponse.uploadId,
123
+ expiresIn: urlResponse.expiresIn,
124
+ });
125
+
126
+ const urlObj = new URL(urlResponse.uploadUrl);
127
+ const signedHeaders = urlObj.searchParams.get('X-Amz-SignedHeaders');
128
+ const headers: Record<string, string> = {};
129
+
130
+ if (signedHeaders?.includes('x-amz-server-side-encryption')) {
131
+ headers['x-amz-server-side-encryption'] = 'aws:kms';
132
+ }
133
+
134
+ const uploadResponse = await fetch(urlResponse.uploadUrl, {
135
+ method: 'PUT',
136
+ headers,
137
+ body: new Uint8Array(fileBuffer),
138
+ });
139
+
140
+ if (!uploadResponse.ok) {
141
+ const errorText = await uploadResponse.text();
142
+ throw new Error(`Failed to upload document: ${uploadResponse.status} - ${errorText}`);
143
+ }
144
+
145
+ logger.info({
146
+ msg: 'Successfully uploaded document',
147
+ sessionId,
148
+ uploadId: urlResponse.uploadId,
149
+ });
150
+
151
+ return urlResponse.uploadId;
152
+ }
@@ -94,12 +94,15 @@ export const addAppToolNode = async ({
94
94
  logger.debug({
95
95
  message: '[Node] Omitting parameters from system prompt due to length',
96
96
  node: node.name,
97
- parameterLengths: Object.entries(compiledParameters).reduce((acc, [key, value]) => {
98
- if (typeof value === 'string') {
99
- acc[key] = value.length;
100
- }
101
- return acc;
102
- }, {} as Record<string, number>),
97
+ parameterLengths: Object.entries(compiledParameters).reduce(
98
+ (acc, [key, value]) => {
99
+ if (typeof value === 'string') {
100
+ acc[key] = value.length;
101
+ }
102
+ return acc;
103
+ },
104
+ {} as Record<string, number>,
105
+ ),
103
106
  });
104
107
  parametersString = '[Parameters omitted - one or more values exceed 1000 characters]';
105
108
  } else {
@@ -145,12 +148,32 @@ export const addAppToolNode = async ({
145
148
  if (toolCallMessage instanceof ToolMessage) {
146
149
  state.messages.push(toolCallMessage);
147
150
  }
151
+
152
+ // Try to parse content if it's a JSON string and extract the result object
153
+ let parsedContent: any;
154
+ if (toolCallMessage instanceof ToolMessage) {
155
+ parsedContent = toolCallMessage.content;
156
+ try {
157
+ if (typeof toolCallMessage.content === 'string') {
158
+ parsedContent = JSON.parse(toolCallMessage.content);
159
+ }
160
+ } catch {
161
+ // If parsing fails, use the original content
162
+ }
163
+ if ('result' in parsedContent) {
164
+ parsedContent = parsedContent.result;
165
+ }
166
+ }
167
+
148
168
  state.history.push(
149
169
  createHistoryStep<AppActionInvocationHistoryStep>(state.history, {
150
170
  type: NodeType.APP_TOOL,
151
171
  nodeId: node.name,
152
172
  nodeDisplayName: node.displayName!,
153
- raw: AIToolCallMessage.tool_calls[0],
173
+ raw: {
174
+ ...AIToolCallMessage.tool_calls[0],
175
+ result: parsedContent,
176
+ },
154
177
  appName: node.appName,
155
178
  messageIds: [AIToolCallMessage.tool_calls[0].id],
156
179
  }),
@@ -1,109 +1,67 @@
1
1
  import { z } from 'zod';
2
2
  import { Tool } from '../types/Tools.types';
3
- import { extractFromDocument } from '../internalTools/documentExtraction/documentExtraction';
3
+ import { parseDocumentAndExtractStructuredData, DocumentProcessingMode } from '../internalTools/documentExtraction/documentExtraction';
4
4
  import { logger } from '../utils/logger';
5
- import * as fs from 'fs';
6
- import * as path from 'path';
7
- import {
8
- FileUploadRequest,
9
- FileUploadResponse,
10
- FileUploadType,
11
- DocumentProcessRequest,
12
- DocumentProcessResponse,
13
- mindedConnectionSocketMessageType,
14
- } from '../platform/mindedConnectionTypes';
15
- import * as mindedConnection from '../platform/mindedConnection';
5
+ import { NodeType } from '../types/Flows.types';
6
+ import { createZodSchemaFromFields } from '../utils/schemaUtils';
16
7
 
17
- /**
18
- * Check if a string is a URL
19
- */
20
- function isUrl(source: string): boolean {
21
- return source.startsWith('http://') || source.startsWith('https://');
22
- }
23
-
24
- // Schema for the tool
25
8
  export const schema = z.object({
26
- // Document source - can be a URL or file path (auto-detected)
27
9
  documentSource: z.string().describe('URL or file path to the document to parse'),
28
-
29
- // Processing parameters
30
- extractRaw: z.boolean().optional().nullable().describe('Extract raw text without AI processing'),
31
- schema: z.any().optional().nullable().describe('Zod schema for structured data extraction'),
32
- systemPrompt: z.string().optional().nullable().describe('Prompt for guiding extraction'),
10
+ returnStructuredOutput: z.boolean().optional().default(false).describe('Whether to return structured output'),
33
11
  });
34
12
 
35
- const parseDocumentTool: Tool<typeof schema, any> = {
13
+ /**
14
+ * Document parsing tool for flows.
15
+ * Parses documents and optionally extracts structured data using AI.
16
+ *
17
+ * - Raw mode: Returns plain text from document
18
+ * - Structured mode: Uses node's prompt and outputSchema for AI extraction
19
+ */
20
+ const parseDocumentTool: Tool<typeof schema> = {
36
21
  name: 'minded-parse-documents',
37
22
  description:
38
23
  'Parse and extract data from documents (PDFs, images, Word docs, etc.). Provide a URL or file path and optionally a schema or prompt for extraction. Includes built-in AI extraction - no separate extraction tool needed.',
39
24
  input: schema,
40
25
  isGlobal: false,
41
26
  execute: async ({ input, state, agent }) => {
42
- const combinedInput = input as z.infer<typeof schema>;
43
-
44
27
  try {
45
- if (!combinedInput.documentSource) {
28
+ if (!input.documentSource) {
46
29
  throw new Error('documentSource is required - provide a URL or file path');
47
30
  }
48
31
 
49
- const isDocumentUrl = isUrl(combinedInput.documentSource);
50
- let rawContent: string;
32
+ const node = await agent.getCurrentNode(state.sessionId);
33
+ const appToolNode = node?.type === NodeType.APP_TOOL ? node : undefined;
51
34
 
52
35
  // Get processing mode from environment variable
53
- const processingMode = process.env.DOCUMENT_PROCESSING_MODE === 'local' ? 'local' : 'managed';
54
-
55
- if (processingMode === 'managed') {
56
- logger.info({
57
- msg: 'Using managed document processing',
58
- sessionId: state.sessionId,
59
- sourceType: isDocumentUrl ? 'url' : 'path',
60
- });
61
-
62
- const result = await processManagedDocument({
63
- sessionId: state.sessionId,
64
- documentSource: combinedInput.documentSource,
65
- isUrl: isDocumentUrl,
66
- });
67
-
68
- rawContent = result.rawContent;
69
- } else {
70
- logger.info({
71
- msg: 'Using local document processing',
72
- sessionId: state.sessionId,
73
- sourceType: isDocumentUrl ? 'url' : 'path',
74
- });
75
-
76
- const extractionOptions: Parameters<typeof extractFromDocument>[0] = isDocumentUrl
77
- ? { documentUrl: combinedInput.documentSource }
78
- : { documentPath: combinedInput.documentSource };
79
-
80
- const localResult = await extractFromDocument(extractionOptions);
81
- rawContent = localResult.data as string;
82
- }
83
-
84
- // Process with LLM if needed
85
- let finalData: any = rawContent;
86
-
87
- if (!combinedInput.extractRaw && agent.llm) {
88
- const llmResult = await extractFromDocument({
89
- documentContent: rawContent,
90
- llm: agent.llm,
91
- schema: combinedInput.schema,
92
- systemPrompt: combinedInput.systemPrompt || undefined,
93
- });
94
-
95
- finalData = llmResult.data;
96
- }
36
+ const processingMode =
37
+ process.env.DOCUMENT_PROCESSING_MODE === 'local' ? DocumentProcessingMode.LOCAL : DocumentProcessingMode.MANAGED;
38
+ const llamaCloudApiKey: string | undefined = process.env.LLAMA_CLOUD_API_KEY;
39
+ const outputSchema =
40
+ input.returnStructuredOutput && agent.llm && appToolNode && appToolNode.outputSchema?.length
41
+ ? createZodSchemaFromFields(appToolNode.outputSchema)
42
+ : undefined;
43
+
44
+ const result = await parseDocumentAndExtractStructuredData({
45
+ documentSource: input.documentSource,
46
+ processingMode,
47
+ llamaCloudApiKey,
48
+ sessionId: state.sessionId,
49
+ llm: agent.llm,
50
+ returnStructuredOutput: input.returnStructuredOutput,
51
+ outputSchema: outputSchema,
52
+ outputSchemaPrompt: appToolNode?.prompt,
53
+ });
97
54
 
98
55
  state.memory.lastParsedDocument = {
99
- source: combinedInput.documentSource,
56
+ source: input.documentSource,
100
57
  extractedAt: new Date().toISOString(),
101
- extractedRaw: !!combinedInput.extractRaw,
58
+ returnStructuredOutput: input.returnStructuredOutput,
102
59
  processingMode,
60
+ structuredContent: result.structuredContent,
103
61
  };
104
62
 
105
63
  return {
106
- result: finalData,
64
+ result: result.structuredContent ?? result.rawContent,
107
65
  };
108
66
  } catch (err) {
109
67
  logger.error({
@@ -113,6 +71,7 @@ const parseDocumentTool: Tool<typeof schema, any> = {
113
71
  });
114
72
 
115
73
  state.memory.documentParsingError = err instanceof Error ? err.message : String(err);
74
+
116
75
  return {
117
76
  result: `Failed to parse document: ${err instanceof Error ? err.message : String(err)}`,
118
77
  };
@@ -120,131 +79,4 @@ const parseDocumentTool: Tool<typeof schema, any> = {
120
79
  },
121
80
  };
122
81
 
123
- /**
124
- * Process document using managed backend service
125
- */
126
- async function processManagedDocument(params: {
127
- sessionId: string;
128
- documentSource: string;
129
- isUrl: boolean;
130
- }): Promise<{ rawContent: string; metadata: any }> {
131
- const { sessionId, documentSource, isUrl: isDocumentUrl } = params;
132
-
133
- let uploadId: string | undefined;
134
- let documentUrl: string | undefined;
135
-
136
- if (isDocumentUrl) {
137
- documentUrl = documentSource;
138
- } else {
139
- // Upload file for processing
140
- logger.info({
141
- msg: 'Uploading document for processing',
142
- sessionId,
143
- });
144
-
145
- uploadId = await uploadDocumentForProcessing({
146
- sessionId,
147
- documentPath: documentSource,
148
- });
149
- }
150
-
151
- logger.info({
152
- msg: 'Requesting document processing via socket',
153
- sessionId,
154
- hasUrl: !!documentUrl,
155
- hasUploadId: !!uploadId,
156
- });
157
-
158
- // Use 2-minute timeout and maxRetries=1 for document processing
159
- // No retries on timeout to avoid duplicate processing
160
- const response = await mindedConnection.awaitEmit<DocumentProcessRequest, DocumentProcessResponse>(
161
- mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
162
- {
163
- type: mindedConnectionSocketMessageType.DOCUMENT_PROCESS,
164
- sessionId,
165
- documentUrl,
166
- uploadId,
167
- },
168
- 180000, // 2 minutes timeout
169
- 1, // No retries on timeout to prevent duplicate processing
170
- );
171
-
172
- if (!response.success) {
173
- throw new Error(response.error || 'Failed to process document on backend');
174
- }
175
-
176
- return {
177
- rawContent: response.rawContent || '',
178
- metadata: response.metadata,
179
- };
180
- }
181
-
182
- /**
183
- * Upload document for processing
184
- */
185
- async function uploadDocumentForProcessing(params: { sessionId: string; documentPath: string }): Promise<string> {
186
- const { sessionId, documentPath } = params;
187
-
188
- const fileBuffer = fs.readFileSync(documentPath);
189
- const originalFileName = path.basename(documentPath);
190
- const fileSize = fileBuffer.length;
191
-
192
- logger.info({
193
- msg: 'Requesting upload URL for document',
194
- sessionId,
195
- fileName: originalFileName,
196
- fileSize,
197
- });
198
-
199
- const urlResponse = await mindedConnection.awaitEmit<FileUploadRequest, FileUploadResponse>(
200
- mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
201
- {
202
- type: mindedConnectionSocketMessageType.UPLOAD_FILE_REQUEST,
203
- sessionId,
204
- fileName: originalFileName,
205
- fileSize,
206
- uploadType: FileUploadType.DOCUMENT_PROCESSING,
207
- },
208
- 30000,
209
- );
210
-
211
- if (!urlResponse.success || !urlResponse.uploadUrl || !urlResponse.uploadId) {
212
- throw new Error(urlResponse.error || 'Failed to generate upload URL');
213
- }
214
-
215
- logger.info({
216
- msg: 'Received upload URL, uploading document',
217
- sessionId,
218
- uploadId: urlResponse.uploadId,
219
- expiresIn: urlResponse.expiresIn,
220
- });
221
-
222
- const urlObj = new URL(urlResponse.uploadUrl);
223
- const signedHeaders = urlObj.searchParams.get('X-Amz-SignedHeaders');
224
- const headers: Record<string, string> = {};
225
-
226
- if (signedHeaders?.includes('x-amz-server-side-encryption')) {
227
- headers['x-amz-server-side-encryption'] = 'aws:kms';
228
- }
229
-
230
- const uploadResponse = await fetch(urlResponse.uploadUrl, {
231
- method: 'PUT',
232
- headers,
233
- body: new Uint8Array(fileBuffer),
234
- });
235
-
236
- if (!uploadResponse.ok) {
237
- const errorText = await uploadResponse.text();
238
- throw new Error(`Failed to upload document: ${uploadResponse.status} - ${errorText}`);
239
- }
240
-
241
- logger.info({
242
- msg: 'Successfully uploaded document',
243
- sessionId,
244
- uploadId: urlResponse.uploadId,
245
- });
246
-
247
- return urlResponse.uploadId;
248
- }
249
-
250
82
  export default parseDocumentTool;
@@ -1,4 +1,4 @@
1
- import { Browser, BrowserContext, chromium, Page } from 'playwright';
1
+ import { Browser, BrowserContext, chromium, Locator, Page } from 'playwright';
2
2
  import { BrowserTaskMode } from '../browserTask/types';
3
3
  import { createBrowserSession, destroyBrowserSession } from '../browserTask/executeBrowserTask';
4
4
  import { logger } from '../utils/logger';
@@ -170,6 +170,50 @@ class LogsCapture {
170
170
  }
171
171
  }
172
172
 
173
+ /**
174
+ * Create a proxy locator that intercepts actions and captures screenshots and logs
175
+ */
176
+ const createProxyLocator = (
177
+ locator: Locator,
178
+ page: Page,
179
+ screenshotCapture: ScreenshotCapture,
180
+ logsCapture: LogsCapture,
181
+ selector?: string,
182
+ ): Locator => {
183
+ // List of methods to intercept on locators
184
+ const locatorMethodsToIntercept = ['click', 'fill', 'type', 'selectOption', 'check', 'uncheck', 'setInputFiles'];
185
+
186
+ const proxyLocator = new Proxy(locator, {
187
+ get(target, prop, receiver) {
188
+ const originalValue = Reflect.get(target, prop, receiver);
189
+
190
+ // If it's one of the methods we want to intercept
191
+ if (typeof prop === 'string' && locatorMethodsToIntercept.includes(prop)) {
192
+ return async function (this: any, ...args: any[]) {
193
+ // Log the action (this will trigger immediate upload)
194
+ const actionDescription = formatLocatorActionLog(prop, selector, args);
195
+ await logsCapture.addLog(actionDescription);
196
+
197
+ // Capture "before" screenshot
198
+ await screenshotCapture.captureScreenshot(page, 'before');
199
+
200
+ // Call the original method
201
+ const result = await originalValue.apply(target, args);
202
+
203
+ // Capture "after" screenshot
204
+ await screenshotCapture.captureScreenshot(page, 'after');
205
+
206
+ return result;
207
+ };
208
+ }
209
+
210
+ return originalValue;
211
+ },
212
+ });
213
+
214
+ return proxyLocator;
215
+ };
216
+
173
217
  /**
174
218
  * Create a proxy page that intercepts actions and captures screenshots and logs
175
219
  */
@@ -179,13 +223,25 @@ const createProxyPage = (
179
223
  logsCapture: LogsCapture,
180
224
  ): Page => {
181
225
  // List of methods to intercept for screenshot capture and logging
182
- const methodsToIntercept = ['click', 'fill', 'type', 'goto', 'selectOption', 'check', 'uncheck', 'setInputFiles'];
226
+ const methodsToIntercept = ['click', 'fill', 'type', 'goto', 'selectOption', 'check', 'uncheck', 'setInputFiles', 'waitForTimeout', 'evaluate'];
227
+ // Methods that return locators and need special handling
228
+ const locatorMethods = ['locator'];
183
229
 
184
230
  const proxyPage = new Proxy(page, {
185
231
  get(target, prop, receiver) {
186
232
  const originalValue = Reflect.get(target, prop, receiver);
187
233
 
188
- // If it's one of the methods we want to intercept
234
+ // Handle locator method - return a proxied locator
235
+ if (typeof prop === 'string' && locatorMethods.includes(prop)) {
236
+ return function (this: any, ...args: any[]) {
237
+ const locator = originalValue.apply(target, args);
238
+ // Extract selector from args if available (first argument is usually the selector)
239
+ const selector = args[0] && typeof args[0] === 'string' ? args[0] : undefined;
240
+ return createProxyLocator(locator, target, screenshotCapture, logsCapture, selector);
241
+ };
242
+ }
243
+
244
+ // If it's one of the methods we want to intercept with screenshots
189
245
  if (typeof prop === 'string' && methodsToIntercept.includes(prop)) {
190
246
  return async function (this: any, ...args: any[]) {
191
247
  // Log the action (this will trigger immediate upload)
@@ -216,7 +272,6 @@ const createProxyPage = (
216
272
  * Format action logs for display
217
273
  */
218
274
  const formatActionLog = (action: string, args: any[]): string => {
219
-
220
275
  switch (action) {
221
276
  case 'goto':
222
277
  return `Navigate to: ${args[0]}`;
@@ -234,11 +289,41 @@ const formatActionLog = (action: string, args: any[]): string => {
234
289
  return `Uncheck: ${args[0]}`;
235
290
  case 'setInputFiles':
236
291
  return `Set files for "${args[0]}"`;
292
+ case 'waitForTimeout':
293
+ return `Wait for timeout: ${args[0]}ms`;
294
+ case 'evaluate':
295
+ return `Evaluate: ${typeof args[0] === 'function' ? 'function' : String(args[0]).substring(0, 100)}`;
237
296
  default:
238
297
  return `${action}: ${JSON.stringify(args[0])}`;
239
298
  }
240
299
  };
241
300
 
301
+ /**
302
+ * Format locator action logs for display
303
+ */
304
+ const formatLocatorActionLog = (action: string, selector: string | undefined, args: any[]): string => {
305
+ const locatorDescription = selector || 'locator';
306
+
307
+ switch (action) {
308
+ case 'click':
309
+ return `Click locator: ${locatorDescription}`;
310
+ case 'fill':
311
+ return `Fill locator "${locatorDescription}" with: ${args[0]}`;
312
+ case 'type':
313
+ return `Type in locator "${locatorDescription}": ${args[0]}`;
314
+ case 'selectOption':
315
+ return `Select option in locator "${locatorDescription}": ${JSON.stringify(args[0])}`;
316
+ case 'check':
317
+ return `Check locator: ${locatorDescription}`;
318
+ case 'uncheck':
319
+ return `Uncheck locator: ${locatorDescription}`;
320
+ case 'setInputFiles':
321
+ return `Set files for locator "${locatorDescription}"`;
322
+ default:
323
+ return `Locator ${action}: ${JSON.stringify(args[0])}`;
324
+ }
325
+ };
326
+
242
327
  // Helper function to load cookies for a session
243
328
  const loadCookiesForSession = async (context: BrowserContext, sessionId: string): Promise<void> => {
244
329
  const stored = await cookieStore.load();
@@ -250,6 +250,7 @@ export interface AppToolNode extends BaseNode, BaseAppNode {
250
250
  actionName: string;
251
251
  metadata: AppNodeMetadata;
252
252
  actionKey: string;
253
+ outputSchema?: OutputSchemaField[];
253
254
  }
254
255
 
255
256
  export interface AppToolNodeMindedMetadata {
@@ -54,7 +54,7 @@ export function createZodSchemaFromFields(
54
54
 
55
55
  // Handle optional fields
56
56
  if (field.required === false) {
57
- fieldSchema = fieldSchema.optional();
57
+ fieldSchema = fieldSchema.optional().nullable();
58
58
  }
59
59
 
60
60
  schemaFields[field.name] = fieldSchema;