@minded-ai/mindedjs 2.0.7 → 2.0.8-beta-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. package/dist/browserTask/README.md +419 -0
  2. package/dist/browserTask/browserAgent.py +632 -0
  3. package/dist/browserTask/captcha_isolated.png +0 -0
  4. package/dist/browserTask/executeBrowserTask.d.ts +12 -3
  5. package/dist/browserTask/executeBrowserTask.d.ts.map +1 -1
  6. package/dist/browserTask/executeBrowserTask.js +35 -3
  7. package/dist/browserTask/executeBrowserTask.js.map +1 -1
  8. package/dist/browserTask/executeBrowserTask.py +42 -0
  9. package/dist/browserTask/executeBrowserTask.ts +79 -0
  10. package/dist/browserTask/localBrowserTask.d.ts +21 -0
  11. package/dist/browserTask/localBrowserTask.d.ts.map +1 -0
  12. package/dist/browserTask/localBrowserTask.js +229 -0
  13. package/dist/browserTask/localBrowserTask.js.map +1 -0
  14. package/dist/browserTask/requirements.txt +8 -0
  15. package/dist/browserTask/setup.sh +144 -0
  16. package/dist/cli/index.js +0 -0
  17. package/dist/index.d.ts +2 -2
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +2 -1
  20. package/dist/index.js.map +1 -1
  21. package/dist/internalTools/retell.d.ts +12 -0
  22. package/dist/internalTools/retell.d.ts.map +1 -0
  23. package/dist/internalTools/retell.js +54 -0
  24. package/dist/internalTools/retell.js.map +1 -0
  25. package/dist/internalTools/sendPlaceholderMessage.d.ts +14 -0
  26. package/dist/internalTools/sendPlaceholderMessage.d.ts.map +1 -0
  27. package/dist/internalTools/sendPlaceholderMessage.js +61 -0
  28. package/dist/internalTools/sendPlaceholderMessage.js.map +1 -0
  29. package/dist/nodes/addBrowserTaskNode.d.ts.map +1 -1
  30. package/dist/nodes/addBrowserTaskNode.js +6 -1
  31. package/dist/nodes/addBrowserTaskNode.js.map +1 -1
  32. package/dist/nodes/addBrowserTaskRunNode.d.ts.map +1 -1
  33. package/dist/nodes/addBrowserTaskRunNode.js +1 -1
  34. package/dist/nodes/addBrowserTaskRunNode.js.map +1 -1
  35. package/dist/nodes/addRpaNode.d.ts +18 -0
  36. package/dist/nodes/addRpaNode.d.ts.map +1 -0
  37. package/dist/nodes/addRpaNode.js +251 -0
  38. package/dist/nodes/addRpaNode.js.map +1 -0
  39. package/dist/nodes/nodeFactory.d.ts.map +1 -1
  40. package/dist/nodes/nodeFactory.js +4 -0
  41. package/dist/nodes/nodeFactory.js.map +1 -1
  42. package/dist/types/Flows.types.d.ts +47 -2
  43. package/dist/types/Flows.types.d.ts.map +1 -1
  44. package/dist/types/Flows.types.js +13 -1
  45. package/dist/types/Flows.types.js.map +1 -1
  46. package/dist/utils/extractStateMemoryResponse.d.ts +5 -0
  47. package/dist/utils/extractStateMemoryResponse.d.ts.map +1 -0
  48. package/dist/utils/extractStateMemoryResponse.js +91 -0
  49. package/dist/utils/extractStateMemoryResponse.js.map +1 -0
  50. package/package.json +5 -2
  51. package/src/browserTask/executeBrowserTask.py +42 -0
  52. package/src/browserTask/executeBrowserTask.ts +36 -2
  53. package/src/browserTask/localBrowserTask.ts +250 -0
  54. package/src/index.ts +3 -0
  55. package/src/nodes/addBrowserTaskNode.ts +7 -2
  56. package/src/nodes/addBrowserTaskRunNode.ts +1 -0
  57. package/src/nodes/addRpaNode.ts +289 -0
  58. package/src/nodes/nodeFactory.ts +4 -0
  59. package/src/types/Flows.types.ts +49 -1
@@ -54,7 +54,7 @@ export const addBrowserTaskNode = async ({ graph, node, agent, llm }: AddBrowser
54
54
  const zodSchema = z.object(schemaFields);
55
55
 
56
56
  // Create langchain tool
57
- const tool = langchainTool(() => {}, {
57
+ const tool = langchainTool(() => { }, {
58
58
  name: 'browser-task',
59
59
  description: node.prompt,
60
60
  schema: zodSchema,
@@ -100,7 +100,12 @@ ${compiledPrompt}
100
100
  ${Object.keys(inputParams).length > 0 ? `# Input parameters:\n${JSON.stringify(inputParams, null, 2)}\n\n` : ''}`;
101
101
 
102
102
  // Create browser session using socket
103
- const session = await createBrowserSession(node.proxy, node.onPrem);
103
+ const session = await createBrowserSession({
104
+ sessionId: state.sessionId,
105
+ proxy: node.proxy,
106
+ onPrem: node.onPrem,
107
+ localRun: node.localRun,
108
+ });
104
109
 
105
110
  if (!session.sessionId || !session.cdpUrl) {
106
111
  throw new Error('Failed to create browser session: missing session details');
@@ -57,6 +57,7 @@ export const addBrowserTaskRunNode = async ({ graph, browserTaskNode, attachedTo
57
57
  keepAlive,
58
58
  hooks,
59
59
  browserTaskNode.onPrem,
60
+ browserTaskNode.localRun,
60
61
  toolSchemas,
61
62
  outputSchema,
62
63
  );
@@ -0,0 +1,289 @@
1
+ import { RunnableLike } from '@langchain/core/runnables';
2
+ import { NodeType, RpaNode, RpaActionType } from '../types/Flows.types';
3
+ import { PreCompiledGraph, stateAnnotation } from '../types/LangGraph.types';
4
+ import { Tool } from '../types/Tools.types';
5
+ import { AgentEventRequestPayloads } from '../events/AgentEvents';
6
+ import { EmitSignature, HistoryStep } from '../types/Agent.types';
7
+ import { Agent } from '../agent';
8
+ import { logger } from '../utils/logger';
9
+ import { createHistoryStep } from '../utils/history';
10
+ import { chromium, Browser, Page } from 'playwright';
11
+ import { LLMProviders } from '../types/LLM.types';
12
+
13
/**
 * Arguments accepted by `addRpaNode`.
 */
interface AddRpaNodeParams {
  /** Graph the RPA node callback is registered on. */
  graph: PreCompiledGraph;
  /** Flow definition of the RPA node (name, display name, steps, optional viewport). */
  node: RpaNode;
  /** Tools available to the agent; accepted for signature parity, not read by `addRpaNode`. */
  tools: Array<Tool<any, any>>;
  /** Event emitter; accepted for signature parity, not read by `addRpaNode`. */
  emit: EmitSignature<any, keyof AgentEventRequestPayloads<any>>;
  /** Owning agent; its interrupt session manager is consulted before and after the steps run. */
  agent: Agent;
  /** LLM provider instance forwarded to each step (required by EXTRACT_DATA steps). */
  llm: (typeof LLMProviders)[keyof typeof LLMProviders];
}
21
+
22
/**
 * Registers an RPA node on the graph under `node.name`.
 *
 * The registered callback attaches to the browser whose CDP endpoint is stored in
 * `state.cdpUrl`, runs `node.steps` in order on a page of the first browser context,
 * appends an RPA entry to `state.history`, clears `state.goto`, and returns the state.
 * The first failing step is logged and rethrown: later steps do not run and no
 * history entry is written for that invocation.
 *
 * `tools` and `emit` are part of the parameter object but are not read here.
 */
export const addRpaNode = async ({ graph, node, tools, emit, agent, llm }: AddRpaNodeParams) => {
  // Turns anything thrown into the message string used in log entries and step outcomes.
  const toMessage = (thrown: unknown): string => (thrown instanceof Error ? thrown.message : 'Unknown error');

  const runRpaNode: RunnableLike = async (state: typeof stateAnnotation.State) => {
    await agent.interruptSessionManager.checkQueueAndInterrupt(state.sessionId);
    logger.info({ msg: `[Node] Executing RPA node`, node: node.displayName, sessionId: state.sessionId });

    try {
      // The CDP endpoint must have been placed in the state before this node runs.
      const cdpUrl = state.cdpUrl;
      if (!cdpUrl) {
        throw new Error('CDP URL not found in state. Make sure a browser session is available.');
      }

      logger.debug({
        msg: '[RPA] Connecting to browser via CDP',
        cdpUrl,
        sessionId: state.sessionId,
        node: node.displayName,
      });

      // Attach to the existing browser. The connection is deliberately never closed here:
      // the browser session is expected to stay alive for other operations.
      // NOTE(review): every invocation opens a new CDP connection — confirm this is acceptable.
      const browser: Browser = await chromium.connectOverCDP(cdpUrl);
      const contexts = browser.contexts();
      if (contexts.length === 0) {
        throw new Error('No browser contexts found');
      }
      const context = contexts[0];

      // Reuse the first open page when there is one; otherwise open a fresh page.
      const openPages = context.pages();
      const page: Page = openPages.length === 0 ? await context.newPage() : openPages[0];

      if (node.viewport) {
        await page.setViewportSize(node.viewport);
      }

      // Run the steps strictly in order, collecting one outcome record per step.
      const results: Array<Record<string, unknown>> = [];
      for (let i = 0; i < node.steps.length; i += 1) {
        const step = node.steps[i];
        const stepIndex = i + 1; // 1-based position used in logs and outcome records

        logger.debug({
          msg: '[RPA] Executing step',
          stepIndex,
          stepType: step.type,
          sessionId: state.sessionId,
          node: node.displayName,
        });

        let result: unknown;
        try {
          result = await executeRpaStep(page, step, llm);
        } catch (stepError) {
          const error = toMessage(stepError);
          logger.error({
            msg: '[RPA] Step execution failed',
            stepIndex,
            stepType: step.type,
            error,
            sessionId: state.sessionId,
            node: node.displayName,
          });

          results.push({
            stepIndex,
            type: step.type,
            success: false,
            error,
          });

          // A failing step aborts the whole node.
          throw stepError;
        }

        results.push({
          stepIndex,
          type: step.type,
          success: true,
          result,
        });
      }

      // Record what was executed and what each step produced.
      state.history.push(
        createHistoryStep<HistoryStep>(state.history, {
          type: NodeType.RPA,
          nodeId: node.name,
          nodeDisplayName: node.displayName,
          raw: {
            steps: node.steps,
            results,
            viewport: node.viewport,
          },
          messageIds: [],
        }),
      );

      // Reset goto so the flow continues along its regular edges.
      state.goto = null;

      // Give queued interrupts a chance to fire now that the steps are done.
      await agent.interruptSessionManager.checkQueueAndInterrupt(state.sessionId, state);

      return state;
    } catch (error) {
      logger.error({
        msg: '[RPA] Error executing RPA node',
        error: toMessage(error),
        sessionId: state.sessionId,
        node: node.displayName,
      });

      throw error;
    }
  };

  graph.addNode(node.name, runRpaNode);
};
140
+
141
/**
 * Ensures a step that acts on an element actually names one.
 *
 * Without this check a step carrying neither `xpath` nor `selector` would do
 * nothing and still be reported as successful.
 *
 * @throws Error when the step has neither an `xpath` nor a `selector`.
 */
function assertRpaStepHasTarget(step: any): void {
  if (!step.xpath && !step.selector) {
    throw new Error(`RPA step of type "${step.type}" requires an xpath or a selector`);
  }
}

/**
 * Executes a single RPA step against a Playwright page.
 *
 * Element-targeting actions (click, type, select) prefer `step.xpath` and fall back to
 * `step.selector`; one of the two must be present.
 *
 * @param page Page the action is performed on.
 * @param step Step definition; `step.type` selects the action, the remaining fields are its arguments.
 * @param llm  LLM used by EXTRACT_DATA steps; other actions ignore it.
 * @returns A small record describing what was done (`action` plus action-specific fields).
 * @throws Error when a required field is missing (target, url, llm), when the action type is
 *         unknown, or when the underlying page / LLM call fails.
 */
async function executeRpaStep(page: Page, step: any, llm?: (typeof LLMProviders)[keyof typeof LLMProviders]): Promise<any> {
  switch (step.type) {
    case RpaActionType.CLICK: {
      assertRpaStepHasTarget(step);
      if (step.xpath) {
        await page.locator(`xpath=${step.xpath}`).click({ timeout: 30000 });
      } else {
        await page.click(step.selector, { timeout: 30000 });
      }
      return { action: 'clicked' };
    }

    case RpaActionType.TYPE: {
      assertRpaStepHasTarget(step);
      const text = step.text || '';
      if (step.shouldReplaceExistingText) {
        // fill() replaces whatever the field currently contains.
        if (step.xpath) {
          await page.locator(`xpath=${step.xpath}`).fill(text);
        } else {
          await page.fill(step.selector, text);
        }
      } else if (step.xpath) {
        // type() sends the text as key presses to the element.
        await page.locator(`xpath=${step.xpath}`).type(text);
      } else {
        await page.type(step.selector, text);
      }
      return { action: 'typed', text: step.text };
    }

    case RpaActionType.WAIT: {
      // `??` keeps an explicit 0; only a missing waitTime falls back to one second.
      const duration = step.waitTime ?? 1000;
      await page.waitForTimeout(duration);
      return { action: 'waited', duration };
    }

    case RpaActionType.GOTO: {
      if (!step.url) {
        throw new Error('RPA goto step requires a url');
      }
      await page.goto(step.url, { waitUntil: 'networkidle' });
      return { action: 'navigated', url: step.url };
    }

    case RpaActionType.PRESS: {
      // Report the key that was actually pressed, including the Enter default.
      const key = step.key || 'Enter';
      await page.keyboard.press(key);
      return { action: 'pressed', key };
    }

    case RpaActionType.SELECT: {
      assertRpaStepHasTarget(step);
      if (step.xpath) {
        await page.locator(`xpath=${step.xpath}`).selectOption(step.value || '');
      } else {
        await page.selectOption(step.selector, step.value || '');
      }
      return { action: 'selected', value: step.value };
    }

    case RpaActionType.SCREENSHOT: {
      const screenshot = await page.screenshot({ type: 'png' });
      return {
        action: 'screenshot',
        description: step.description,
        data: screenshot.toString('base64'),
      };
    }

    case RpaActionType.EXTRACT_DATA: {
      if (!llm) {
        throw new Error('LLM is required for EXTRACT_DATA action');
      }

      // The full page HTML and its URL are handed to the LLM as extraction input.
      const pageContent = await page.content();
      const url = page.url();

      const outputFields: any[] = step.outputSchema ?? [];

      // Human-readable field list for the prompt; never the literal text "undefined".
      const schemaDescription =
        outputFields
          .map((field: any) => {
            let desc = `- ${field.name} (${field.type})`;
            if (field.description) desc += `: ${field.description}`;
            if (field.required) desc += ' [REQUIRED]';
            return desc;
          })
          .join('\n') || '(no output schema specified)';

      // JSON schema passed to the LLM call for structured output.
      const jsonSchema: any = {
        type: 'object',
        properties: {},
        required: [],
      };

      for (const field of outputFields) {
        const fieldSchema: any = { type: field.type };
        if (field.description) fieldSchema.description = field.description;

        if (field.type === 'array' && field.items) {
          fieldSchema.items = field.items;
        } else if (field.type === 'object' && field.properties) {
          fieldSchema.properties = field.properties;
        }

        jsonSchema.properties[field.name] = fieldSchema;
        if (field.required) {
          jsonSchema.required.push(field.name);
        }
      }

      const prompt = `Extract data from the following web page according to the specified schema.

URL: ${url}

OUTPUT SCHEMA:
${schemaDescription}

WEB PAGE CONTENT:
${pageContent}

Extract the requested data and return it as a JSON object matching the schema. If a field cannot be found or extracted, use null for optional fields or provide a reasonable default for required fields.`;

      try {
        const messages = [
          {
            role: 'user' as const,
            content: prompt,
          },
        ];

        const response = await llm.invoke(messages, {
          response_format: {
            type: 'json_object' as const,
            schema: jsonSchema,
          },
          temperature: 0.1,
          model: 'gpt-4o-mini',
        });

        // String content is expected to be JSON text; anything else is passed through as-is.
        const extractedData = typeof response.content === 'string' ? JSON.parse(response.content) : response.content;

        return {
          action: 'extract_data',
          url,
          data: extractedData,
        };
      } catch (error) {
        logger.error({
          msg: '[RPA] Failed to extract data',
          error: error instanceof Error ? error.message : 'Unknown error',
          url,
        });
        throw error;
      }
    }

    default:
      throw new Error(`Unknown RPA action type: ${step.type}`);
  }
}
@@ -12,6 +12,7 @@ import { Agent } from '../agent';
12
12
  import { addJumpToNode } from './addJumpToNode';
13
13
  import { addJunctionNode } from './addJunctionNode';
14
14
  import { addBrowserTaskNode } from './addBrowserTaskNode';
15
+ import { addRpaNode } from './addRpaNode';
15
16
 
16
17
  export const nodeFactory = ({
17
18
  graph,
@@ -51,6 +52,9 @@ export const nodeFactory = ({
51
52
  case NodeType.BROWSER_TASK:
52
53
  addBrowserTaskNode({ graph, node, agent, llm });
53
54
  break;
55
+ case NodeType.RPA:
56
+ addRpaNode({ graph, node, tools, emit, agent, llm });
57
+ break;
54
58
  default:
55
59
  throw new Error(`Unsupported node type: ${nodeType}`);
56
60
  }
@@ -12,6 +12,7 @@ export enum NodeType {
12
12
  PROMPT_NODE = 'promptNode',
13
13
  JUMP_TO_NODE = 'jumpToNode',
14
14
  BROWSER_TASK = 'browserTask',
15
+ RPA = 'rpa',
15
16
  }
16
17
 
17
18
  export enum EdgeType {
@@ -121,6 +122,53 @@ export interface BrowserTaskNode extends BaseNode {
121
122
  proxy?: string; // 2-digit country code like 'IL'
122
123
  hooks?: { name: string }[]; // Array of hooks to be passed to the browser-use lambda
123
124
  onPrem?: boolean;
125
+ localRun?: boolean;
126
+ }
127
+
128
/**
 * Kinds of action an RPA step can perform.
 * The string value is what appears in a step's `type` field.
 */
export enum RpaActionType {
  /** Click the element addressed by the step's xpath or selector. */
  CLICK = 'click',
  /** Enter the step's text into the addressed element. */
  TYPE = 'type',
  /** Pause for the step's `waitTime`. */
  WAIT = 'wait',
  /** Capture a PNG screenshot of the page. */
  SCREENSHOT = 'screenshot',
  /** Choose the step's `value` in the addressed select element. */
  SELECT = 'select',
  /** Press the step's `key` on the keyboard. */
  PRESS = 'press',
  /** Navigate the page to the step's `url`. */
  GOTO = 'goto',
  /** Extract structured data from the page according to the step's `outputSchema`. */
  EXTRACT_DATA = 'extract_data',
}
138
+
139
/**
 * One action in an RPA node's `steps` list.
 * Which optional fields are meaningful depends on `type`.
 */
export interface RpaStep {
  /** Identifier of the step. */
  id: string;
  /** Action to perform. */
  type: RpaActionType;

  // --- Element targeting (click / type / select) ---
  /** XPath of the target element; takes precedence over `selector` when both are set. */
  xpath?: string;
  /** Selector of the target element, used when no `xpath` is given. */
  selector?: string;

  // --- Action arguments ---
  /** Text to enter for `type` steps. */
  text?: string;
  /** When true, a `type` step replaces the field's existing content instead of typing into it. */
  shouldReplaceExistingText?: boolean;
  /** Option to choose for `select` steps. */
  value?: string;
  /** Key to press for `press` steps. */
  key?: string;
  /** Destination for `goto` steps. */
  url?: string;
  /** Pause length for `wait` steps (presumably milliseconds — TODO confirm). */
  waitTime?: number;
  /** Free-text description; echoed back in the result of `screenshot` steps. */
  description?: string;

  /** Fields an `extract_data` step should produce. */
  outputSchema?: Array<{
    name: string;
    type: 'string' | 'number' | 'boolean' | 'array' | 'object';
    description?: string;
    required?: boolean;
    /** Element schema, relevant when `type` is 'array'. */
    items?: {
      type: 'string' | 'number' | 'boolean' | 'object';
      properties?: any;
    };
    /** Property schema, relevant when `type` is 'object'. */
    properties?: any;
  }>;
}
163
+
164
/**
 * Flow node that runs a scripted sequence of browser actions.
 */
export interface RpaNode extends BaseNode {
  type: NodeType.RPA;
  /** Actions to execute, in order. */
  steps: RpaStep[];
  /** Optional timeout for the node (unit and enforcement are not visible here — TODO confirm). */
  timeout?: number;
  /** Optional viewport size to apply to the page. */
  viewport?: { width: number; height: number };
}
125
173
 
126
174
  export type TriggerNode = AppTriggerNode | WebhookTriggerNode | ManualTriggerNode | VoiceTriggerNode | InterfaceTriggerNode;
@@ -145,7 +193,7 @@ export interface AppToolNode extends BaseNode, BaseAppNode {
145
193
  actionKey: string;
146
194
  }
147
195
 
148
- export type Node = TriggerNode | JunctionNode | ToolNode | AppToolNode | PromptNode | JumpToNode | BrowserTaskNode;
196
/** Union of every node kind a flow can contain. */
export type Node =
  | TriggerNode
  | JunctionNode
  | ToolNode
  | AppToolNode
  | PromptNode
  | JumpToNode
  | BrowserTaskNode
  | RpaNode;
149
197
 
150
198
  export interface BaseEdge {
151
199
  source: string;