@minded-ai/mindedjs 2.0.9 → 2.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/dist/agent.js +1 -1
  2. package/dist/agent.js.map +1 -1
  3. package/dist/edges/createPromptRouter.d.ts.map +1 -1
  4. package/dist/edges/createPromptRouter.js +1 -0
  5. package/dist/edges/createPromptRouter.js.map +1 -1
  6. package/dist/index.d.ts +2 -2
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +2 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/nodes/addAppToolNode.js +1 -1
  11. package/dist/nodes/addAppToolNode.js.map +1 -1
  12. package/dist/nodes/addBrowserTaskNode.d.ts.map +1 -1
  13. package/dist/nodes/addBrowserTaskNode.js +4 -25
  14. package/dist/nodes/addBrowserTaskNode.js.map +1 -1
  15. package/dist/nodes/addPromptNode.js +1 -1
  16. package/dist/nodes/addPromptNode.js.map +1 -1
  17. package/dist/nodes/addRpaNode.d.ts +18 -0
  18. package/dist/nodes/addRpaNode.d.ts.map +1 -0
  19. package/dist/nodes/addRpaNode.js +162 -0
  20. package/dist/nodes/addRpaNode.js.map +1 -0
  21. package/dist/nodes/addToolNode.js +1 -1
  22. package/dist/nodes/addToolNode.js.map +1 -1
  23. package/dist/nodes/nodeFactory.d.ts.map +1 -1
  24. package/dist/nodes/nodeFactory.js +4 -0
  25. package/dist/nodes/nodeFactory.js.map +1 -1
  26. package/dist/nodes/rpaStepsExecutor.d.ts +5 -0
  27. package/dist/nodes/rpaStepsExecutor.d.ts.map +1 -0
  28. package/dist/nodes/rpaStepsExecutor.js +156 -0
  29. package/dist/nodes/rpaStepsExecutor.js.map +1 -0
  30. package/dist/types/Flows.types.d.ts +41 -2
  31. package/dist/types/Flows.types.d.ts.map +1 -1
  32. package/dist/types/Flows.types.js +13 -1
  33. package/dist/types/Flows.types.js.map +1 -1
  34. package/dist/utils/schemaUtils.d.ts +15 -0
  35. package/dist/utils/schemaUtils.d.ts.map +1 -0
  36. package/dist/utils/schemaUtils.js +56 -0
  37. package/dist/utils/schemaUtils.js.map +1 -0
  38. package/package.json +2 -2
  39. package/src/agent.ts +1 -1
  40. package/src/edges/createPromptRouter.ts +6 -5
  41. package/src/index.ts +3 -0
  42. package/src/nodes/addAppToolNode.ts +1 -1
  43. package/src/nodes/addBrowserTaskNode.ts +5 -29
  44. package/src/nodes/addPromptNode.ts +1 -1
  45. package/src/nodes/addRpaNode.ts +199 -0
  46. package/src/nodes/addToolNode.ts +2 -2
  47. package/src/nodes/nodeFactory.ts +4 -0
  48. package/src/nodes/rpaStepsExecutor.ts +175 -0
  49. package/src/types/Flows.types.ts +43 -1
  50. package/src/utils/schemaUtils.ts +68 -0
@@ -0,0 +1,199 @@
1
+ import { RunnableLike } from '@langchain/core/runnables';
2
+ import { NodeType, RpaNode, RpaActionType } from '../types/Flows.types';
3
+ import { PreCompiledGraph, stateAnnotation } from '../types/LangGraph.types';
4
+ import { Tool } from '../types/Tools.types';
5
+ import { AgentEventRequestPayloads } from '../events/AgentEvents';
6
+ import { EmitSignature, HistoryStep } from '../types/Agent.types';
7
+ import { Agent } from '../agent';
8
+ import { logger } from '../utils/logger';
9
+ import { createHistoryStep } from '../utils/history';
10
+ import { chromium, Browser, Page } from 'playwright';
11
+ import { LLMProviders } from '../types/LLM.types';
12
+ import { AIMessage, ToolMessage } from '@langchain/core/messages';
13
+ import { v4 as uuidv4 } from 'uuid';
14
+ import { executeRpaStep } from './rpaStepsExecutor';
15
+
16
/**
 * Arguments accepted by `addRpaNode`.
 *
 * `tools` and `emit` are part of the shape that `nodeFactory` passes in, but
 * `addRpaNode` itself only destructures `graph`, `node`, `agent` and `llm`.
 */
type AddRpaNodeParams = {
  /** Graph under construction; the RPA node is registered on it. */
  graph: PreCompiledGraph;
  /** Flow definition of the RPA node, including its ordered steps. */
  node: RpaNode;
  /** Tools available to the flow (not read by the RPA node). */
  tools: Tool<any, any>[];
  /** Agent event emitter (not read by the RPA node). */
  emit: EmitSignature<any, keyof AgentEventRequestPayloads<any>>;
  /** Owning agent; its interrupt session manager is consulted before each run. */
  agent: Agent;
  /** LLM handed to the step executor. */
  llm: (typeof LLMProviders)[keyof typeof LLMProviders];
};
24
+
25
/**
 * Registers an RPA node on the graph under `node.name`.
 *
 * When the node runs, its callback:
 *  1. checks the agent's interrupt queue for the session,
 *  2. appends an AI message carrying a synthetic `rpa-task` tool call,
 *  3. attaches to the existing browser through the CDP URL found in state and
 *     picks the first page of the first context (opening a page if none exists),
 *  4. executes `node.steps` in order, stopping at the first failing step,
 *  5. appends a tool message and a history step describing the outcome.
 *
 * Errors raised while connecting or executing steps are not rethrown: they are
 * logged and reported through an error tool message and an error history step,
 * and the state is returned.
 *
 * @param params.graph Graph the node is added to.
 * @param params.node RPA node definition whose steps are executed.
 * @param params.agent Agent whose interrupt session manager is consulted first.
 * @param params.llm LLM forwarded to `executeRpaStep`.
 */
export const addRpaNode = async ({ graph, node, agent, llm }: AddRpaNodeParams) => {
  const callback: RunnableLike = async (state: typeof stateAnnotation.State) => {
    await agent.interruptSessionManager.checkQueueAndInterrupt(state.sessionId);
    logger.info({ msg: `[Node] Executing RPA node`, node: node.displayName, sessionId: state.sessionId });

    // Identifiers linking the AI message, its tool call and the history entry.
    const toolCallId = uuidv4();
    const aiMessageId = uuidv4();

    // CDP endpoint of the browser session, as stored in graph state.
    const cdpUrl = state.cdpUrl;

    const toolCall = {
      id: toolCallId,
      name: 'rpa-task',
      args: {
        steps: node.steps,
      },
    };

    // The RPA run is represented in the conversation as a tool call issued by the AI.
    const aiMessage = new AIMessage({
      id: aiMessageId,
      content: '',
      tool_calls: [toolCall],
      additional_kwargs: {
        mindedMetadata: {
          nodeType: NodeType.RPA,
          nodeDisplayName: node.displayName,
          sessionId: state.sessionId,
          cdpUrl: cdpUrl || undefined,
        },
      },
    });

    state.messages.push(aiMessage);

    try {
      if (!cdpUrl) {
        throw new Error('CDP URL not found in state. Make sure a browser session is available.');
      }

      logger.debug({
        msg: '[RPA] Connecting to browser via CDP',
        cdpUrl,
        sessionId: state.sessionId,
        node: node.displayName,
      });

      // Attach to the existing browser. The connection is intentionally not
      // closed afterwards: the browser session should remain active for other
      // operations.
      const browser: Browser = await chromium.connectOverCDP(cdpUrl);
      const contexts = browser.contexts();
      if (contexts.length === 0) {
        throw new Error('No browser contexts found');
      }

      // Reuse the first open page, or open one when the context has none.
      const pages = contexts[0].pages();
      const page: Page = pages.length > 0 ? pages[0] : await contexts[0].newPage();

      // Data returned by EXTRACT_DATA steps, in execution order.
      const results: { stepIndex: number; url: string; data: unknown }[] = [];

      for (const [index, step] of node.steps.entries()) {
        logger.debug({
          msg: '[RPA] Executing step',
          stepIndex: index + 1,
          stepType: step.type,
          sessionId: state.sessionId,
          node: node.displayName,
        });

        try {
          const result = await executeRpaStep(page, step, state, llm);

          // Only extraction steps contribute to the tool result.
          if (step.type === RpaActionType.EXTRACT_DATA && result.data) {
            results.push({
              stepIndex: index + 1,
              url: result.url,
              data: result.data,
            });
          }
        } catch (stepError) {
          logger.error({
            msg: '[RPA] Step execution failed',
            stepIndex: index + 1,
            stepType: step.type,
            error: stepError instanceof Error ? stepError.message : 'Unknown error',
            sessionId: state.sessionId,
            node: node.displayName,
          });

          // The first failing step aborts the remaining steps.
          throw stepError;
        }
      }

      // The tool message gets its own message id (as the error path does);
      // it is tied to the tool call through `tool_call_id`.
      const toolMessage = new ToolMessage({
        id: uuidv4(),
        content: JSON.stringify({
          result: results,
        }),
        name: 'rpa-task',
        tool_call_id: toolCallId,
        status: 'success',
      });

      // Append the tool result after the AI message pushed above.
      state.messages.push(toolMessage);

      state.history.push(
        createHistoryStep<HistoryStep>(state.history, {
          type: NodeType.RPA,
          nodeId: node.name,
          nodeDisplayName: node.displayName,
          raw: {
            steps: node.steps,
            results,
          },
          messageIds: [aiMessageId, toolMessage.id!],
        }),
      );

      return state;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';

      logger.error({
        msg: '[RPA] Error executing RPA node',
        error: errorMessage,
        sessionId: state.sessionId,
        node: node.displayName,
      });

      const errorToolMessage = new ToolMessage({
        id: uuidv4(),
        content: JSON.stringify({
          error: errorMessage,
        }),
        name: 'rpa-task',
        tool_call_id: toolCallId,
        status: 'error',
      });

      // Append the error result after the AI message pushed above.
      state.messages.push(errorToolMessage);

      state.history.push(
        createHistoryStep<HistoryStep>(state.history, {
          type: NodeType.RPA,
          nodeId: node.name,
          nodeDisplayName: node.displayName,
          raw: {
            error: errorMessage,
            steps: node.steps,
          },
          messageIds: [aiMessageId, errorToolMessage.id!],
        }),
      );

      return state;
    }
  };

  graph.addNode(node.name, callback);
};
@@ -34,7 +34,7 @@ export const addToolNode = async ({
34
34
  await agent.interruptSessionManager.checkQueueAndInterrupt(state.sessionId);
35
35
  logger.debug({ msg: `[Node] Executing tool node`, node: toolNode.displayName });
36
36
 
37
- const tool = langchainTool(() => { }, {
37
+ const tool = langchainTool(() => {}, {
38
38
  name: matchedTool.name,
39
39
  description: matchedTool.description,
40
40
  schema: matchedTool.input,
@@ -52,7 +52,7 @@ export const addToolNode = async ({
52
52
  }
53
53
  }
54
54
  if (finalMessage) {
55
- const compiledPrompt = compilePrompt(finalMessage, { memory: state.memory, env: process.env });
55
+ const compiledPrompt = compilePrompt(finalMessage, { state: state, memory: state.memory, env: process.env });
56
56
  const systemMessage = new SystemMessage(compiledPrompt);
57
57
  if (state.messages.length === 0 || state.messages[0].getType() === 'system') {
58
58
  state.messages[0] = systemMessage;
@@ -12,6 +12,7 @@ import { Agent } from '../agent';
12
12
  import { addJumpToNode } from './addJumpToNode';
13
13
  import { addJunctionNode } from './addJunctionNode';
14
14
  import { addBrowserTaskNode } from './addBrowserTaskNode';
15
+ import { addRpaNode } from './addRpaNode';
15
16
 
16
17
  export const nodeFactory = ({
17
18
  graph,
@@ -51,6 +52,9 @@ export const nodeFactory = ({
51
52
  case NodeType.BROWSER_TASK:
52
53
  addBrowserTaskNode({ graph, node, agent, llm });
53
54
  break;
55
+ case NodeType.RPA:
56
+ addRpaNode({ graph, node, tools, emit, agent, llm });
57
+ break;
54
58
  default:
55
59
  throw new Error(`Unsupported node type: ${nodeType}`);
56
60
  }
@@ -0,0 +1,175 @@
1
+ import { Page } from 'playwright';
2
+ import { RpaActionType } from '../types/Flows.types';
3
+ import { LLMProviders } from '../types/LLM.types';
4
+ import { logger } from '../utils/logger';
5
+ import { z } from 'zod';
6
+ import { SystemMessage } from '@langchain/core/messages';
7
+ import { compilePrompt } from './compilePrompt';
8
+ import { stateAnnotation } from '../types/LangGraph.types';
9
+ import { createZodSchemaFromFields } from '../utils/schemaUtils';
10
+
11
/** Timeout, in milliseconds, applied to element actions and to navigation. */
const RPA_ACTION_TIMEOUT_MS = 5000;

/** Wait, in milliseconds, used by WAIT steps without a (truthy) `waitTime`. */
const RPA_DEFAULT_WAIT_MS = 3000;

/**
 * Compiles the element locators of a step and makes sure at least one is usable.
 *
 * @throws Error when the step provides neither an xpath nor a selector.
 */
function resolveStepTarget(step: any, compile: (template: string) => string): { xpath?: string; selector?: string } {
  const xpath = step.xpath ? compile(step.xpath) : undefined;
  const selector = step.selector ? compile(step.selector) : undefined;

  if (!xpath && !selector) {
    // Without this guard the action would be skipped and still reported as done.
    throw new Error(`RPA ${step.type} step requires an xpath or a selector`);
  }

  return { xpath, selector };
}

/**
 * Asks the LLM to extract data from the current page, validated against the
 * Zod schema built from `step.outputSchema`.
 */
async function extractDataFromPage(page: Page, step: any, llm: (typeof LLMProviders)[keyof typeof LLMProviders]): Promise<any> {
  if (!llm) {
    throw new Error('LLM is required for EXTRACT_DATA action');
  }

  // The full page HTML is embedded in the prompt.
  const pageContent = await page.content();
  const url = page.url();

  // Without an outputSchema the result is a single `result` string.
  const zodSchema = createZodSchemaFromFields(step.outputSchema, { result: z.string().describe('The extracted data') });

  const prompt = `Extract data from the following web page according to the specified structure.
Extract the requested data and return it as a structured object.
WEB PAGE CONTENT:
${pageContent}`;

  try {
    const supportsStructuredOutput = 'withStructuredOutput' in llm && typeof (llm as any).withStructuredOutput === 'function';

    let extractedData: any;

    if (supportsStructuredOutput) {
      // Let the provider produce output that follows the schema.
      const structuredLLM = (llm as any).withStructuredOutput(zodSchema);
      extractedData = await structuredLLM.invoke([new SystemMessage(prompt)]);
    } else {
      logger.warn({
        msg: '[RPA] LLM does not support withStructuredOutput, using regular invoke',
        url,
      });

      const response = await llm.invoke([new SystemMessage(prompt)]);

      // The reply is expected to be JSON; non-string content is serialised first.
      const responseContent = typeof response.content === 'string' ? response.content : JSON.stringify(response.content);
      extractedData = JSON.parse(responseContent);

      try {
        extractedData = zodSchema.parse(extractedData);
      } catch (validationError) {
        logger.error({
          msg: '[RPA] Extracted data validation failed',
          error: validationError instanceof Error ? validationError.message : 'Unknown error',
          url,
        });
        throw validationError;
      }
    }

    return {
      action: 'extract_data',
      url,
      data: extractedData,
    };
  } catch (error) {
    logger.error({
      msg: '[RPA] Failed to extract data',
      error: error instanceof Error ? error.message : 'Unknown error',
      url,
    });
    throw error;
  }
}

/**
 * Executes a single RPA step against a Playwright page.
 *
 * String fields of the step (locators, text, value, key, url, description) are
 * passed through `compilePrompt` with `{ env: process.env, state }` before use.
 * When a step sets both `xpath` and `selector`, the xpath is used.
 *
 * @param page Page the step acts on.
 * @param step Step definition; `step.type` selects the action.
 * @param state Current graph state, made available to `compilePrompt`.
 * @param llm LLM used by EXTRACT_DATA steps.
 * @returns An object whose `action` names what was done, plus action-specific
 *   fields (`text`, `duration`, `url`, `key`, `value`, `description`, `data`).
 * @throws Error when a CLICK/TYPE/SELECT step has no xpath or selector, when a
 *   GOTO step has no url, when EXTRACT_DATA is used without an LLM, or when
 *   `step.type` is not a known action. Errors from Playwright, the LLM, JSON
 *   parsing and schema validation propagate as well.
 */
export async function executeRpaStep(
  page: Page,
  step: any,
  state: typeof stateAnnotation.State,
  llm: (typeof LLMProviders)[keyof typeof LLMProviders],
): Promise<any> {
  // Values exposed to placeholders inside step fields.
  const params = {
    env: process.env,
    state: state,
  };
  const compile = (template: string) => compilePrompt(template, params);

  switch (step.type) {
    case RpaActionType.CLICK: {
      const target = resolveStepTarget(step, compile);

      if (target.xpath) {
        await page.locator(`xpath=${target.xpath}`).click({ timeout: RPA_ACTION_TIMEOUT_MS });
      } else if (target.selector) {
        await page.click(target.selector, { timeout: RPA_ACTION_TIMEOUT_MS });
      }
      return { action: 'clicked' };
    }

    case RpaActionType.TYPE: {
      const compiledText = step.text ? compile(step.text) : '';
      const target = resolveStepTarget(step, compile);

      if (step.shouldReplaceExistingText) {
        // fill() replaces whatever the field currently holds.
        if (target.xpath) {
          await page.locator(`xpath=${target.xpath}`).fill(compiledText, { timeout: RPA_ACTION_TIMEOUT_MS });
        } else if (target.selector) {
          await page.fill(target.selector, compiledText, { timeout: RPA_ACTION_TIMEOUT_MS });
        }
      } else {
        if (target.xpath) {
          await page.locator(`xpath=${target.xpath}`).type(compiledText, { timeout: RPA_ACTION_TIMEOUT_MS });
        } else if (target.selector) {
          await page.type(target.selector, compiledText, { timeout: RPA_ACTION_TIMEOUT_MS });
        }
      }
      return { action: 'typed', text: compiledText };
    }

    case RpaActionType.WAIT: {
      // Report the wait that was actually applied, including the default.
      const duration = step.waitTime || RPA_DEFAULT_WAIT_MS;
      await page.waitForTimeout(duration);
      return { action: 'waited', duration };
    }

    case RpaActionType.GOTO: {
      const compiledUrl = step.url ? compile(step.url) : '';
      if (!compiledUrl) {
        throw new Error('RPA goto step requires a url');
      }
      await page.goto(compiledUrl, { waitUntil: 'load', timeout: RPA_ACTION_TIMEOUT_MS });
      return { action: 'navigated', url: compiledUrl };
    }

    case RpaActionType.PRESS: {
      // Steps without a key press Enter.
      const compiledKey = step.key ? compile(step.key) : 'Enter';
      await page.keyboard.press(compiledKey);
      return { action: 'pressed', key: compiledKey };
    }

    case RpaActionType.SELECT: {
      const compiledValue = step.value ? compile(step.value) : '';
      const target = resolveStepTarget(step, compile);

      if (target.xpath) {
        await page.locator(`xpath=${target.xpath}`).selectOption(compiledValue, { timeout: RPA_ACTION_TIMEOUT_MS });
      } else if (target.selector) {
        await page.selectOption(target.selector, compiledValue, { timeout: RPA_ACTION_TIMEOUT_MS });
      }
      return { action: 'selected', value: compiledValue };
    }

    case RpaActionType.SCREENSHOT: {
      const screenshot = await page.screenshot({ type: 'png' });
      const compiledDescription = step.description ? compile(step.description) : undefined;
      return {
        action: 'screenshot',
        description: compiledDescription,
        // PNG bytes, base64-encoded.
        data: screenshot.toString('base64'),
      };
    }

    case RpaActionType.EXTRACT_DATA:
      return extractDataFromPage(page, step, llm);

    default:
      throw new Error(`Unknown RPA action type: ${step.type}`);
  }
}
@@ -12,6 +12,7 @@ export enum NodeType {
12
12
  PROMPT_NODE = 'promptNode',
13
13
  JUMP_TO_NODE = 'jumpToNode',
14
14
  BROWSER_TASK = 'browserTask',
15
+ RPA = 'rpa',
15
16
  }
16
17
 
17
18
  export enum EdgeType {
@@ -124,6 +125,47 @@ export interface BrowserTaskNode extends BaseNode {
124
125
  localRun?: boolean;
125
126
  }
126
127
 
128
/**
 * Actions an RPA step can perform. The string values are what appears in the
 * `type` field of a step definition.
 */
export enum RpaActionType {
  /** Click an element. */
  CLICK = 'click',
  /** Enter text into an element. */
  TYPE = 'type',
  /** Pause for a period of time. */
  WAIT = 'wait',
  /** Capture a screenshot of the page. */
  SCREENSHOT = 'screenshot',
  /** Choose an option in a select element. */
  SELECT = 'select',
  /** Press a keyboard key. */
  PRESS = 'press',
  /** Navigate to a URL. */
  GOTO = 'goto',
  /** Extract structured data from the page. */
  EXTRACT_DATA = 'extract_data',
}
138
+
139
/** Value types allowed for the elements of an `array` output field. */
type RpaOutputItemType = 'string' | 'number' | 'boolean' | 'object';

/** Description of one field in the data an `extract_data` step should produce. */
type RpaOutputField = {
  /** Key of the field in the extracted object. */
  name: string;
  /** Value type of the field. */
  type: RpaOutputItemType | 'array';
  /** Free-text description of the field. */
  description?: string;
  /** Whether the field is required. */
  required?: boolean;
  /** Element description for `array` fields. */
  items?: {
    type: RpaOutputItemType;
    properties?: any;
  };
  /** Nested property description for `object` fields. */
  properties?: any;
};

/**
 * A single action in an RPA node. `type` selects the action; the remaining
 * optional fields are the inputs of the individual actions.
 */
export interface RpaStep {
  /** Identifier of the step. */
  id: string;
  /** Action to perform. */
  type: RpaActionType;
  /** XPath of the target element (used in preference to `selector` by the step executor). */
  xpath?: string;
  /** Selector of the target element. */
  selector?: string;
  /** Text to enter for `type` steps. */
  text?: string;
  /** Option value for `select` steps. */
  value?: string;
  /** Key for `press` steps. */
  key?: string;
  /** Destination for `goto` steps. */
  url?: string;
  /** For `type` steps: replace the field's current content instead of typing into it. */
  shouldReplaceExistingText?: boolean;
  /** Duration of a `wait` step, in milliseconds. */
  waitTime?: number;
  /** Description attached to the step (returned with `screenshot` results). */
  description?: string;
  /** Fields an `extract_data` step should return. */
  outputSchema?: RpaOutputField[];
}
163
+
164
/**
 * Flow node that runs a scripted sequence of browser actions.
 */
export interface RpaNode extends BaseNode {
  /** Discriminator identifying the node as an RPA node. */
  type: NodeType.RPA;
  /** Steps to execute, in order. */
  steps: RpaStep[];
}
168
+
127
169
  export type TriggerNode = AppTriggerNode | WebhookTriggerNode | ManualTriggerNode | VoiceTriggerNode | InterfaceTriggerNode;
128
170
 
129
171
  export interface JunctionNode extends BaseNode {
@@ -146,7 +188,7 @@ export interface AppToolNode extends BaseNode, BaseAppNode {
146
188
  actionKey: string;
147
189
  }
148
190
 
149
- export type Node = TriggerNode | JunctionNode | ToolNode | AppToolNode | PromptNode | JumpToNode | BrowserTaskNode;
191
+ export type Node = TriggerNode | JunctionNode | ToolNode | AppToolNode | PromptNode | JumpToNode | BrowserTaskNode | RpaNode;
150
192
 
151
193
  export interface BaseEdge {
152
194
  source: string;
@@ -0,0 +1,68 @@
1
+ import { z } from 'zod';
2
+
3
/** Declarative description of one field of an object schema. */
export interface SchemaField {
  /** Key of the field in the resulting object schema. */
  name: string;
  /** Value type of the field. Unknown values are treated as `string`. */
  type: 'string' | 'number' | 'boolean' | 'array' | 'object';
  /** Description attached to the field's Zod schema. */
  description?: string;
  /** The field is optional only when this is explicitly `false`. */
  required?: boolean;
  /**
   * Element description for `array` fields. When present, the array is typed
   * with the element type instead of accepting any value.
   */
  items?: {
    type: 'string' | 'number' | 'boolean' | 'object';
    properties?: any;
  };
}

/**
 * Maps the element type of an array field to a Zod schema.
 * Unrecognised element types accept any value.
 */
function zodSchemaForArrayItem(itemType: string | undefined): z.ZodTypeAny {
  switch (itemType) {
    case 'string':
      return z.string();
    case 'number':
      return z.number();
    case 'boolean':
      return z.boolean();
    case 'object':
      return z.record(z.any());
    default:
      return z.any();
  }
}

/** Maps one field definition to its Zod schema, before description and optionality are applied. */
function zodSchemaForField(field: SchemaField): z.ZodTypeAny {
  switch (field.type) {
    case 'string':
      return z.string();
    case 'number':
      return z.number();
    case 'boolean':
      return z.boolean();
    case 'array':
      // Without `items` this is an array of any value.
      return z.array(zodSchemaForArrayItem(field.items?.type));
    case 'object':
      // Objects are records of any value.
      return z.record(z.any());
    default:
      // Unknown types are treated as strings.
      return z.string();
  }
}

/**
 * Converts an array of schema field definitions to a Zod object schema.
 *
 * Each field becomes a key of the object schema. Array fields use the element
 * type given in `items` when there is one. A field's description is attached
 * when provided, and a field is optional only when `required` is `false`.
 *
 * @param fields Array of field definitions
 * @param defaultSchema Optional default schema to use if no fields are provided
 * @returns Zod object schema; an empty object schema when there are neither
 *   fields nor a default schema
 */
export function createZodSchemaFromFields(
  fields?: SchemaField[],
  defaultSchema?: Record<string, z.ZodTypeAny>
): z.ZodObject<Record<string, z.ZodTypeAny>> {
  if (!fields || fields.length === 0) {
    return z.object(defaultSchema ? defaultSchema : {});
  }

  const schemaFields: Record<string, z.ZodTypeAny> = {};

  for (const field of fields) {
    let fieldSchema = zodSchemaForField(field);

    if (field.description) {
      fieldSchema = fieldSchema.describe(field.description);
    }

    if (field.required === false) {
      fieldSchema = fieldSchema.optional();
    }

    schemaFields[field.name] = fieldSchema;
  }

  return z.object(schemaFields);
}