react-native-agentic-ai 0.0.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +252 -14
  3. package/lib/module/components/AIAgent.js +185 -0
  4. package/lib/module/components/AIAgent.js.map +1 -0
  5. package/lib/module/components/AgentChatBar.js +268 -0
  6. package/lib/module/components/AgentChatBar.js.map +1 -0
  7. package/lib/module/components/AgentOverlay.js +53 -0
  8. package/lib/module/components/AgentOverlay.js.map +1 -0
  9. package/lib/module/core/AgentRuntime.js +640 -0
  10. package/lib/module/core/AgentRuntime.js.map +1 -0
  11. package/lib/module/core/FiberTreeWalker.js +362 -0
  12. package/lib/module/core/FiberTreeWalker.js.map +1 -0
  13. package/lib/module/core/MCPBridge.js +98 -0
  14. package/lib/module/core/MCPBridge.js.map +1 -0
  15. package/lib/module/core/ScreenDehydrator.js +46 -0
  16. package/lib/module/core/ScreenDehydrator.js.map +1 -0
  17. package/lib/module/core/systemPrompt.js +164 -0
  18. package/lib/module/core/systemPrompt.js.map +1 -0
  19. package/lib/module/core/types.js +2 -0
  20. package/lib/module/core/types.js.map +1 -0
  21. package/lib/module/hooks/useAction.js +32 -0
  22. package/lib/module/hooks/useAction.js.map +1 -0
  23. package/lib/module/index.js +17 -0
  24. package/lib/module/index.js.map +1 -0
  25. package/lib/module/package.json +1 -0
  26. package/lib/module/providers/GeminiProvider.js +294 -0
  27. package/lib/module/providers/GeminiProvider.js.map +1 -0
  28. package/lib/module/utils/logger.js +17 -0
  29. package/lib/module/utils/logger.js.map +1 -0
  30. package/lib/typescript/package.json +1 -0
  31. package/lib/typescript/src/components/AIAgent.d.ts +65 -0
  32. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -0
  33. package/lib/typescript/src/components/AgentChatBar.d.ts +15 -0
  34. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -0
  35. package/lib/typescript/src/components/AgentOverlay.d.ts +10 -0
  36. package/lib/typescript/src/components/AgentOverlay.d.ts.map +1 -0
  37. package/lib/typescript/src/core/AgentRuntime.d.ts +53 -0
  38. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -0
  39. package/lib/typescript/src/core/FiberTreeWalker.d.ts +31 -0
  40. package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -0
  41. package/lib/typescript/src/core/MCPBridge.d.ts +23 -0
  42. package/lib/typescript/src/core/MCPBridge.d.ts.map +1 -0
  43. package/lib/typescript/src/core/ScreenDehydrator.d.ts +20 -0
  44. package/lib/typescript/src/core/ScreenDehydrator.d.ts.map +1 -0
  45. package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
  46. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
  47. package/lib/typescript/src/core/types.d.ts +176 -0
  48. package/lib/typescript/src/core/types.d.ts.map +1 -0
  49. package/lib/typescript/src/hooks/useAction.d.ts +13 -0
  50. package/lib/typescript/src/hooks/useAction.d.ts.map +1 -0
  51. package/lib/typescript/src/index.d.ts +10 -0
  52. package/lib/typescript/src/index.d.ts.map +1 -0
  53. package/lib/typescript/src/providers/GeminiProvider.d.ts +43 -0
  54. package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -0
  55. package/lib/typescript/src/utils/logger.d.ts +7 -0
  56. package/lib/typescript/src/utils/logger.d.ts.map +1 -0
  57. package/package.json +135 -12
  58. package/src/components/AIAgent.tsx +262 -0
  59. package/src/components/AgentChatBar.tsx +258 -0
  60. package/src/components/AgentOverlay.tsx +48 -0
  61. package/src/core/AgentRuntime.ts +661 -0
  62. package/src/core/FiberTreeWalker.ts +404 -0
  63. package/src/core/MCPBridge.ts +110 -0
  64. package/src/core/ScreenDehydrator.ts +53 -0
  65. package/src/core/systemPrompt.ts +162 -0
  66. package/src/core/types.ts +233 -0
  67. package/src/hooks/useAction.ts +40 -0
  68. package/src/index.ts +22 -0
  69. package/src/providers/GeminiProvider.ts +283 -0
  70. package/src/utils/logger.ts +21 -0
@@ -0,0 +1,661 @@
1
+ /**
2
+ * AgentRuntime — The main agent loop, inspired by page-agent.js.
3
+ *
4
+ * Flow:
5
+ * 1. Walk Fiber tree → detect interactive elements
6
+ * 2. Dehydrate screen → text for LLM
7
+ * 3. Send to AI provider with tools
8
+ * 4. Parse tool call → execute (tap, type, navigate, done)
9
+ * 5. If not done, repeat from step 1 (re-dehydrate after UI change)
10
+ */
11
+
12
+ import { logger } from '../utils/logger';
13
+ import { walkFiberTree } from './FiberTreeWalker';
14
+ import type { WalkConfig } from './FiberTreeWalker';
15
+ import { dehydrateScreen } from './ScreenDehydrator';
16
+ import { buildSystemPrompt } from './systemPrompt';
17
+ import type {
18
+ AIProvider,
19
+ AgentConfig,
20
+ AgentStep,
21
+ ExecutionResult,
22
+ ToolDefinition,
23
+ ActionDefinition,
24
+ } from './types';
25
+
26
+ const DEFAULT_MAX_STEPS = 10; // Step cap for one execute() run, used when config.maxSteps is unset (or otherwise falsy).
27
+
28
+ // ─── Agent Runtime ─────────────────────────────────────────────
29
+
30
/**
 * Drives the agent loop: read the screen, ask the AI provider for the next
 * tool call, execute it, record the step, and repeat until the model calls
 * `done`, asks the user a question (legacy path), or the step budget runs out.
 *
 * Built-in tools (`tap`, `type`, `navigate`, `done`, `ask_user`) are registered
 * in the constructor; `config.customTools` may override or remove them, and
 * actions registered through `registerAction` are exposed as additional tools.
 */
export class AgentRuntime {
  /** Pause (ms) inserted after a tap or navigation before its result is reported. */
  private static readonly UI_SETTLE_MS = 500;
  /** Extra wait (ms) granted once to a navigation ref that reports not ready. */
  private static readonly NAV_READY_WAIT_MS = 1000;

  private provider: AIProvider;
  private config: AgentConfig;
  /** Root reference handed to `walkFiberTree` on every screen read. */
  private rootRef: any;
  /** Navigation ref; `isReady`, `getRootState`/`getState` and `navigate` are called on it. */
  private navRef: any;
  private tools: Map<string, ToolDefinition> = new Map();
  private actions: Map<string, ActionDefinition> = new Map();
  private history: AgentStep[] = [];
  private isRunning = false;
  /** Question from a legacy `ask_user` exit, replayed as context on the next `execute`. */
  private lastAskUserQuestion: string | null = null;
  /** System notes queued for the next prompt; drained by `assembleUserPrompt`. */
  private observations: string[] = [];
  private lastScreenName = '';

  /**
   * @param provider AI provider used to generate the next tool call.
   * @param config   Agent configuration (hooks, limits, custom tools, router…).
   * @param rootRef  Root reference passed to `walkFiberTree`.
   * @param navRef   Navigation ref used by the `navigate` tool and screen detection.
   */
  constructor(
    provider: AIProvider,
    config: AgentConfig,
    rootRef: any,
    navRef: any,
  ) {
    this.provider = provider;
    this.config = config;
    this.rootRef = rootRef;
    this.navRef = navRef;

    this.registerBuiltInTools();

    // Apply customTools — mirrors page-agent: null = remove, otherwise override
    if (config.customTools) {
      for (const [name, tool] of Object.entries(config.customTools)) {
        if (tool === null) {
          this.tools.delete(name);
          logger.info('AgentRuntime', `Removed tool: ${name}`);
        } else {
          this.tools.set(name, tool);
          logger.info('AgentRuntime', `Overrode tool: ${name}`);
        }
      }
    }
  }

  // ─── Tool Registration ─────────────────────────────────────

  /** Registers the five built-in tools under their own names. */
  private registerBuiltInTools(): void {
    const builtIns: ToolDefinition[] = [
      this.createTapTool(),
      this.createTypeTool(),
      this.createNavigateTool(),
      this.createDoneTool(),
      this.createAskUserTool(),
    ];
    for (const tool of builtIns) {
      this.tools.set(tool.name, tool);
    }
  }

  /** tap — universal interaction (mirrors RNTL's dispatchEvent pattern). */
  private createTapTool(): ToolDefinition {
    return {
      name: 'tap',
      description: 'Tap an interactive element by its index. Works universally on buttons, switches, and custom components.',
      parameters: {
        index: { type: 'number', description: 'The index of the element to tap', required: true },
      },
      execute: async (args) => {
        // Re-walk the tree so the index is resolved against the current screen.
        const { interactives: elements } = walkFiberTree(this.rootRef, this.getWalkConfig());
        const element = elements.find(el => el.index === args.index);
        if (!element) {
          return `❌ Element with index ${args.index} not found. Available indexes: ${elements.map(e => e.index).join(', ')}`;
        }

        // Strategy 1: Switch — call onValueChange (like RNTL's fireEvent('valueChange'))
        if (element.type === 'switch' && element.props.onValueChange) {
          try {
            element.props.onValueChange(!element.props.value);
            await pause(AgentRuntime.UI_SETTLE_MS);
            return `✅ Toggled [${args.index}] "${element.label}" to ${!element.props.value}`;
          } catch (error: unknown) {
            return `❌ Error toggling [${args.index}]: ${describeError(error)}`;
          }
        }

        // Strategy 2: Direct onPress (covers Pressable, Button, custom components)
        if (element.props.onPress) {
          try {
            element.props.onPress();
            await pause(AgentRuntime.UI_SETTLE_MS);
            return `✅ Tapped [${args.index}] "${element.label}"`;
          } catch (error: unknown) {
            return `❌ Error tapping [${args.index}]: ${describeError(error)}`;
          }
        }

        // Strategy 3: Bubble up Fiber tree (like RNTL's findEventHandler → element.parent),
        // looking at no more than five ancestors.
        let fiber = element.fiberNode?.return;
        for (let depth = 0; fiber && depth < 5; depth++) {
          const parentProps = fiber.memoizedProps || {};
          if (parentProps.onPress && typeof parentProps.onPress === 'function') {
            try {
              parentProps.onPress();
              await pause(AgentRuntime.UI_SETTLE_MS);
              return `✅ Tapped parent of [${args.index}] "${element.label}"`;
            } catch (error: unknown) {
              return `❌ Error tapping parent of [${args.index}]: ${describeError(error)}`;
            }
          }
          fiber = fiber.return;
        }

        return `❌ Element [${args.index}] "${element.label}" has no tap handler (no onPress or onValueChange found).`;
      },
    };
  }

  /** type — type text into a TextInput by calling its `onChangeText` prop. */
  private createTypeTool(): ToolDefinition {
    return {
      name: 'type',
      description: 'Type text into a text-input element by its index.',
      parameters: {
        index: { type: 'number', description: 'The index of the text-input element', required: true },
        text: { type: 'string', description: 'The text to type', required: true },
      },
      execute: async (args) => {
        const { interactives: elements } = walkFiberTree(this.rootRef, this.getWalkConfig());
        const element = elements.find(el => el.index === args.index);
        if (!element) {
          return `❌ Element with index ${args.index} not found.`;
        }
        if (!element.props.onChangeText) {
          return `❌ Element [${args.index}] "${element.label}" is not a text input.`;
        }
        try {
          element.props.onChangeText(args.text);
          return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
        } catch (error: unknown) {
          return `❌ Error typing: ${describeError(error)}`;
        }
      },
    };
  }

  /** navigate — navigate to a screen (supports React Navigation + Expo Router). */
  private createNavigateTool(): ToolDefinition {
    return {
      name: 'navigate',
      description: 'Navigate to a specific screen in the app.',
      parameters: {
        screen: { type: 'string', description: 'Screen name or path to navigate to', required: true },
        params: { type: 'string', description: 'Optional JSON params object', required: false },
      },
      execute: async (args) => {
        // Expo Router path: use router.push(). Note: `params` is not forwarded here.
        if (this.config.router) {
          try {
            const path = args.screen.startsWith('/') ? args.screen : `/${args.screen}`;
            this.config.router.push(path);
            await pause(AgentRuntime.UI_SETTLE_MS);
            return `✅ Navigated to "${path}"`;
          } catch (error: unknown) {
            return `❌ Navigation error: ${describeError(error)}`;
          }
        }

        // React Navigation path: use navRef.navigate()
        if (!this.navRef) {
          return '❌ Navigation ref not available.';
        }
        if (!this.navRef.isReady()) {
          // Give the container one chance to finish mounting before giving up.
          await pause(AgentRuntime.NAV_READY_WAIT_MS);
          if (!this.navRef.isReady()) {
            return '❌ Navigation is not ready yet.';
          }
        }
        try {
          // The model may send params either as a JSON string or as an object.
          let params: any;
          if (args.params) {
            params = typeof args.params === 'string' ? JSON.parse(args.params) : args.params;
          }
          this.navRef.navigate(args.screen, params);
          await pause(AgentRuntime.UI_SETTLE_MS);
          return `✅ Navigated to "${args.screen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
        } catch (error: unknown) {
          return `❌ Navigation error: ${describeError(error)}. Available screens: ${this.getRouteNames().join(', ')}`;
        }
      },
    };
  }

  /** done — complete the task; the loop in `runSteps` stops after this tool. */
  private createDoneTool(): ToolDefinition {
    return {
      name: 'done',
      description: 'Complete the task with a message to the user.',
      parameters: {
        text: { type: 'string', description: 'Response message to the user', required: true },
        success: { type: 'boolean', description: 'Whether the task was completed successfully', required: true },
      },
      execute: async (args) => {
        return args.text;
      },
    };
  }

  /** ask_user — ask for clarification (mirrors page-agent: blocks until user responds). */
  private createAskUserTool(): ToolDefinition {
    return {
      name: 'ask_user',
      description: 'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
      parameters: {
        question: { type: 'string', description: 'Question to ask the user', required: true },
      },
      execute: async (args) => {
        if (this.config.onAskUser) {
          // Page-agent pattern: block until user responds, then continue the loop
          this.config.onStatusUpdate?.('Waiting for your answer...');
          const answer = await this.config.onAskUser(args.question);
          return `User answered: ${answer}`;
        }
        // Legacy fallback: break the loop (context will be lost)
        return `❓ ${args.question}`;
      },
    };
  }

  // ─── Action Registration (useAction hook) ──────────────────

  /** Registers (or replaces) an action; it is offered to the model as a tool. */
  registerAction(action: ActionDefinition): void {
    this.actions.set(action.name, action);
    logger.info('AgentRuntime', `Registered action: ${action.name}`);
  }

  /** Removes a previously registered action; unknown names are ignored. */
  unregisterAction(name: string): void {
    this.actions.delete(name);
  }

  // ─── Navigation Helpers ────────────────────────────────────

  /**
   * Recursively collect ALL screen names from the navigation state tree.
   * This handles tabs, drawers, and nested stacks.
   * Returns an empty list when the ref is missing, not ready, or throws.
   */
  private getRouteNames(): string[] {
    try {
      if (!this.navRef?.isReady?.()) return [];
      const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
      if (!state) return [];
      return this.collectRouteNames(state);
    } catch {
      return [];
    }
  }

  /** Depth-first walk over `state.routes`, returning de-duplicated route names. */
  private collectRouteNames(state: any): string[] {
    const names: string[] = [];
    if (state?.routes) {
      for (const route of state.routes) {
        names.push(route.name);
        // Recurse into nested navigator states
        if (route.state) {
          names.push(...this.collectRouteNames(route.state));
        }
      }
    }
    return [...new Set(names)];
  }

  /**
   * Recursively find the deepest active screen name.
   * For tabs: follows active tab → active screen inside that tab.
   * Falls back to 'Unknown' whenever the name cannot be determined.
   */
  private getCurrentScreenName(): string {
    // Expo Router: use the last non-empty segment of the pathname
    if (this.config.pathname) {
      const segments = this.config.pathname.split('/').filter(Boolean);
      return segments[segments.length - 1] || 'Unknown';
    }

    try {
      if (!this.navRef?.isReady?.()) return 'Unknown';
      const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
      if (!state) return 'Unknown';
      return this.getDeepestScreenName(state);
    } catch {
      return 'Unknown';
    }
  }

  /** Follows `state.index` through nested navigator states to the leaf route. */
  private getDeepestScreenName(state: any): string {
    if (!state?.routes || state.index == null) return 'Unknown';
    const route = state.routes[state.index];
    if (!route) return 'Unknown';
    // If this route has a nested state, recurse deeper
    if (route.state) {
      return this.getDeepestScreenName(route.state);
    }
    return route.name || 'Unknown';
  }

  /** Maps a tool call to a user-friendly status label for the loading overlay. */
  private getToolStatusLabel(toolName: string, args: Record<string, any>): string {
    switch (toolName) {
      case 'tap':
        return `Tapping element ${args.index ?? ''}...`;
      case 'type':
        return `Typing into field...`;
      case 'navigate':
        return `Navigating to ${args.screen || 'screen'}...`;
      case 'done':
        return 'Wrapping up...';
      case 'ask_user':
        return 'Asking you a question...';
      default:
        return `Running ${toolName}...`;
    }
  }

  // ─── Build Tools Array for Provider ────────────────────────

  /**
   * Returns the registered tools followed by one generated tool per registered
   * action. Every action parameter is exposed as required, described by its key.
   */
  private buildToolsForProvider(): ToolDefinition[] {
    const actionTools = [...this.actions.values()].map((action): ToolDefinition => ({
      name: action.name,
      description: action.description,
      parameters: Object.fromEntries(
        Object.entries(action.parameters).map(([key, typeStr]) => [
          key,
          { type: typeStr as any, description: key, required: true },
        ]),
      ),
      execute: async (args) => {
        try {
          // Await so async handlers yield their value (not a serialized Promise)
          // and their rejections are reported to the model instead of escaping.
          const result = await action.handler(args);
          if (typeof result === 'string') return result;
          // JSON.stringify yields undefined for undefined/functions; keep the string contract.
          return JSON.stringify(result) ?? String(result);
        } catch (error: unknown) {
          return `❌ Action "${action.name}" failed: ${describeError(error)}`;
        }
      },
    }));

    return [...this.tools.values(), ...actionTools];
  }

  // ─── Walk Config (passes security settings to FiberTreeWalker) ─

  /** Forwards the configured interactive black/white lists to the tree walker. */
  private getWalkConfig(): WalkConfig {
    const { interactiveBlacklist, interactiveWhitelist } = this.config;
    return { interactiveBlacklist, interactiveWhitelist };
  }

  // ─── Instructions (mirrors page-agent #getInstructions) ───────

  /**
   * Builds the optional `<instructions>` prompt section from the configured
   * system text and per-screen callback. Returns '' when nothing applies.
   * A throwing `getScreenInstructions` callback is logged and skipped.
   */
  private getInstructions(screenName: string): string {
    const { instructions } = this.config;
    if (!instructions) return '';

    let result = '';
    if (instructions.system?.trim()) {
      result += `<system_instructions>\n${instructions.system.trim()}\n</system_instructions>\n`;
    }

    if (instructions.getScreenInstructions) {
      try {
        const screenInstructions = instructions.getScreenInstructions(screenName)?.trim();
        if (screenInstructions) {
          result += `<screen_instructions>\n${screenInstructions}\n</screen_instructions>\n`;
        }
      } catch (error) {
        logger.error('AgentRuntime', 'Failed to get screen instructions:', error);
      }
    }

    return result ? `<instructions>\n${result}</instructions>\n\n` : '';
  }

  // ─── Observation System (mirrors PageAgentCore.#handleObservations) ──

  /**
   * Queues system observations for the next prompt: a note when the screen
   * name changed since the previous step, and warnings when exactly 5 or
   * exactly 2 steps remain.
   */
  private handleObservations(step: number, maxSteps: number, screenName: string): void {
    // Screen change detection
    if (this.lastScreenName && screenName !== this.lastScreenName) {
      this.observations.push(`Screen navigated to → ${screenName}`);
    }
    this.lastScreenName = screenName;

    // Remaining steps warning
    const remaining = maxSteps - step;
    if (remaining === 5) {
      this.observations.push(
        `⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
      );
    } else if (remaining === 2) {
      this.observations.push(
        `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
      );
    }
  }

  // ─── User Prompt Assembly (mirrors PageAgentCore.#assembleUserPrompt) ──

  /**
   * Assembles the per-step user prompt: instructions, agent state, step
   * history plus pending observations, and the current screen content.
   * Side effect: the pending observations are cleared once written.
   */
  private assembleUserPrompt(
    step: number,
    maxSteps: number,
    contextualMessage: string,
    screenName: string,
    screenContent: string,
  ): string {
    let prompt = '';

    // 1. <instructions> (optional system/screen instructions)
    prompt += this.getInstructions(screenName);

    // 2. <agent_state> — user request + step info (mirrors page-agent)
    prompt += '<agent_state>\n';
    prompt += '<user_request>\n';
    prompt += `${contextualMessage}\n`;
    prompt += '</user_request>\n';
    prompt += '<step_info>\n';
    prompt += `Step ${step + 1} of ${maxSteps} max possible steps\n`;
    prompt += '</step_info>\n';
    prompt += '</agent_state>\n\n';

    // 3. <agent_history> — structured per-step history (mirrors page-agent)
    prompt += '<agent_history>\n';

    this.history.forEach((event, position) => {
      const stepIndex = position + 1;
      prompt += `<step_${stepIndex}>\n`;
      prompt += `Previous Goal Eval: ${event.reflection.previousGoalEval}\n`;
      prompt += `Memory: ${event.reflection.memory}\n`;
      prompt += `Plan: ${event.reflection.plan}\n`;
      prompt += `Action Result: ${event.action.output}\n`;
      prompt += `</step_${stepIndex}>\n`;
    });

    // Inject system observations
    for (const obs of this.observations) {
      prompt += `<sys>${obs}</sys>\n`;
    }
    this.observations = [];

    prompt += '</agent_history>\n\n';

    // 4. <screen_state> — dehydrated screen content
    prompt += '<screen_state>\n';
    prompt += `Current Screen: ${screenName}\n`;
    prompt += screenContent + '\n';
    prompt += '</screen_state>\n';

    return prompt;
  }

  // ─── Main Execution Loop ──────────────────────────────────────

  /**
   * Runs the agent for one user message.
   *
   * Only one run may be active at a time; a concurrent call resolves
   * immediately with a failed result. Errors thrown by hooks, the provider or
   * tools are converted into a failed result. `onAfterTask` is invoked exactly
   * once with the final result; if that hook itself throws, the error
   * propagates to the caller. The running flag is always cleared on exit.
   *
   * @param userMessage The user's request, or their reply to a previous
   *   legacy `ask_user` question.
   * @returns The outcome together with the recorded steps.
   */
  async execute(userMessage: string): Promise<ExecutionResult> {
    if (this.isRunning) {
      return { success: false, message: 'Agent is already running.', steps: [] };
    }

    this.isRunning = true;
    try {
      this.history = [];
      this.observations = [];
      this.lastScreenName = '';
      const maxSteps = this.config.maxSteps || DEFAULT_MAX_STEPS;
      const stepDelay = this.config.stepDelay ?? 300;

      // Inject conversational context if we are answering the AI's question
      let contextualMessage = userMessage;
      if (this.lastAskUserQuestion) {
        contextualMessage = `(Note: You just asked the user: "${this.lastAskUserQuestion}")\n\nUser replied: ${userMessage}`;
        this.lastAskUserQuestion = null; // Consume the question
      }

      logger.info('AgentRuntime', `Starting execution: "${contextualMessage}"`);

      let result: ExecutionResult;
      try {
        // Lifecycle: onBeforeTask (mirrors page-agent). Kept inside the guarded
        // region so a throwing hook cannot leave the runtime locked as "running".
        await this.config.onBeforeTask?.();
        result = await this.runSteps(contextualMessage, maxSteps, stepDelay);
      } catch (error: unknown) {
        logger.error('AgentRuntime', 'Execution error:', error);
        result = {
          success: false,
          message: `Error: ${describeError(error)}`,
          steps: this.history,
        };
      }

      // Lifecycle: onAfterTask — single call site, so the hook never fires twice.
      await this.config.onAfterTask?.(result);
      return result;
    } finally {
      this.isRunning = false;
    }
  }

  /**
   * The observe → think → act loop. Returns the final result without calling
   * `onAfterTask`; lets errors propagate to `execute`.
   */
  private async runSteps(
    contextualMessage: string,
    maxSteps: number,
    stepDelay: number,
  ): Promise<ExecutionResult> {
    for (let step = 0; step < maxSteps; step++) {
      logger.info('AgentRuntime', `===== Step ${step + 1}/${maxSteps} =====`);

      // Lifecycle: onBeforeStep (mirrors page-agent)
      await this.config.onBeforeStep?.(step);

      // 1. Walk Fiber tree with security config and dehydrate screen
      const walkResult = walkFiberTree(this.rootRef, this.getWalkConfig());
      const screenName = this.getCurrentScreenName();
      const screen = dehydrateScreen(
        screenName,
        this.getRouteNames(),
        walkResult.elementsText,
        walkResult.interactives,
      );

      logger.info('AgentRuntime', `Screen: ${screen.screenName}`);
      logger.debug('AgentRuntime', `Dehydrated:\n${screen.elementsText}`);

      // 2. Apply transformScreenContent (mirrors page-agent transformPageContent)
      let screenContent = screen.elementsText;
      if (this.config.transformScreenContent) {
        screenContent = await this.config.transformScreenContent(screenContent);
      }

      // 3. Handle observations (mirrors page-agent #handleObservations)
      this.handleObservations(step, maxSteps, screenName);

      // 4. Assemble structured user prompt (mirrors page-agent #assembleUserPrompt)
      const contextMessage = this.assembleUserPrompt(
        step, maxSteps, contextualMessage, screenName, screenContent,
      );

      // 5. Send to AI provider
      this.config.onStatusUpdate?.('Analyzing screen...');
      const systemPrompt = buildSystemPrompt(this.config.language || 'en');
      const tools = this.buildToolsForProvider();

      logger.info('AgentRuntime', `Sending to AI with ${tools.length} tools...`);

      const response = await this.provider.generateContent(
        systemPrompt,
        contextMessage,
        tools,
        this.history,
      );

      // 6. No tool call: treat the plain-text answer as the final response
      if (!response.toolCalls || response.toolCalls.length === 0) {
        logger.warn('AgentRuntime', 'No tool calls in response. Text:', response.text);
        return {
          success: true,
          message: response.text || 'Task completed.',
          steps: this.history,
        };
      }

      // 7. Structured reasoning from provider (no regex parsing needed)
      const { reasoning } = response;
      logger.info('AgentRuntime', `🧠 Plan: ${reasoning.plan}`);
      if (reasoning.memory) {
        logger.debug('AgentRuntime', `💾 Memory: ${reasoning.memory}`);
      }

      // Only process the FIRST tool call per step (one action per step).
      // After one action, the loop re-reads the screen with fresh indexes.
      const toolCall = response.toolCalls[0]!;
      if (response.toolCalls.length > 1) {
        logger.warn('AgentRuntime', `AI returned ${response.toolCalls.length} tool calls, executing only the first one.`);
      }

      logger.info('AgentRuntime', `Tool: ${toolCall.name}(${JSON.stringify(toolCall.args)})`);

      // Dynamic status update based on tool being executed
      this.config.onStatusUpdate?.(this.getToolStatusLabel(toolCall.name, toolCall.args));

      // Find and execute the tool: registered tools win over same-named actions
      const tool = this.tools.get(toolCall.name) ||
        this.buildToolsForProvider().find(t => t.name === toolCall.name);

      const output: string = tool
        ? await tool.execute(toolCall.args)
        : `❌ Unknown tool: ${toolCall.name}`;

      logger.info('AgentRuntime', `Result: ${output}`);

      // Record step with structured reasoning
      const agentStep: AgentStep = {
        stepIndex: step,
        reflection: reasoning,
        action: {
          name: toolCall.name,
          input: toolCall.args,
          output,
        },
      };
      this.history.push(agentStep);

      // Lifecycle: onAfterStep (mirrors page-agent)
      await this.config.onAfterStep?.(this.history);

      // Check if done
      if (toolCall.name === 'done') {
        const result: ExecutionResult = {
          // Only an explicit `success: false` marks the task as failed.
          success: toolCall.args.success !== false,
          message: toolCall.args.text || output,
          steps: this.history,
        };
        logger.info('AgentRuntime', `Task completed: ${result.message}`);
        return result;
      }

      // Check if asking user (legacy path — only breaks loop when onAskUser is NOT set)
      if (toolCall.name === 'ask_user' && !this.config.onAskUser) {
        // Remember the question so the next execute() can present it as context.
        this.lastAskUserQuestion = toolCall.args.question || output;
        return {
          success: true,
          message: output,
          steps: this.history,
        };
      }

      // Step delay (mirrors page-agent stepDelay)
      await pause(stepDelay);
    }

    // Max steps reached
    return {
      success: false,
      message: `Reached maximum steps (${maxSteps}) without completing the task.`,
      steps: this.history,
    };
  }

  /** Update refs (called when component re-renders) */
  updateRefs(rootRef: any, navRef: any): void {
    this.rootRef = rootRef;
    this.navRef = navRef;
  }

  /** Check if agent is currently executing */
  getIsRunning(): boolean {
    return this.isRunning;
  }
}

/** Resolves after `ms` milliseconds. */
function pause(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (typeof error === 'object' && error !== null && 'message' in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}