@mobileai/react-native 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +190 -0
  3. package/lib/module/components/AIAgent.js +149 -0
  4. package/lib/module/components/AIAgent.js.map +1 -0
  5. package/lib/module/components/AgentChatBar.js +120 -0
  6. package/lib/module/components/AgentChatBar.js.map +1 -0
  7. package/lib/module/components/AgentOverlay.js +53 -0
  8. package/lib/module/components/AgentOverlay.js.map +1 -0
  9. package/lib/module/core/AgentRuntime.js +498 -0
  10. package/lib/module/core/AgentRuntime.js.map +1 -0
  11. package/lib/module/core/FiberTreeWalker.js +308 -0
  12. package/lib/module/core/FiberTreeWalker.js.map +1 -0
  13. package/lib/module/core/MCPBridge.js +98 -0
  14. package/lib/module/core/MCPBridge.js.map +1 -0
  15. package/lib/module/core/ScreenDehydrator.js +46 -0
  16. package/lib/module/core/ScreenDehydrator.js.map +1 -0
  17. package/lib/module/core/types.js +2 -0
  18. package/lib/module/core/types.js.map +1 -0
  19. package/lib/module/hooks/useAction.js +32 -0
  20. package/lib/module/hooks/useAction.js.map +1 -0
  21. package/lib/module/index.js +17 -0
  22. package/lib/module/index.js.map +1 -0
  23. package/lib/module/package.json +1 -0
  24. package/lib/module/providers/GeminiProvider.js +178 -0
  25. package/lib/module/providers/GeminiProvider.js.map +1 -0
  26. package/lib/module/utils/logger.js +17 -0
  27. package/lib/module/utils/logger.js.map +1 -0
  28. package/lib/typescript/package.json +1 -0
  29. package/lib/typescript/src/components/AIAgent.d.ts +57 -0
  30. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -0
  31. package/lib/typescript/src/components/AgentChatBar.d.ts +14 -0
  32. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -0
  33. package/lib/typescript/src/components/AgentOverlay.d.ts +10 -0
  34. package/lib/typescript/src/components/AgentOverlay.d.ts.map +1 -0
  35. package/lib/typescript/src/core/AgentRuntime.d.ts +37 -0
  36. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -0
  37. package/lib/typescript/src/core/FiberTreeWalker.d.ts +26 -0
  38. package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -0
  39. package/lib/typescript/src/core/MCPBridge.d.ts +23 -0
  40. package/lib/typescript/src/core/MCPBridge.d.ts.map +1 -0
  41. package/lib/typescript/src/core/ScreenDehydrator.d.ts +20 -0
  42. package/lib/typescript/src/core/ScreenDehydrator.d.ts.map +1 -0
  43. package/lib/typescript/src/core/types.d.ts +138 -0
  44. package/lib/typescript/src/core/types.d.ts.map +1 -0
  45. package/lib/typescript/src/hooks/useAction.d.ts +13 -0
  46. package/lib/typescript/src/hooks/useAction.d.ts.map +1 -0
  47. package/lib/typescript/src/index.d.ts +10 -0
  48. package/lib/typescript/src/index.d.ts.map +1 -0
  49. package/lib/typescript/src/providers/GeminiProvider.d.ts +23 -0
  50. package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -0
  51. package/lib/typescript/src/utils/logger.d.ts +7 -0
  52. package/lib/typescript/src/utils/logger.d.ts.map +1 -0
  53. package/package.json +143 -0
  54. package/src/components/AIAgent.tsx +222 -0
  55. package/src/components/AgentChatBar.tsx +136 -0
  56. package/src/components/AgentOverlay.tsx +48 -0
  57. package/src/core/AgentRuntime.ts +505 -0
  58. package/src/core/FiberTreeWalker.ts +349 -0
  59. package/src/core/MCPBridge.ts +110 -0
  60. package/src/core/ScreenDehydrator.ts +53 -0
  61. package/src/core/types.ts +185 -0
  62. package/src/hooks/useAction.ts +40 -0
  63. package/src/index.ts +22 -0
  64. package/src/providers/GeminiProvider.ts +210 -0
  65. package/src/utils/logger.ts +21 -0
@@ -0,0 +1,505 @@
1
+ /**
2
+ * AgentRuntime — The main agent loop, inspired by page-agent.js.
3
+ *
4
+ * Flow:
5
+ * 1. Walk Fiber tree → detect interactive elements
6
+ * 2. Dehydrate screen → text for LLM
7
+ * 3. Send to AI provider with tools
8
+ * 4. Parse tool call → execute (tap, type, navigate, done)
9
+ * 5. If not done, repeat from step 1 (re-dehydrate after UI change)
10
+ */
11
+
12
+ import { logger } from '../utils/logger';
13
+ import { walkFiberTree } from './FiberTreeWalker';
14
+ import type { WalkConfig } from './FiberTreeWalker';
15
+ import { dehydrateScreen } from './ScreenDehydrator';
16
+ import type {
17
+ AIProvider,
18
+ AgentConfig,
19
+ AgentStep,
20
+ ExecutionResult,
21
+ ToolDefinition,
22
+ ActionDefinition,
23
+ } from './types';
24
+
25
+ const DEFAULT_MAX_STEPS = 10; // Step budget for one execute() run; used whenever config.maxSteps is unset or falsy (including 0).
26
+
27
+ // ─── System Prompt ─────────────────────────────────────────────
28
+
29
/**
 * Builds the system prompt sent to the AI provider on every step.
 *
 * The prompt describes the input blocks the model receives, the notation used
 * for interactive elements, the built-in tools, and the rules of the loop.
 *
 * @param language Language code or locale tag for the reply language. Arabic is
 *   selected when the primary subtag is `ar`, compared case-insensitively and
 *   accepting both `-` and `_` separators (`ar`, `ar-EG`, `AR_sa`). Every other
 *   value, including the empty string, selects English.
 * @returns The complete system prompt text.
 */
function buildSystemPrompt(language: string): string {
  // Compare only the primary subtag so regional variants are not silently
  // treated as English.
  const primarySubtag = language.trim().toLowerCase().split(/[-_]/)[0];
  const replyLanguageRule =
    primarySubtag === 'ar'
      ? 'Respond to the user in Arabic.'
      : 'Respond to the user in English.';

  return `You are an AI agent that controls a React Native mobile app. You operate in an iterative loop to accomplish user requests.

${replyLanguageRule}

<input>
At every step you receive:
1. <screen_state>: Current screen name, available screens, and interactive elements indexed for actions.
2. <agent_history>: Your previous steps and their results.
3. <user_request>: The user's original request.
</input>

<screen_state>
Interactive elements are listed as [index]<type attrs>label</>
- index: numeric identifier for interaction
- type: element type (pressable, text-input, switch)
- label: visible text content of the element

Only elements with [index] are interactive. Use the index to tap or type into them.
</screen_state>

<tools>
Available tools:
- tap(index): Tap an interactive element by its index. This triggers its onPress handler.
- type(index, text): Type text into a text-input element by its index.
- navigate(screen, params): Navigate to a specific screen. params is optional JSON object.
- done(text, success): Complete the task. text is your response to the user.
- ask_user(question): Ask the user for clarification if needed.
</tools>

<rules>
- Only interact with elements that have an [index].
- After tapping an element, the screen may change. Wait for the next step to see updated elements.
- If the current screen doesn't have what you need, use navigate() to go to another screen.
- If you're stuck or need more info, use ask_user().
- When the task is complete, ALWAYS call done() with a summary.
- Be efficient — complete tasks in as few steps as possible.
- If a tap navigates to another screen, the next step will show the new screen's elements.
</rules>`;
}
71
+
72
+ // ─── Agent Runtime ─────────────────────────────────────────────
73
+
74
/**
 * AgentRuntime — runs the agent loop for one user request.
 *
 * Each step walks the Fiber tree under `rootRef`, dehydrates the screen to
 * text, sends it to the AI provider together with the available tools, and
 * executes the tool calls the provider returns. The loop ends when the model
 * calls `done` or `ask_user`, returns no tool call, an error is thrown, or the
 * step budget is exhausted.
 */
export class AgentRuntime {
  /** Pause after a tap or a navigation so the UI can update before the next step. */
  private static readonly UI_SETTLE_MS = 500;
  /** How long to wait once for a navigator that reports it is not ready. */
  private static readonly NAV_READY_RETRY_MS = 1000;
  /** Delay between steps when `config.stepDelay` is not provided. */
  private static readonly DEFAULT_STEP_DELAY_MS = 300;

  private provider: AIProvider;
  private config: AgentConfig;
  private rootRef: any;
  private navRef: any;
  /** Built-in tools plus `config.customTools` overrides, keyed by registration name. */
  private tools: Map<string, ToolDefinition> = new Map();
  /** Actions registered at runtime through `registerAction`, keyed by action name. */
  private actions: Map<string, ActionDefinition> = new Map();
  /** Steps recorded during the current (or most recent) `execute` run. */
  private history: AgentStep[] = [];
  private isRunning = false;
  /** Question from the last `ask_user` call; prepended to the next user message. */
  private lastAskUserQuestion: string | null = null;

  /**
   * @param provider AI provider used to generate tool calls.
   * @param config Agent configuration (hooks, limits, custom tools, instructions).
   * @param rootRef Root reference handed to `walkFiberTree`.
   * @param navRef Navigation container ref; may be falsy when navigation is unavailable.
   */
  constructor(
    provider: AIProvider,
    config: AgentConfig,
    rootRef: any,
    navRef: any,
  ) {
    this.provider = provider;
    this.config = config;
    this.rootRef = rootRef;
    this.navRef = navRef;

    this.registerBuiltInTools();

    // Apply customTools — mirrors page-agent: null = remove, otherwise override
    if (config.customTools) {
      for (const [name, tool] of Object.entries(config.customTools)) {
        if (tool === null) {
          this.tools.delete(name);
          logger.info('AgentRuntime', `Removed tool: ${name}`);
        } else {
          this.tools.set(name, tool);
          logger.info('AgentRuntime', `Overrode tool: ${name}`);
        }
      }
    }
  }

  // ─── Small Helpers ─────────────────────────────────────────

  /** Resolves after `ms` milliseconds. */
  private static sleep(ms: number): Promise<void> {
    return new Promise<void>((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Extracts a printable message from anything that can be thrown.
   * Thrown values are not guaranteed to be `Error` instances.
   */
  private static describeError(error: unknown): string {
    if (error instanceof Error) return error.message;
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /**
   * Re-walks the Fiber tree and looks up an interactive element by index.
   *
   * The tree is walked again on every call because the UI may have changed
   * since the screen was dehydrated. A numeric string index (e.g. `"3"`) is
   * accepted as well, since model-produced arguments are not always typed.
   *
   * @returns The matching element (or `undefined`) and all current interactives.
   */
  private findInteractive(rawIndex: unknown) {
    const { interactives: elements } = walkFiberTree(this.rootRef, this.getWalkConfig());
    const wanted =
      typeof rawIndex === 'string' && rawIndex.trim() !== '' ? Number(rawIndex) : rawIndex;
    const element = elements.find((el) => el.index === wanted);
    return { element, elements };
  }

  // ─── Tool Registration ─────────────────────────────────────

  /** Registers the built-in tools: `tap`, `type`, `navigate`, `done`, `ask_user`. */
  private registerBuiltInTools(): void {
    // tap — tap an interactive element by index
    this.tools.set('tap', {
      name: 'tap',
      description: 'Tap an interactive element by its index to trigger its onPress handler.',
      parameters: {
        index: { type: 'number', description: 'The index of the element to tap', required: true },
      },
      execute: async (args) => {
        const { element, elements } = this.findInteractive(args.index);
        if (!element) {
          return `❌ Element with index ${args.index} not found. Available indexes: ${elements.map(e => e.index).join(', ')}`;
        }
        if (!element.props.onPress) {
          return `❌ Element [${args.index}] "${element.label}" does not have an onPress handler.`;
        }
        try {
          element.props.onPress();
          // Wait for UI to update after tap
          await AgentRuntime.sleep(AgentRuntime.UI_SETTLE_MS);
          return `✅ Tapped [${args.index}] "${element.label}"`;
        } catch (error: unknown) {
          return `❌ Error tapping [${args.index}]: ${AgentRuntime.describeError(error)}`;
        }
      },
    });

    // type — type text into a TextInput
    this.tools.set('type', {
      name: 'type',
      description: 'Type text into a text-input element by its index.',
      parameters: {
        index: { type: 'number', description: 'The index of the text-input element', required: true },
        text: { type: 'string', description: 'The text to type', required: true },
      },
      execute: async (args) => {
        const { element } = this.findInteractive(args.index);
        if (!element) {
          return `❌ Element with index ${args.index} not found.`;
        }
        if (!element.props.onChangeText) {
          return `❌ Element [${args.index}] "${element.label}" is not a text input.`;
        }
        try {
          element.props.onChangeText(args.text);
          return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
        } catch (error: unknown) {
          return `❌ Error typing: ${AgentRuntime.describeError(error)}`;
        }
      },
    });

    // navigate — navigate to a screen
    this.tools.set('navigate', {
      name: 'navigate',
      description: 'Navigate to a specific screen in the app.',
      parameters: {
        screen: { type: 'string', description: 'Screen name to navigate to', required: true },
        params: { type: 'string', description: 'Optional JSON params object', required: false },
      },
      execute: async (args) => {
        if (!this.navRef) {
          return '❌ Navigation ref not available.';
        }
        // Per React Navigation docs: must check isReady() before navigate
        // https://reactnavigation.org/docs/navigating-without-navigation-prop#handling-initialization
        if (!this.navRef.isReady()) {
          // Wait a bit and retry — navigator may still be mounting
          await AgentRuntime.sleep(AgentRuntime.NAV_READY_RETRY_MS);
          if (!this.navRef.isReady()) {
            return '❌ Navigation is not ready yet. The navigator may not have finished mounting.';
          }
        }
        try {
          // params may arrive as a JSON string or as an already-parsed object.
          // A malformed JSON string is reported through the catch below.
          const params = args.params
            ? (typeof args.params === 'string' ? JSON.parse(args.params) : args.params)
            : undefined;
          this.navRef.navigate(args.screen, params);
          await AgentRuntime.sleep(AgentRuntime.UI_SETTLE_MS);
          return `✅ Navigated to "${args.screen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
        } catch (error: unknown) {
          return `❌ Navigation error: ${AgentRuntime.describeError(error)}. Available screens: ${this.getRouteNames().join(', ')}`;
        }
      },
    });

    // done — complete the task
    this.tools.set('done', {
      name: 'done',
      description: 'Complete the task with a message to the user.',
      parameters: {
        text: { type: 'string', description: 'Response message to the user', required: true },
        success: { type: 'boolean', description: 'Whether the task was completed successfully', required: true },
      },
      execute: async (args) => {
        return args.text;
      },
    });

    // ask_user — ask for clarification
    this.tools.set('ask_user', {
      name: 'ask_user',
      description: 'Ask the user for clarification or more information.',
      parameters: {
        question: { type: 'string', description: 'Question to ask the user', required: true },
      },
      execute: async (args) => {
        return `❓ ${args.question}`;
      },
    });
  }

  // ─── Action Registration (useAction hook) ──────────────────

  /** Registers (or replaces) an action; it is offered to the model as a tool. */
  registerAction(action: ActionDefinition): void {
    this.actions.set(action.name, action);
    logger.info('AgentRuntime', `Registered action: ${action.name}`);
  }

  /** Removes a previously registered action. Unknown names are ignored. */
  unregisterAction(name: string): void {
    this.actions.delete(name);
  }

  // ─── Navigation Helpers ────────────────────────────────────

  /**
   * Lists the route names exposed by the navigation state.
   * Returns an empty array when navigation is missing, not ready, or throws.
   */
  private getRouteNames(): string[] {
    try {
      if (!this.navRef?.isReady?.()) return [];
      const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
      if (state?.routeNames) return state.routeNames;
      if (state?.routes) return state.routes.map((r: any) => r.name);
      return [];
    } catch {
      return [];
    }
  }

  /**
   * Returns the name of the active route in the navigation state, or
   * `'Unknown'` when it cannot be determined.
   */
  private getCurrentScreenName(): string {
    try {
      if (!this.navRef?.isReady?.()) return 'Unknown';
      const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
      if (!state) return 'Unknown';
      const route = state.routes[state.index];
      return route?.name || 'Unknown';
    } catch {
      return 'Unknown';
    }
  }

  // ─── Build Tools Array for Provider ────────────────────────

  /**
   * Wraps a registered action as a tool definition.
   *
   * Every declared parameter is exposed as required, with the parameter name
   * as its description. The handler is awaited so async handlers report their
   * resolved value, and a rejected handler is reported as a failure string
   * instead of aborting the run.
   */
  private toToolDefinition(action: ActionDefinition): ToolDefinition {
    return {
      name: action.name,
      description: action.description,
      parameters: Object.fromEntries(
        Object.entries(action.parameters ?? {}).map(([key, typeStr]) => [
          key,
          { type: typeStr as any, description: key, required: true },
        ]),
      ),
      execute: async (args) => {
        try {
          const result = await action.handler(args);
          if (typeof result === 'string') return result;
          // JSON.stringify yields undefined for undefined/functions/symbols.
          const serialized: string | undefined = JSON.stringify(result);
          return serialized ?? `✅ Action "${action.name}" executed.`;
        } catch (error: unknown) {
          return `❌ Action "${action.name}" failed: ${AgentRuntime.describeError(error)}`;
        }
      },
    };
  }

  /** Returns the built-in/custom tools followed by all registered actions. */
  private buildToolsForProvider(): ToolDefinition[] {
    return [
      ...this.tools.values(),
      ...Array.from(this.actions.values(), (action) => this.toToolDefinition(action)),
    ];
  }

  // ─── Walk Config (passes security settings to FiberTreeWalker) ─

  /** Forwards the blacklist/whitelist settings from the config to the walker. */
  private getWalkConfig(): WalkConfig {
    const { interactiveBlacklist, interactiveWhitelist } = this.config;
    return { interactiveBlacklist, interactiveWhitelist };
  }

  // ─── Instructions (mirrors page-agent #getInstructions) ───────

  /**
   * Builds the `<instructions>` block from the configured system-level and
   * per-screen instructions.
   *
   * @param screenName Name passed to `instructions.getScreenInstructions`.
   * @returns The block followed by a blank line, or `''` when there is nothing to add.
   */
  private getInstructions(screenName: string): string {
    const { instructions } = this.config;
    if (!instructions) return '';

    let result = '';
    if (instructions.system?.trim()) {
      result += `<system_instructions>\n${instructions.system.trim()}\n</system_instructions>\n`;
    }

    if (instructions.getScreenInstructions) {
      try {
        const screenInstructions = instructions.getScreenInstructions(screenName)?.trim();
        if (screenInstructions) {
          result += `<screen_instructions>\n${screenInstructions}\n</screen_instructions>\n`;
        }
      } catch (error) {
        // A failing user callback must not abort the step.
        logger.error('AgentRuntime', 'Failed to get screen instructions:', error);
      }
    }

    return result ? `<instructions>\n${result}</instructions>\n\n` : '';
  }

  // ─── Main Execution Loop (mirrors PageAgentCore.execute) ───────

  /**
   * Runs the agent loop for one user message.
   *
   * Only one run may be active at a time; a concurrent call returns a failed
   * result immediately without invoking any hook. Errors thrown during the
   * run (including from `onBeforeTask`/`onBeforeStep`/`onAfterStep`, the
   * provider, or a tool) are converted into a failed result. `onAfterTask` is
   * invoked exactly once per run; if it throws, the error is logged and the
   * result is still returned. The running flag is always cleared.
   *
   * @param userMessage The user's request, or their reply to a prior `ask_user`.
   * @returns The outcome, including every recorded step.
   */
  async execute(userMessage: string): Promise<ExecutionResult> {
    if (this.isRunning) {
      return { success: false, message: 'Agent is already running.', steps: [] };
    }

    this.isRunning = true;
    this.history = [];

    try {
      let result: ExecutionResult;
      try {
        result = await this.runTask(userMessage);
      } catch (error: unknown) {
        logger.error('AgentRuntime', 'Execution error:', error);
        result = {
          success: false,
          message: `Error: ${AgentRuntime.describeError(error)}`,
          steps: this.history,
        };
      }

      // Lifecycle: onAfterTask (mirrors page-agent) — called once, whatever the outcome.
      try {
        await this.config.onAfterTask?.(result);
      } catch (hookError: unknown) {
        logger.error('AgentRuntime', 'onAfterTask hook failed:', hookError);
      }
      return result;
    } finally {
      // Must run even when a lifecycle hook throws, or the runtime stays locked.
      this.isRunning = false;
    }
  }

  /**
   * Performs the step loop and returns its outcome. Does not touch the running
   * flag and does not call `onAfterTask`; `execute` owns both.
   */
  private async runTask(userMessage: string): Promise<ExecutionResult> {
    const maxSteps = this.config.maxSteps || DEFAULT_MAX_STEPS;
    const stepDelay = this.config.stepDelay ?? AgentRuntime.DEFAULT_STEP_DELAY_MS;

    // Inject conversational context if we are answering the AI's question
    let contextualMessage = userMessage;
    if (this.lastAskUserQuestion) {
      contextualMessage = `(Note: You just asked the user: "${this.lastAskUserQuestion}")\n\nUser replied: ${userMessage}`;
      this.lastAskUserQuestion = null; // Consume the question
    }

    logger.info('AgentRuntime', `Starting execution: "${contextualMessage}"`);

    // Lifecycle: onBeforeTask (mirrors page-agent)
    await this.config.onBeforeTask?.();

    // The prompt depends only on the configured language, so build it once.
    const systemPrompt = buildSystemPrompt(this.config.language || 'en');

    for (let step = 0; step < maxSteps; step++) {
      logger.info('AgentRuntime', `===== Step ${step + 1}/${maxSteps} =====`);

      // Lifecycle: onBeforeStep (mirrors page-agent)
      await this.config.onBeforeStep?.(step);

      // 1. Walk Fiber tree with security config and dehydrate screen
      const walkResult = walkFiberTree(this.rootRef, this.getWalkConfig());
      const screenName = this.getCurrentScreenName();
      const screen = dehydrateScreen(
        screenName,
        this.getRouteNames(),
        walkResult.elementsText,
        walkResult.interactives,
      );

      logger.info('AgentRuntime', `Screen: ${screen.screenName}`);
      logger.debug('AgentRuntime', `Dehydrated:\n${screen.elementsText}`);

      // 2. Apply transformScreenContent (mirrors page-agent transformPageContent)
      let screenContent = screen.elementsText;
      if (this.config.transformScreenContent) {
        screenContent = await this.config.transformScreenContent(screenContent);
      }

      // 3. Build context message with instructions + screen state.
      //    The user request is only included on the first step.
      const instructionsBlock = this.getInstructions(screenName);
      const contextMessage = step === 0
        ? `${instructionsBlock}<user_request>${contextualMessage}</user_request>\n\n<screen_state>\n${screenContent}\n</screen_state>`
        : `${instructionsBlock}<screen_state>\n${screenContent}\n</screen_state>`;

      // 4. Send to AI provider. Tools are rebuilt each step because actions
      //    can be registered or unregistered between steps.
      const tools = this.buildToolsForProvider();

      logger.info('AgentRuntime', `Sending to AI with ${tools.length} tools...`);

      const response = await this.provider.generateContent(
        systemPrompt,
        contextMessage,
        tools,
        this.history,
      );

      // 5. Process tool calls. A plain-text answer ends the run successfully.
      if (!response.toolCalls || response.toolCalls.length === 0) {
        logger.warn('AgentRuntime', 'No tool calls in response. Text:', response.text);
        return {
          success: true,
          message: response.text || 'Task completed.',
          steps: this.history,
        };
      }

      for (const toolCall of response.toolCalls) {
        // Tolerate a tool call that carries no arguments object at all.
        const toolArgs = toolCall.args ?? {};
        logger.info('AgentRuntime', `Tool: ${toolCall.name}(${JSON.stringify(toolArgs)})`);

        // Find and execute the tool: registration key first, then by tool name
        // across built-in/custom tools and registered actions.
        const tool = this.tools.get(toolCall.name) ||
          this.buildToolsForProvider().find(t => t.name === toolCall.name);

        const output: string = tool
          ? await tool.execute(toolArgs)
          : `❌ Unknown tool: ${toolCall.name}`;

        logger.info('AgentRuntime', `Result: ${output}`);

        // Record step
        const agentStep: AgentStep = {
          stepIndex: step,
          reflection: {
            evaluationPreviousGoal: step > 0 ? 'Evaluating...' : 'First step',
            memory: '',
            nextGoal: '',
          },
          action: {
            name: toolCall.name,
            input: toolArgs,
            output,
          },
        };
        this.history.push(agentStep);

        // Lifecycle: onAfterStep (mirrors page-agent)
        await this.config.onAfterStep?.(this.history);

        // Check if done — only an explicit `success: false` marks failure.
        if (toolCall.name === 'done') {
          logger.info('AgentRuntime', `Task completed: ${output}`);
          return {
            success: toolArgs.success !== false,
            message: output,
            steps: this.history,
          };
        }

        // Check if asking user — remember the question so the next execute()
        // call can present the user's message as a reply to it.
        if (toolCall.name === 'ask_user') {
          this.lastAskUserQuestion = toolArgs.question || output;
          return {
            success: true,
            message: output,
            steps: this.history,
          };
        }
      }

      // Step delay (mirrors page-agent stepDelay)
      await AgentRuntime.sleep(stepDelay);
    }

    // Max steps reached
    return {
      success: false,
      message: `Reached maximum steps (${maxSteps}) without completing the task.`,
      steps: this.history,
    };
  }

  /** Update refs (called when component re-renders) */
  updateRefs(rootRef: any, navRef: any): void {
    this.rootRef = rootRef;
    this.navRef = navRef;
  }

  /** Check if agent is currently executing */
  getIsRunning(): boolean {
    return this.isRunning;
  }
}