@mobileai/react-native 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +78 -7
  2. package/lib/module/components/AIAgent.js +40 -4
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +177 -29
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/core/AgentRuntime.js +268 -126
  7. package/lib/module/core/AgentRuntime.js.map +1 -1
  8. package/lib/module/core/FiberTreeWalker.js +74 -20
  9. package/lib/module/core/FiberTreeWalker.js.map +1 -1
  10. package/lib/module/core/systemPrompt.js +164 -0
  11. package/lib/module/core/systemPrompt.js.map +1 -0
  12. package/lib/module/providers/GeminiProvider.js +189 -73
  13. package/lib/module/providers/GeminiProvider.js.map +1 -1
  14. package/lib/typescript/src/components/AIAgent.d.ts +9 -1
  15. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  16. package/lib/typescript/src/components/AgentChatBar.d.ts +4 -3
  17. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  18. package/lib/typescript/src/core/AgentRuntime.d.ts +16 -0
  19. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  20. package/lib/typescript/src/core/FiberTreeWalker.d.ts +5 -0
  21. package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -1
  22. package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
  23. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
  24. package/lib/typescript/src/core/types.d.ts +51 -13
  25. package/lib/typescript/src/core/types.d.ts.map +1 -1
  26. package/lib/typescript/src/providers/GeminiProvider.d.ts +33 -13
  27. package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
  28. package/package.json +16 -14
  29. package/src/components/AIAgent.tsx +41 -1
  30. package/src/components/AgentChatBar.tsx +150 -28
  31. package/src/core/AgentRuntime.ts +287 -131
  32. package/src/core/FiberTreeWalker.ts +74 -19
  33. package/src/core/systemPrompt.ts +162 -0
  34. package/src/core/types.ts +58 -10
  35. package/src/providers/GeminiProvider.ts +174 -101
@@ -37,6 +37,55 @@ const SWITCH_TYPES = new Set(['Switch', 'RCTSwitch']);
37
37
  const TEXT_TYPES = new Set(['Text', 'RCTText']);
38
38
  // ScrollView/FlatList/SectionList detection can be added later for scroll tool
39
39
 
40
+ // ─── State Extraction (mirrors page-agent DEFAULT_INCLUDE_ATTRIBUTES) ──
41
+
42
/** Scalar props read as state when a component exposes no accessibility metadata. */
const STATE_PROPS = ['value', 'checked', 'selected', 'active', 'on', 'isOn', 'toggled', 'enabled'];

/**
 * Serialize a fiber node's state-bearing props as `key="value"` pairs.
 *
 * Sources are consulted in priority order, and a key already emitted by a
 * higher-priority source is never emitted again:
 *   1. every entry of `accessibilityState`
 *   2. `accessibilityRole` (emitted as `role`)
 *   3. the scalar props listed in `STATE_PROPS`
 *
 * Values that are `undefined`, `null`, functions or objects are skipped so the
 * output never contains `"null"` or `"[object Object]"`.
 *
 * @param props Props object of the node; a nullish or non-object value yields ''.
 * @returns Space-separated attribute string, or '' when nothing qualifies.
 */
function extractStateAttributes(props: any): string {
  if (!props || typeof props !== 'object') return '';

  const parts: string[] = [];
  const seen = new Set<string>();

  // Single gate for all three sources: dedupe by key and keep only scalars.
  const add = (key: string, value: unknown): void => {
    if (seen.has(key)) return;
    if (value === undefined || value === null) return;
    if (typeof value === 'function' || typeof value === 'object') return;
    seen.add(key);
    parts.push(`${key}="${String(value)}"`);
  };

  // Priority 1: accessibilityState
  const state: unknown = props.accessibilityState;
  if (state && typeof state === 'object') {
    for (const [key, value] of Object.entries(state)) {
      add(key, value);
    }
  }

  // Priority 2: accessibilityRole
  if (props.accessibilityRole) {
    add('role', props.accessibilityRole);
  }

  // Priority 3: direct scalar props, used only for keys not reported above
  for (const key of STATE_PROPS) {
    add(key, props[key]);
  }

  return parts.join(' ');
}
74
+
75
/**
 * Report whether a props object carries at least one event-handler prop.
 *
 * A prop counts as a handler when its name is `on` followed by an uppercase
 * letter (`onPress`, `onValueChange`) and its value is a function. Function
 * props that merely begin with the letters "on" (`once`, `online`) are not
 * treated as handlers.
 *
 * @param props Candidate props; anything that is not a non-null object yields false.
 * @returns true when at least one matching handler function is present.
 */
export function hasAnyEventHandler(props: any): boolean {
  if (!props || typeof props !== 'object') return false;

  const handlerName = /^on[A-Z]/;
  return Object.keys(props).some(
    (key) => handlerName.test(key) && typeof props[key] === 'function',
  );
}
88
+
40
89
  // ─── Fiber Node Helpers ────────────────────────────────────────
41
90
 
42
91
  /**
@@ -66,23 +115,27 @@ function getElementType(fiber: any): ElementType | null {
66
115
  const name = getComponentName(fiber);
67
116
  const props = fiber.memoizedProps || {};
68
117
 
69
- // Check by component name
118
+ // Check by component name (known React Native types)
70
119
  if (name && PRESSABLE_TYPES.has(name)) return 'pressable';
71
120
  if (name && TEXT_INPUT_TYPES.has(name)) return 'text-input';
72
121
  if (name && SWITCH_TYPES.has(name)) return 'switch';
73
122
 
74
- // Check by props any component with onPress is interactive
75
- if (props.onPress && typeof props.onPress === 'function') return 'pressable';
76
-
77
- // Check by accessibility role
123
+ // Check by accessibilityRole (covers custom components with proper ARIA)
78
124
  const role = props.accessibilityRole || props.role;
125
+ if (role === 'switch') return 'switch';
79
126
  if (role === 'button' || role === 'link' || role === 'checkbox' || role === 'radio') {
80
- if (props.onPress) return 'pressable';
127
+ return 'pressable';
81
128
  }
82
129
 
130
+ // Check by props — any component with onPress is interactive
131
+ if (props.onPress && typeof props.onPress === 'function') return 'pressable';
132
+
83
133
  // TextInput detection by props
84
134
  if (props.onChangeText && typeof props.onChangeText === 'function') return 'text-input';
85
135
 
136
+ // Switch detection by props (custom switches with onValueChange)
137
+ if (props.onValueChange && typeof props.onValueChange === 'function') return 'switch';
138
+
86
139
  return null;
87
140
  }
88
141
 
@@ -267,7 +320,7 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
267
320
  let currentIndex = 0;
268
321
  const hasWhitelist = config?.interactiveWhitelist && (config.interactiveWhitelist.length ?? 0) > 0;
269
322
 
270
- function processNode(node: any, depth: number = 0): string {
323
+ function processNode(node: any, depth: number = 0, isInsideInteractive: boolean = false): string {
271
324
  if (!node) return '';
272
325
 
273
326
  const props = node.memoizedProps || {};
@@ -275,29 +328,28 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
275
328
  // ── Security Constraints ──
276
329
  if (props.aiIgnore === true) return '';
277
330
  if (matchesRefList(node, config?.interactiveBlacklist)) {
278
- // Blacklisted nodes themselves aren't interactive, but we still walk children for structure
279
331
  let childText = '';
280
332
  let currentChild = node.child;
281
333
  while (currentChild) {
282
- childText += processNode(currentChild, depth);
334
+ childText += processNode(currentChild, depth, isInsideInteractive);
283
335
  currentChild = currentChild.sibling;
284
336
  }
285
337
  return childText;
286
338
  }
287
339
 
288
- // Process all children first
340
+ // Interactive check — skip if already inside an interactive ancestor (dedup nested TextInput layers)
341
+ const isWhitelisted = matchesRefList(node, config?.interactiveWhitelist);
342
+ const elementType = getElementType(node);
343
+ const shouldInclude = !isInsideInteractive && (hasWhitelist ? isWhitelisted : (elementType && !isDisabled(node)));
344
+
345
+ // Process children — if this node IS interactive, children won't register as separate interactives
289
346
  let childrenText = '';
290
347
  let currentChild = node.child;
291
348
  while (currentChild) {
292
- childrenText += processNode(currentChild, depth + 1);
349
+ childrenText += processNode(currentChild, depth + 1, isInsideInteractive || !!shouldInclude);
293
350
  currentChild = currentChild.sibling;
294
351
  }
295
352
 
296
- // Interactive Check
297
- const isWhitelisted = matchesRefList(node, config?.interactiveWhitelist);
298
- const elementType = getElementType(node);
299
- const shouldInclude = hasWhitelist ? isWhitelisted : (elementType && !isDisabled(node));
300
-
301
353
  const indent = ' '.repeat(depth);
302
354
 
303
355
  if (shouldInclude) {
@@ -307,16 +359,19 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
307
359
  label = props.placeholder;
308
360
  }
309
361
 
310
- // Record interactive element
311
362
  interactives.push({
312
363
  index: currentIndex,
313
364
  type: resolvedType,
314
365
  label: label || `[${resolvedType}]`,
315
366
  fiberNode: node,
316
- props: { ...props }, // snapshot
367
+ props: { ...props },
317
368
  });
318
369
 
319
- const elementOutput = `${indent}[${currentIndex}]<${resolvedType}>${label ? label + ' ' : ''}${childrenText.trim() ? childrenText.trim() : ''}</>\n`;
370
+ // Build output tag with state attributes (mirrors page-agent format)
371
+ const stateAttrs = extractStateAttributes(props);
372
+ const attrStr = stateAttrs ? ` ${stateAttrs}` : '';
373
+ const textContent = label || '';
374
+ const elementOutput = `${indent}[${currentIndex}]<${resolvedType}${attrStr}>${textContent} />${childrenText.trim() ? '\n' + childrenText : ''}\n`;
320
375
  currentIndex++;
321
376
  return elementOutput;
322
377
  }
@@ -0,0 +1,162 @@
1
+ /**
2
+ * System prompt for the AI agent — adapted from page-agent reference.
3
+ *
4
+ * Separated into its own file for maintainability.
5
+ * The prompt uses XML-style tags (matching page-agent's structure)
6
+ * to give the LLM clear, structured instructions.
7
+ */
8
+
9
/**
 * Build the system prompt sent to the model.
 *
 * The prompt is a fixed template; the only dynamic part is the working-language
 * line inside <language_settings>, which is Arabic when `language` names Arabic
 * and English otherwise.
 *
 * @param language Language code or locale tag. The primary subtag is matched
 *   case-insensitively, so 'ar', 'AR', 'ar-EG' and 'ar_SA' all select Arabic.
 *   Any other value (including a non-string passed from untyped callers)
 *   selects English.
 * @returns The complete prompt text.
 */
export function buildSystemPrompt(language: string): string {
  // Match only the primary subtag so that e.g. 'arn' is not mistaken for Arabic.
  const isArabic = typeof language === 'string' && /^ar(?:[-_]|$)/i.test(language.trim());

  return `You are an AI agent designed to operate in an iterative loop to automate tasks in a React Native mobile app. Your ultimate goal is accomplishing the task provided in <user_request>.

<intro>
You excel at the following tasks:
1. Reading and understanding mobile app screens to extract precise information
2. Automating UI interactions like tapping buttons and filling forms
3. Gathering information from the screen and reporting it to the user
4. Operating effectively in an agent loop
5. Answering user questions based on what is visible on screen
</intro>

<language_settings>
${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
- Use the language that the user is using. Return in user's language.
</language_settings>

<input>
At every step, your input will consist of:
1. <agent_history>: Your previous steps and their results.
2. <user_request>: The user's original request.
3. <screen_state>: Current screen name, available screens, and interactive elements indexed for actions.

Agent history uses the following format per step:
<step_N>
Previous Goal Eval: Assessment of last action
Memory: Key facts to remember
Plan: What you did next
Action Result: Result of the action
</step_N>

System messages may appear as <sys>...</sys> between steps.
</input>

<screen_state>
Interactive elements are listed as [index]<type attrs>label />
- index: numeric identifier for interaction
- type: element type (pressable, text-input, switch)
- attrs: state attributes like value="true", checked="false", role="switch"
- label: visible text content of the element

Only elements with [index] are interactive. Use the index to tap or type into them.
Pure text elements without [] are NOT interactive — they are informational content you can read.
</screen_state>

<tools>
Available tools:
- tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
- type(index, text): Type text into a text-input element by its index.
- navigate(screen, params): Navigate to a specific screen. params is optional JSON object.
- done(text, success): Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.
- ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
</tools>

<rules>
- There are 2 types of requests — always determine which type BEFORE acting:
1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
Read the screen content and call done() with the answer. Do NOT perform any tap/type/navigate actions.
2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
Execute the required UI interactions using tap/type/navigate tools.
- For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
1. Specific instructions: Follow each step precisely, do not skip.
2. Open-ended tasks: Plan the steps yourself.
- Only interact with elements that have an [index].
- After tapping an element, the screen may change. Wait for the next step to see updated elements.
- If the current screen doesn't have what you need, use navigate() to go to another screen.
- If a tap navigates to another screen, the next step will show the new screen's elements.
- Do not repeat one action for more than 3 times unless some conditions changed.
- After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
- After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
- If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
- Do not fill in login/signup forms unless the user provides credentials. If asked to log in, use ask_user to request their email and password first.
- Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Always ask the user.
- Trying too hard can be harmful. If stuck, call done() with partial results rather than repeating failed actions.
- If you do not know how to proceed with the current screen, use ask_user to request specific instructions from the user.
</rules>

<task_completion_rules>
You must call the done action in one of these cases:
- When you have fully completed the USER REQUEST.
- When the user asked for information and you can see the answer on screen.
- When you reach the final allowed step, even if the task is incomplete.
- When you feel stuck or unable to solve the user request.

BEFORE calling done() for action requests that changed state (added items, submitted forms, etc.):
1. First, navigate to the result screen (e.g., Cart, confirmation, order summary) so the user can see the outcome.
2. Wait for the next step to see the result screen content.
3. THEN call done() with a summary of what you did.
Do NOT call done() immediately after the last action — the user needs to SEE the result.

The done action is your opportunity to communicate findings and provide a coherent reply to the user:
- Set success to true only if the full USER REQUEST has been completed.
- Use the text field to answer questions, summarize what you found, or explain what you did.
- You are ONLY ALLOWED to call done as a single action. Do not call it together with other actions.

The ask_user action should ONLY be used when the user gave an action request but you lack specific information to execute it (e.g., user says "order a pizza" but there are multiple options and you don't know which one).
- Do NOT use ask_user to confirm actions the user explicitly requested. If they said "place my order", just do it.
- NEVER ask for the same confirmation twice. If the user already answered, proceed with their answer.
- For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat the same action — the user could be charged multiple times.
</task_completion_rules>

<capability>
- It is ok to just provide information without performing any actions.
- User can ask questions about what's on screen — answer them directly via done().
- It is ok to fail the task. User would rather you report failure than repeat failed actions endlessly.
- The user can be wrong. If the request is not achievable, tell the user via done().
- The app can have bugs. If something is not working as expected, report it to the user.
</capability>

<ux_rules>
UX best practices for mobile agent interactions:
- Confirm what you did: When completing actions, summarize exactly what happened (e.g., "Added 2x Margherita ($10 each) to your cart. Total: $20").
- Be transparent about errors: If an action fails, explain what failed and why — do not silently skip it or pretend it succeeded.
- Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
- Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
- Fail gracefully: If stuck after multiple attempts, call done() with what you accomplished and what remains, rather than repeating failed actions.
- Be concise: Keep responses short and actionable. Users are on mobile — avoid walls of text.
- Suggest next steps: After completing an action, briefly suggest what the user might want to do next (e.g., "Added to cart. Would you like to checkout or add more items?").
- When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in the done() text.
</ux_rules>

<reasoning_rules>
Exhibit the following reasoning patterns to successfully achieve the <user_request>:
- Reason about <agent_history> to track progress and context toward <user_request>.
- Analyze the most recent action result in <agent_history> and clearly state what you previously tried to achieve.
- Explicitly judge success/failure of the last action. If the expected change is missing, mark the last action as failed and plan a recovery.
- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches.
- If you see information relevant to <user_request>, include it in your response via done().
- Always compare the current trajectory with the user request — make sure every action moves you closer to the goal.
- Save important information to memory: field values you collected, items found, pages visited, etc.
</reasoning_rules>

<output>
You MUST call the agent_step tool on every step. Provide:

1. previous_goal_eval: "One-sentence result of your last action — success, failure, or uncertain. Skip on first step."
2. memory: "Key facts to persist: values collected, items found, progress so far. Be specific."
3. plan: "Your immediate next goal — what action you will take and why."
4. action_name: Choose one action to execute
5. Action parameters (index, text, screen, etc. depending on the action)

Examples:

previous_goal_eval: "Typed email into field [0]. Verdict: Success"
memory: "Email: user@test.com entered. Still need password."
plan: "Ask the user for their password using ask_user."

previous_goal_eval: "Navigated to Cart screen. Verdict: Success"
memory: "Added 2x Margherita pizza. Cart total visible."
plan: "Call done to report the cart contents to the user."
</output>`;
}
package/src/core/types.ts CHANGED
@@ -19,7 +19,8 @@ export interface InteractiveElement {
19
19
  props: {
20
20
  onPress?: (...args: any[]) => void;
21
21
  onChangeText?: (text: string) => void;
22
- value?: string;
22
+ onValueChange?: (value: boolean) => void;
23
+ value?: string | boolean;
23
24
  placeholder?: string;
24
25
  checked?: boolean;
25
26
  disabled?: boolean;
@@ -45,11 +46,7 @@ export interface DehydratedScreen {
45
46
 
46
47
  export interface AgentStep {
47
48
  stepIndex: number;
48
- reflection: {
49
- evaluationPreviousGoal: string;
50
- memory: string;
51
- nextGoal: string;
52
- };
49
+ reflection: AgentReasoning;
53
50
  action: {
54
51
  name: string;
55
52
  input: Record<string, any>;
@@ -129,6 +126,40 @@ export interface AgentConfig {
129
126
  /** Delay between steps in ms (page-agent default: 400ms). */
130
127
  stepDelay?: number;
131
128
 
129
+ // ─── Status Updates ──────────────────────────────────────────────────────
130
+
131
+ /**
132
+ * Called with a human-readable status string at each step.
133
+ * Use this to show dynamic loading text (e.g., "Tapping 'Add'...").
134
+ */
135
+ onStatusUpdate?: (status: string) => void;
136
+
137
+ /**
138
+ * Callback for when agent needs user input (ask_user tool).
139
+ * Mirrors page-agent: the agent loop blocks until the user responds.
140
+ * If not set, ask_user tool will break the loop (legacy behavior).
141
+ * @example onAskUser: (q) => new Promise(resolve => showPrompt(q, resolve))
142
+ */
143
+ onAskUser?: (question: string) => Promise<string>;
144
+
145
+ // ─── Expo Router Support ─────────────────────────────────────────────────
146
+
147
+ /**
148
+ * Expo Router instance (from useRouter()).
149
+ * When provided, the navigate tool uses router.push('/path') instead of navRef.navigate().
150
+ */
151
+ router?: {
152
+ push: (href: string) => void;
153
+ replace: (href: string) => void;
154
+ back: () => void;
155
+ };
156
+
157
+ /**
158
+ * Current pathname from Expo Router (from usePathname()).
159
+ * Used to determine the current screen when using Expo Router.
160
+ */
161
+ pathname?: string;
162
+
132
163
  // ─── MCP Bridge Integration ──────────────────────────────────────────────
133
164
 
134
165
  /**
@@ -172,14 +203,31 @@ export interface ActionDefinition {
172
203
 
173
204
  // ─── Provider Interface ──────────────────────────────────────
174
205
 
206
+ /** Structured reasoning returned per step via the agent_step tool. */
207
+ export interface AgentReasoning {
208
+ /** Assessment of whether the previous action succeeded or failed. */
209
+ previousGoalEval: string;
210
+ /** What to remember for future steps (progress, items found, etc). */
211
+ memory: string;
212
+ /** The immediate next goal and why. */
213
+ plan: string;
214
+ }
215
+
216
+ /** Result from the AI provider's generateContent call. */
217
+ export interface ProviderResult {
218
+ /** Extracted action tool call (action_name + params). */
219
+ toolCalls: Array<{ name: string; args: Record<string, any> }>;
220
+ /** Structured reasoning from MacroTool (previousGoalEval, memory, plan). */
221
+ reasoning: AgentReasoning;
222
+ /** Raw text response (if any). */
223
+ text?: string;
224
+ }
225
+
175
226
  export interface AIProvider {
176
227
  generateContent(
177
228
  systemPrompt: string,
178
229
  userMessage: string,
179
230
  tools: ToolDefinition[],
180
231
  history: AgentStep[],
181
- ): Promise<{
182
- toolCalls: Array<{ name: string; args: Record<string, any> }>;
183
- text?: string;
184
- }>;
232
+ ): Promise<ProviderResult>;
185
233
  }