react-native-agentic-ai 0.0.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +252 -14
  3. package/lib/module/components/AIAgent.js +185 -0
  4. package/lib/module/components/AIAgent.js.map +1 -0
  5. package/lib/module/components/AgentChatBar.js +268 -0
  6. package/lib/module/components/AgentChatBar.js.map +1 -0
  7. package/lib/module/components/AgentOverlay.js +53 -0
  8. package/lib/module/components/AgentOverlay.js.map +1 -0
  9. package/lib/module/core/AgentRuntime.js +640 -0
  10. package/lib/module/core/AgentRuntime.js.map +1 -0
  11. package/lib/module/core/FiberTreeWalker.js +362 -0
  12. package/lib/module/core/FiberTreeWalker.js.map +1 -0
  13. package/lib/module/core/MCPBridge.js +98 -0
  14. package/lib/module/core/MCPBridge.js.map +1 -0
  15. package/lib/module/core/ScreenDehydrator.js +46 -0
  16. package/lib/module/core/ScreenDehydrator.js.map +1 -0
  17. package/lib/module/core/systemPrompt.js +164 -0
  18. package/lib/module/core/systemPrompt.js.map +1 -0
  19. package/lib/module/core/types.js +2 -0
  20. package/lib/module/core/types.js.map +1 -0
  21. package/lib/module/hooks/useAction.js +32 -0
  22. package/lib/module/hooks/useAction.js.map +1 -0
  23. package/lib/module/index.js +17 -0
  24. package/lib/module/index.js.map +1 -0
  25. package/lib/module/package.json +1 -0
  26. package/lib/module/providers/GeminiProvider.js +294 -0
  27. package/lib/module/providers/GeminiProvider.js.map +1 -0
  28. package/lib/module/utils/logger.js +17 -0
  29. package/lib/module/utils/logger.js.map +1 -0
  30. package/lib/typescript/package.json +1 -0
  31. package/lib/typescript/src/components/AIAgent.d.ts +65 -0
  32. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -0
  33. package/lib/typescript/src/components/AgentChatBar.d.ts +15 -0
  34. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -0
  35. package/lib/typescript/src/components/AgentOverlay.d.ts +10 -0
  36. package/lib/typescript/src/components/AgentOverlay.d.ts.map +1 -0
  37. package/lib/typescript/src/core/AgentRuntime.d.ts +53 -0
  38. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -0
  39. package/lib/typescript/src/core/FiberTreeWalker.d.ts +31 -0
  40. package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -0
  41. package/lib/typescript/src/core/MCPBridge.d.ts +23 -0
  42. package/lib/typescript/src/core/MCPBridge.d.ts.map +1 -0
  43. package/lib/typescript/src/core/ScreenDehydrator.d.ts +20 -0
  44. package/lib/typescript/src/core/ScreenDehydrator.d.ts.map +1 -0
  45. package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
  46. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
  47. package/lib/typescript/src/core/types.d.ts +176 -0
  48. package/lib/typescript/src/core/types.d.ts.map +1 -0
  49. package/lib/typescript/src/hooks/useAction.d.ts +13 -0
  50. package/lib/typescript/src/hooks/useAction.d.ts.map +1 -0
  51. package/lib/typescript/src/index.d.ts +10 -0
  52. package/lib/typescript/src/index.d.ts.map +1 -0
  53. package/lib/typescript/src/providers/GeminiProvider.d.ts +43 -0
  54. package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -0
  55. package/lib/typescript/src/utils/logger.d.ts +7 -0
  56. package/lib/typescript/src/utils/logger.d.ts.map +1 -0
  57. package/package.json +135 -12
  58. package/src/components/AIAgent.tsx +262 -0
  59. package/src/components/AgentChatBar.tsx +258 -0
  60. package/src/components/AgentOverlay.tsx +48 -0
  61. package/src/core/AgentRuntime.ts +661 -0
  62. package/src/core/FiberTreeWalker.ts +404 -0
  63. package/src/core/MCPBridge.ts +110 -0
  64. package/src/core/ScreenDehydrator.ts +53 -0
  65. package/src/core/systemPrompt.ts +162 -0
  66. package/src/core/types.ts +233 -0
  67. package/src/hooks/useAction.ts +40 -0
  68. package/src/index.ts +22 -0
  69. package/src/providers/GeminiProvider.ts +283 -0
  70. package/src/utils/logger.ts +21 -0
@@ -0,0 +1,283 @@
1
+ /**
2
+ * GeminiProvider — Gemini API integration with structured action pattern.
3
+ *
4
+ * Uses a single forced function call (`agent_step`) that bundles
5
+ * structured reasoning (evaluation, memory, plan) alongside the action.
6
+ * This replaces free-form text + separate tool calls for stability.
7
+ */
8
+
9
+ import { logger } from '../utils/logger';
10
+ import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning } from '../core/types';
11
+
12
+ // ─── Constants ─────────────────────────────────────────────────
13
+
14
/** Name of the single function the model is forced to call on every step. */
const AGENT_STEP_FN = 'agent_step';

/**
 * Argument keys of `agent_step` that carry structured reasoning rather than
 * action parameters. These are always part of the `agent_step` schema.
 */
const REASONING_FIELDS = [
  'previous_goal_eval',
  'memory',
  'plan',
] as const;
18
+
19
+ // ─── Gemini API Types ──────────────────────────────────────────
20
+
21
/**
 * One part of a Gemini content entry. Every field is optional; a part may
 * carry plain text, a function call, or a function response.
 */
interface GeminiPart {
  text?: string;
  functionCall?: { name: string; args: any };
  functionResponse?: { name: string; response: any };
}

/** A single conversation entry in a Gemini request: who spoke, and what parts. */
interface GeminiContent {
  role: 'user' | 'model';
  parts: Array<GeminiPart>;
}
29
+
30
+ // ─── Provider ──────────────────────────────────────────────────
31
+
32
/**
 * AIProvider implementation backed by the Gemini `generateContent` REST endpoint.
 *
 * Every request forces the model to call a single function (`agent_step`)
 * whose arguments bundle the reasoning fields (previous_goal_eval, memory,
 * plan), the chosen `action_name`, and that action's parameters as flat
 * top-level properties.
 */
export class GeminiProvider implements AIProvider {
  private readonly apiKey: string;
  private readonly model: string;

  /**
   * @param apiKey Gemini API key, sent with each request.
   * @param model Model identifier inserted into the request path.
   */
  constructor(apiKey: string, model: string = 'gemini-2.5-flash') {
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Sends one request to Gemini and converts the reply into a ProviderResult.
   *
   * @param systemPrompt Text sent as the request's `systemInstruction`.
   * @param userMessage Text sent as the single user turn.
   * @param tools Tools the model may choose from; folded into `agent_step`.
   * @param history Forwarded to `buildContents`, which currently ignores it.
   * @returns The parsed tool call, reasoning and optional text.
   * @throws Error when the HTTP status is not OK (message contains the status
   *   and response body); any error raised by `fetch` or JSON decoding is
   *   logged and rethrown unchanged.
   */
  async generateContent(
    systemPrompt: string,
    userMessage: string,
    tools: ToolDefinition[],
    history: AgentStep[],
  ): Promise<ProviderResult> {
    logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}`);

    // Single agent_step function declaration covering every tool.
    const agentStepDeclaration = this.buildAgentStepDeclaration(tools);

    // Single-turn contents; history is not sent as function call/response pairs.
    const contents = this.buildContents(userMessage, history);

    // The API key is deliberately NOT placed in the query string: URLs end up
    // in logs, proxies and network error messages. It travels in a header.
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:generateContent`;

    const body = {
      contents,
      tools: [{ functionDeclarations: [agentStepDeclaration] }],
      systemInstruction: { parts: [{ text: systemPrompt }] },
      // Force the model to always call agent_step
      tool_config: {
        function_calling_config: {
          mode: 'ANY',
          allowed_function_names: [AGENT_STEP_FN],
        },
      },
      generationConfig: {
        temperature: 0.2,
        maxOutputTokens: 2048,
      },
    };

    const startTime = Date.now();

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-goog-api-key': this.apiKey,
        },
        body: JSON.stringify(body),
      });

      const elapsed = Date.now() - startTime;
      logger.info('GeminiProvider', `Response received in ${elapsed}ms`);

      if (!response.ok) {
        const errorText = await response.text();
        // Logged once, by the catch below, together with every other failure.
        throw new Error(`Gemini API error ${response.status}: ${errorText}`);
      }

      const data = await response.json();

      return this.parseAgentStepResponse(data, tools);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error('GeminiProvider', 'Request failed:', message);
      throw error;
    }
  }

  // ─── Build agent_step Declaration ──────────────────────────

  /**
   * Builds a single `agent_step` function declaration that combines:
   * - Structured reasoning fields (previous_goal_eval, memory, plan)
   * - action_name (enum of all available tool names)
   * - All tool parameter fields as flat top-level properties
   *
   * Flat schema avoids Gemini's "deeply nested schema" rejection in ANY mode.
   *
   * When several tools share a parameter name, the first tool's definition is
   * used. A tool parameter whose name equals a reasoning field or
   * `action_name` is not added to the schema (a warning is logged), so the
   * reserved definitions can never be overwritten.
   *
   * @param tools Tools to expose through `agent_step`.
   * @returns The function declaration object placed in the request body.
   */
  private buildAgentStepDeclaration(tools: ToolDefinition[]): any {
    const toolNames = tools.map(t => t.name);
    const reservedKeys = new Set<string>([...REASONING_FIELDS, 'action_name']);

    // Collect all unique parameter fields across all tools
    const actionProperties: Record<string, any> = {};
    for (const tool of tools) {
      for (const [paramName, param] of Object.entries(tool.parameters)) {
        if (reservedKeys.has(paramName)) {
          logger.warn(
            'GeminiProvider',
            `Tool "${tool.name}" parameter "${paramName}" collides with a reserved agent_step field; keeping the reserved definition.`,
          );
          continue;
        }
        // Skip if already added (shared field names like 'text', 'index').
        // Own-property check: a plain lookup would also match inherited keys
        // such as 'constructor' and wrongly drop parameters with those names.
        if (Object.prototype.hasOwnProperty.call(actionProperties, paramName)) continue;
        actionProperties[paramName] = {
          type: this.mapParamType(param.type),
          description: param.description,
          ...(param.enum ? { enum: param.enum } : {}),
        };
      }
    }

    // Human-readable list of tools, embedded in the function description
    const toolDescriptions = tools
      .map(t => {
        const params = Object.keys(t.parameters).join(', ');
        return `- ${t.name}(${params}): ${t.description}`;
      })
      .join('\n');

    return {
      name: AGENT_STEP_FN,
      description: `Execute one agent step. Choose an action and provide reasoning.\n\nAvailable actions:\n${toolDescriptions}`,
      parameters: {
        type: 'OBJECT',
        properties: {
          // ── Reasoning fields ──
          previous_goal_eval: {
            type: 'STRING',
            description: 'One-sentence assessment of your last action. State success, failure, or uncertain. Skip on first step.',
          },
          memory: {
            type: 'STRING',
            description: 'Key facts to remember for future steps: progress made, items found, counters, field values already collected.',
          },
          plan: {
            type: 'STRING',
            description: 'Your immediate next goal — what action you will take and why.',
          },
          // ── Action selection ──
          action_name: {
            type: 'STRING',
            description: 'Which action to execute.',
            enum: toolNames,
          },
          // ── Action parameters (flat) ──
          ...actionProperties,
        },
        required: ['plan', 'action_name'],
      },
    };
  }

  /**
   * Maps a tool parameter type name to the upper-case type name used in the
   * declaration schema. Anything other than number/integer/boolean maps to
   * 'STRING'.
   */
  private mapParamType(type: string): string {
    switch (type) {
      case 'number': return 'NUMBER';
      case 'integer': return 'INTEGER';
      case 'boolean': return 'BOOLEAN';
      case 'string':
      default: return 'STRING';
    }
  }

  // ─── Build Contents ────────────────────────────────────────

  /**
   * Builds Gemini conversation contents.
   *
   * Each step is a STATELESS single-turn request (matching page-agent's approach):
   * - System prompt has general instructions
   * - User message contains full context: task, history, screen state
   * - Model responds with agent_step function call
   *
   * History is embedded as text in assembleUserPrompt (via <agent_history>),
   * NOT as functionCall/functionResponse pairs. This avoids Gemini's
   * conversation format requirements and thought_signature complexity.
   *
   * @param userMessage Text of the single user turn.
   * @param _history Unused; accepted so the call site keeps its shape.
   * @returns A one-element array holding the user turn.
   */
  private buildContents(userMessage: string, _history: AgentStep[]): GeminiContent[] {
    return [{
      role: 'user',
      parts: [{ text: userMessage }],
    }];
  }

  // ─── Parse Response ────────────────────────────────────────

  /**
   * Parses the Gemini response expecting a single agent_step function call.
   * Extracts structured reasoning + action, and determines which tool to execute.
   *
   * Falls back to a failed `done` tool call when the response has no
   * candidates, no function call part, or no usable string `action_name`.
   *
   * @param data Decoded JSON body of the Gemini response.
   * @param tools Tools offered in the request, used to select action arguments.
   * @returns Exactly one tool call plus reasoning and optional text.
   */
  private parseAgentStepResponse(data: any, tools: ToolDefinition[]): ProviderResult {
    const emptyReasoning: AgentReasoning = { previousGoalEval: '', memory: '', plan: '' };

    if (!data.candidates || data.candidates.length === 0) {
      logger.warn('GeminiProvider', 'No candidates in response');
      return this.failedDone('No response generated.', emptyReasoning, 'No response generated.');
    }

    const candidate = data.candidates[0];
    const parts = candidate.content?.parts || [];

    // First part carrying a function call, and first part carrying text
    const fnCallPart = parts.find((p: any) => p.functionCall);
    const textPart = parts.find((p: any) => p.text);

    if (!fnCallPart?.functionCall) {
      logger.warn('GeminiProvider', 'No function call in response. Text:', textPart?.text);
      return this.failedDone(textPart?.text || 'No action taken.', emptyReasoning, textPart?.text);
    }

    const args = fnCallPart.functionCall.args || {};

    // Extract reasoning fields
    const reasoning: AgentReasoning = {
      previousGoalEval: args.previous_goal_eval || '',
      memory: args.memory || '',
      plan: args.plan || '',
    };

    // Extract action; it becomes the tool name, so it must be a non-empty string
    const actionName: unknown = args.action_name;
    if (typeof actionName !== 'string' || actionName === '') {
      logger.warn('GeminiProvider', 'No action_name in agent_step. Falling back to done.');
      return this.failedDone('Agent did not choose an action.', reasoning, textPart?.text);
    }

    // Build action args: everything except reasoning fields and action_name
    const actionArgs: Record<string, any> = {};
    const reservedKeys = new Set<string>([...REASONING_FIELDS, 'action_name']);

    // Find the matching tool to know which params belong to it
    const matchedTool = tools.find(t => t.name === actionName);
    if (matchedTool) {
      // Only the parameters this tool declares are forwarded
      for (const paramName of Object.keys(matchedTool.parameters)) {
        if (args[paramName] !== undefined) {
          actionArgs[paramName] = args[paramName];
        }
      }
    } else {
      // Custom/registered tool — grab all non-reserved fields
      for (const [key, value] of Object.entries(args)) {
        if (!reservedKeys.has(key)) {
          actionArgs[key] = value;
        }
      }
    }

    logger.info('GeminiProvider', `Parsed: action=${actionName}, plan="${reasoning.plan}"`);

    return {
      toolCalls: [{ name: actionName, args: actionArgs }],
      reasoning,
      text: textPart?.text,
    };
  }

  /**
   * Builds the fallback result used when no usable action could be parsed:
   * a single `done` tool call flagged `success: false`.
   */
  private failedDone(
    message: string,
    reasoning: AgentReasoning,
    text: string | undefined,
  ): ProviderResult {
    return {
      toolCalls: [{ name: 'done', args: { text: message, success: false } }],
      reasoning,
      text,
    };
  }
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Logger utility — prefixed console output for easy filtering.
3
+ */
4
/** Prefix placed in front of every message so output can be filtered. */
const TAG = '[AIAgent]';

/** Builds the leading label for a message: the tag followed by the bracketed context. */
const label = (context: string): string => `${TAG} [${context}]`;

/**
 * Console logger that prefixes each message with the tag and a context name.
 * `debug` only prints when the `__DEV__` global is truthy.
 */
export const logger = {
  info: (context: string, ...args: any[]) => {
    console.log(label(context), ...args);
  },

  warn: (context: string, ...args: any[]) => {
    console.warn(label(context), ...args);
  },

  error: (context: string, ...args: any[]) => {
    console.error(label(context), ...args);
  },

  debug: (context: string, ...args: any[]) => {
    if (!__DEV__) {
      return;
    }
    console.log(`${label(context)} 🐛`, ...args);
  },
};