react-native-agentic-ai 0.0.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +253 -14
- package/lib/module/components/AIAgent.js +185 -0
- package/lib/module/components/AIAgent.js.map +1 -0
- package/lib/module/components/AgentChatBar.js +268 -0
- package/lib/module/components/AgentChatBar.js.map +1 -0
- package/lib/module/components/AgentOverlay.js +53 -0
- package/lib/module/components/AgentOverlay.js.map +1 -0
- package/lib/module/core/AgentRuntime.js +640 -0
- package/lib/module/core/AgentRuntime.js.map +1 -0
- package/lib/module/core/FiberTreeWalker.js +362 -0
- package/lib/module/core/FiberTreeWalker.js.map +1 -0
- package/lib/module/core/MCPBridge.js +98 -0
- package/lib/module/core/MCPBridge.js.map +1 -0
- package/lib/module/core/ScreenDehydrator.js +46 -0
- package/lib/module/core/ScreenDehydrator.js.map +1 -0
- package/lib/module/core/systemPrompt.js +164 -0
- package/lib/module/core/systemPrompt.js.map +1 -0
- package/lib/module/core/types.js +2 -0
- package/lib/module/core/types.js.map +1 -0
- package/lib/module/hooks/useAction.js +32 -0
- package/lib/module/hooks/useAction.js.map +1 -0
- package/lib/module/index.js +17 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/providers/GeminiProvider.js +294 -0
- package/lib/module/providers/GeminiProvider.js.map +1 -0
- package/lib/module/utils/logger.js +17 -0
- package/lib/module/utils/logger.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/components/AIAgent.d.ts +65 -0
- package/lib/typescript/src/components/AIAgent.d.ts.map +1 -0
- package/lib/typescript/src/components/AgentChatBar.d.ts +15 -0
- package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -0
- package/lib/typescript/src/components/AgentOverlay.d.ts +10 -0
- package/lib/typescript/src/components/AgentOverlay.d.ts.map +1 -0
- package/lib/typescript/src/core/AgentRuntime.d.ts +53 -0
- package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -0
- package/lib/typescript/src/core/FiberTreeWalker.d.ts +31 -0
- package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -0
- package/lib/typescript/src/core/MCPBridge.d.ts +23 -0
- package/lib/typescript/src/core/MCPBridge.d.ts.map +1 -0
- package/lib/typescript/src/core/ScreenDehydrator.d.ts +20 -0
- package/lib/typescript/src/core/ScreenDehydrator.d.ts.map +1 -0
- package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
- package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
- package/lib/typescript/src/core/types.d.ts +176 -0
- package/lib/typescript/src/core/types.d.ts.map +1 -0
- package/lib/typescript/src/hooks/useAction.d.ts +13 -0
- package/lib/typescript/src/hooks/useAction.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/providers/GeminiProvider.d.ts +43 -0
- package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -0
- package/lib/typescript/src/utils/logger.d.ts +7 -0
- package/lib/typescript/src/utils/logger.d.ts.map +1 -0
- package/package.json +135 -12
- package/src/components/AIAgent.tsx +262 -0
- package/src/components/AgentChatBar.tsx +258 -0
- package/src/components/AgentOverlay.tsx +48 -0
- package/src/core/AgentRuntime.ts +661 -0
- package/src/core/FiberTreeWalker.ts +404 -0
- package/src/core/MCPBridge.ts +110 -0
- package/src/core/ScreenDehydrator.ts +53 -0
- package/src/core/systemPrompt.ts +162 -0
- package/src/core/types.ts +233 -0
- package/src/hooks/useAction.ts +40 -0
- package/src/index.ts +22 -0
- package/src/providers/GeminiProvider.ts +283 -0
- package/src/utils/logger.ts +21 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiProvider — Gemini API integration with structured action pattern.
|
|
3
|
+
*
|
|
4
|
+
* Uses a single forced function call (`agent_step`) that bundles
|
|
5
|
+
* structured reasoning (evaluation, memory, plan) alongside the action.
|
|
6
|
+
* This replaces free-form text + separate tool calls for stability.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { logger } from '../utils/logger';
|
|
10
|
+
import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning } from '../core/types';
|
|
11
|
+
|
|
12
|
+
// ─── Constants ─────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/** Name of the single function the model is forced to call on every step. */
const AGENT_STEP_FN = 'agent_step';

/**
 * Argument keys of the structured-reasoning fields declared in the
 * `agent_step` schema. They are treated as reserved when action arguments
 * are extracted from a response.
 */
const REASONING_FIELDS = ['previous_goal_eval', 'memory', 'plan'] as const;
|
|
18
|
+
|
|
19
|
+
// ─── Gemini API Types ──────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * One part of a Gemini content turn. Each field is optional; a part carries
 * plain text, a function call issued by the model, or a function response.
 */
interface GeminiPart {
  text?: string;
  functionCall?: { name: string; args: any };
  functionResponse?: { name: string; response: any };
}

/** A single conversation turn in the Gemini `contents` array. */
interface GeminiContent {
  /** Author of the turn. */
  role: 'user' | 'model';
  /** Ordered parts that make up the turn. */
  parts: GeminiPart[];
}
|
|
29
|
+
|
|
30
|
+
// ─── Provider ──────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
/**
 * Gemini implementation of the AIProvider contract.
 *
 * Every request declares exactly one function (`agent_step`) and forces the
 * model to call it. The function's arguments bundle the structured reasoning
 * fields (previous_goal_eval, memory, plan), the chosen `action_name`, and the
 * parameters of every available tool as flat top-level properties.
 */
export class GeminiProvider implements AIProvider {
  private readonly apiKey: string;
  private readonly model: string;

  /**
   * @param apiKey Gemini API key, sent with every request.
   * @param model Model id interpolated into the request URL.
   */
  constructor(apiKey: string, model: string = 'gemini-2.5-flash') {
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Sends one single-turn request to Gemini and parses the forced
   * `agent_step` function call out of the response.
   *
   * @param systemPrompt Text sent as the system instruction.
   * @param userMessage Text sent as the only user turn.
   * @param tools Actions the model may choose between.
   * @param history Forwarded to `buildContents`, which does not read it.
   * @returns The chosen tool call plus the structured reasoning.
   * @throws Error when the HTTP response is not OK; errors raised by `fetch`
   *   or by JSON decoding are logged and rethrown unchanged.
   */
  async generateContent(
    systemPrompt: string,
    userMessage: string,
    tools: ToolDefinition[],
    history: AgentStep[],
  ): Promise<ProviderResult> {
    logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}`);

    // Build single agent_step function declaration
    const agentStepDeclaration = this.buildAgentStepDeclaration(tools);

    // Build the single user turn (history is not sent as separate turns)
    const contents = this.buildContents(userMessage, history);

    // The API key goes in a header rather than the query string so it does
    // not end up in logged or proxied URLs.
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:generateContent`;

    const body = {
      contents,
      tools: [{ functionDeclarations: [agentStepDeclaration] }],
      systemInstruction: { parts: [{ text: systemPrompt }] },
      // Force the model to always call agent_step
      tool_config: {
        function_calling_config: {
          mode: 'ANY',
          allowed_function_names: [AGENT_STEP_FN],
        },
      },
      generationConfig: {
        temperature: 0.2,
        maxOutputTokens: 2048,
      },
    };

    const startTime = Date.now();

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-goog-api-key': this.apiKey,
        },
        body: JSON.stringify(body),
      });

      const elapsed = Date.now() - startTime;
      logger.info('GeminiProvider', `Response received in ${elapsed}ms`);

      if (!response.ok) {
        const errorText = await response.text();
        logger.error('GeminiProvider', `API error ${response.status}: ${errorText}`);
        throw new Error(`Gemini API error ${response.status}: ${errorText}`);
      }

      const data = await response.json();

      return this.parseAgentStepResponse(data, tools);
    } catch (error: unknown) {
      // Anything can be thrown; only Error instances are known to carry a message.
      const message = error instanceof Error ? error.message : String(error);
      logger.error('GeminiProvider', 'Request failed:', message);
      throw error;
    }
  }

  // ─── Build agent_step Declaration ──────────────────────────

  /**
   * Builds a single `agent_step` function declaration that combines:
   * - Structured reasoning fields (previous_goal_eval, memory, plan)
   * - action_name (enum of all available tool names)
   * - All tool parameter fields as flat top-level properties
   *
   * Flat schema avoids Gemini's "deeply nested schema" rejection in ANY mode.
   *
   * A tool parameter whose name equals a reasoning field or `action_name` is
   * left out of the schema (with a warning) so it cannot replace that field.
   * When several tools share a parameter name, the first definition wins.
   */
  private buildAgentStepDeclaration(tools: ToolDefinition[]): any {
    const toolNames = tools.map(t => t.name);
    const reservedKeys = this.getReservedKeys();

    // Collect all unique parameter fields across all tools
    const actionProperties: Record<string, any> = {};
    for (const tool of tools) {
      for (const [paramName, param] of Object.entries(tool.parameters)) {
        if (reservedKeys.has(paramName)) {
          logger.warn(
            'GeminiProvider',
            `Tool "${tool.name}" parameter "${paramName}" collides with a reserved agent_step field and was not added to the schema.`,
          );
          continue;
        }
        // Own-property check: a plain truthiness test would also match
        // inherited keys such as 'toString' and silently drop the parameter.
        if (Object.prototype.hasOwnProperty.call(actionProperties, paramName)) continue;
        actionProperties[paramName] = {
          type: this.mapParamType(param.type),
          description: param.description,
          ...(param.enum ? { enum: param.enum } : {}),
        };
      }
    }

    // Build tool descriptions for the action_name enum
    const toolDescriptions = tools
      .map(t => {
        const params = Object.keys(t.parameters).join(', ');
        return `- ${t.name}(${params}): ${t.description}`;
      })
      .join('\n');

    return {
      name: AGENT_STEP_FN,
      description: `Execute one agent step. Choose an action and provide reasoning.\n\nAvailable actions:\n${toolDescriptions}`,
      parameters: {
        type: 'OBJECT',
        properties: {
          // ── Reasoning fields ──
          previous_goal_eval: {
            type: 'STRING',
            description: 'One-sentence assessment of your last action. State success, failure, or uncertain. Skip on first step.',
          },
          memory: {
            type: 'STRING',
            description: 'Key facts to remember for future steps: progress made, items found, counters, field values already collected.',
          },
          plan: {
            type: 'STRING',
            description: 'Your immediate next goal — what action you will take and why.',
          },
          // ── Action selection ──
          action_name: {
            type: 'STRING',
            description: 'Which action to execute.',
            enum: toolNames,
          },
          // ── Action parameters (flat) ──
          ...actionProperties,
        },
        required: ['plan', 'action_name'],
      },
    };
  }

  /** Argument keys that belong to the agent_step envelope rather than to a tool. */
  private getReservedKeys(): Set<string> {
    return new Set<string>([...REASONING_FIELDS, 'action_name']);
  }

  /** Maps a tool parameter type name to the Gemini schema type; unknown names become STRING. */
  private mapParamType(type: string): string {
    switch (type) {
      case 'number': return 'NUMBER';
      case 'integer': return 'INTEGER';
      case 'boolean': return 'BOOLEAN';
      case 'string':
      default: return 'STRING';
    }
  }

  // ─── Build Contents ────────────────────────────────────────

  /**
   * Builds Gemini conversation contents.
   *
   * Each step is a STATELESS single-turn request (matching page-agent's approach):
   * - System prompt has general instructions
   * - User message contains full context: task, history, screen state
   * - Model responds with agent_step function call
   *
   * History is embedded as text in assembleUserPrompt (via <agent_history>),
   * NOT as functionCall/functionResponse pairs. This avoids Gemini's
   * conversation format requirements and thought_signature complexity.
   */
  private buildContents(userMessage: string, _history: AgentStep[]): GeminiContent[] {
    return [{
      role: 'user',
      parts: [{ text: userMessage }],
    }];
  }

  // ─── Parse Response ────────────────────────────────────────

  /**
   * Parses the Gemini response expecting a single agent_step function call.
   * Extracts structured reasoning + action, and determines which tool to execute.
   *
   * Any response without a usable function call or `action_name` is turned
   * into a failed `done` tool call instead of throwing.
   */
  private parseAgentStepResponse(data: any, tools: ToolDefinition[]): ProviderResult {
    const candidates = data?.candidates;
    if (!Array.isArray(candidates) || candidates.length === 0) {
      logger.warn('GeminiProvider', 'No candidates in response');
      return this.buildFailureResult(
        'No response generated.',
        { previousGoalEval: '', memory: '', plan: '' },
        'No response generated.',
      );
    }

    const rawParts = candidates[0]?.content?.parts;
    const parts: any[] = Array.isArray(rawParts) ? rawParts : [];

    // Find the function call part
    const fnCallPart = parts.find((p: any) => p?.functionCall);
    const textPart = parts.find((p: any) => p?.text);

    if (!fnCallPart?.functionCall) {
      logger.warn('GeminiProvider', 'No function call in response. Text:', textPart?.text);
      return this.buildFailureResult(
        textPart?.text || 'No action taken.',
        { previousGoalEval: '', memory: '', plan: '' },
        textPart?.text,
      );
    }

    const args = fnCallPart.functionCall.args || {};

    // Extract reasoning fields
    const reasoning: AgentReasoning = {
      previousGoalEval: args.previous_goal_eval || '',
      memory: args.memory || '',
      plan: args.plan || '',
    };

    // Extract action; a non-string value is treated the same as a missing one
    const actionName: string = typeof args.action_name === 'string' ? args.action_name : '';
    if (!actionName) {
      logger.warn('GeminiProvider', 'No action_name in agent_step. Falling back to done.');
      return this.buildFailureResult('Agent did not choose an action.', reasoning, textPart?.text);
    }

    // Build action args: everything except reasoning fields and action_name
    const actionArgs: Record<string, any> = {};

    // Find the matching tool to know which params belong to it
    const matchedTool = tools.find(t => t.name === actionName);
    if (matchedTool) {
      for (const paramName of Object.keys(matchedTool.parameters)) {
        if (args[paramName] !== undefined) {
          actionArgs[paramName] = args[paramName];
        }
      }
    } else {
      // Custom/registered tool — grab all non-reserved fields
      const reservedKeys = this.getReservedKeys();
      for (const [key, value] of Object.entries(args)) {
        if (!reservedKeys.has(key)) {
          actionArgs[key] = value;
        }
      }
    }

    logger.info('GeminiProvider', `Parsed: action=${actionName}, plan="${reasoning.plan}"`);

    return {
      toolCalls: [{ name: actionName, args: actionArgs }],
      reasoning,
      text: textPart?.text,
    };
  }

  /** Builds the failed `done` tool call returned when no usable action could be parsed. */
  private buildFailureResult(
    message: string,
    reasoning: AgentReasoning,
    text: string | undefined,
  ): ProviderResult {
    return {
      toolCalls: [{ name: 'done', args: { text: message, success: false } }],
      reasoning,
      text,
    };
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Logger utility — prefixed console output for easy filtering.
 *
 * Every message is prefixed with `[AIAgent] [<context>]`. `info`, `warn` and
 * `error` always write to the console; `debug` writes only when the
 * `__DEV__` global is defined and truthy.
 */
const TAG = '[AIAgent]';

export const logger = {
  /** Writes an informational message via `console.log`. */
  info: (context: string, ...args: any[]) =>
    console.log(`${TAG} [${context}]`, ...args),

  /** Writes a warning via `console.warn`. */
  warn: (context: string, ...args: any[]) =>
    console.warn(`${TAG} [${context}]`, ...args),

  /** Writes an error via `console.error`. */
  error: (context: string, ...args: any[]) =>
    console.error(`${TAG} [${context}]`, ...args),

  /** Writes a debug message via `console.log`, in development builds only. */
  debug: (context: string, ...args: any[]) => {
    // `typeof` guard: reading an undeclared global throws a ReferenceError,
    // which would crash callers in environments that do not define __DEV__.
    if (typeof __DEV__ !== 'undefined' && __DEV__) {
      console.log(`${TAG} [${context}] 🐛`, ...args);
    }
  },
};
|