@mobileai/react-native 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -34
- package/lib/module/components/AIAgent.js +216 -5
- package/lib/module/components/AIAgent.js.map +1 -1
- package/lib/module/components/AgentChatBar.js +358 -36
- package/lib/module/components/AgentChatBar.js.map +1 -1
- package/lib/module/core/AgentRuntime.js +122 -6
- package/lib/module/core/AgentRuntime.js.map +1 -1
- package/lib/module/core/systemPrompt.js +57 -0
- package/lib/module/core/systemPrompt.js.map +1 -1
- package/lib/module/index.js +8 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/providers/GeminiProvider.js +108 -85
- package/lib/module/providers/GeminiProvider.js.map +1 -1
- package/lib/module/services/AudioInputService.js +128 -0
- package/lib/module/services/AudioInputService.js.map +1 -0
- package/lib/module/services/AudioOutputService.js +154 -0
- package/lib/module/services/AudioOutputService.js.map +1 -0
- package/lib/module/services/VoiceService.js +362 -0
- package/lib/module/services/VoiceService.js.map +1 -0
- package/lib/module/utils/audioUtils.js +49 -0
- package/lib/module/utils/audioUtils.js.map +1 -0
- package/lib/module/utils/logger.js +21 -4
- package/lib/module/utils/logger.js.map +1 -1
- package/lib/typescript/babel.config.d.ts +10 -0
- package/lib/typescript/babel.config.d.ts.map +1 -0
- package/lib/typescript/eslint.config.d.mts +3 -0
- package/lib/typescript/eslint.config.d.mts.map +1 -0
- package/lib/typescript/fetch-models.d.mts +2 -0
- package/lib/typescript/fetch-models.d.mts.map +1 -0
- package/lib/typescript/list-all-models.d.mts +2 -0
- package/lib/typescript/list-all-models.d.mts.map +1 -0
- package/lib/typescript/list-models.d.mts +2 -0
- package/lib/typescript/list-models.d.mts.map +1 -0
- package/lib/typescript/src/components/AIAgent.d.ts +8 -2
- package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
- package/lib/typescript/src/components/AgentChatBar.d.ts +19 -2
- package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
- package/lib/typescript/src/core/AgentRuntime.d.ts +17 -1
- package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
- package/lib/typescript/src/core/systemPrompt.d.ts +8 -0
- package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
- package/lib/typescript/src/core/types.d.ts +24 -1
- package/lib/typescript/src/core/types.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +6 -1
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/providers/GeminiProvider.d.ts +22 -18
- package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
- package/lib/typescript/src/services/AudioInputService.d.ts +31 -0
- package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -0
- package/lib/typescript/src/services/AudioOutputService.d.ts +34 -0
- package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -0
- package/lib/typescript/src/services/VoiceService.d.ts +73 -0
- package/lib/typescript/src/services/VoiceService.d.ts.map +1 -0
- package/lib/typescript/src/utils/audioUtils.d.ts +17 -0
- package/lib/typescript/src/utils/audioUtils.d.ts.map +1 -0
- package/lib/typescript/src/utils/logger.d.ts +4 -0
- package/lib/typescript/src/utils/logger.d.ts.map +1 -1
- package/package.json +24 -8
- package/src/components/AIAgent.tsx +222 -3
- package/src/components/AgentChatBar.tsx +487 -42
- package/src/core/AgentRuntime.ts +131 -2
- package/src/core/systemPrompt.ts +62 -0
- package/src/core/types.ts +30 -0
- package/src/index.ts +16 -0
- package/src/providers/GeminiProvider.ts +105 -89
- package/src/services/AudioInputService.ts +141 -0
- package/src/services/AudioOutputService.ts +167 -0
- package/src/services/VoiceService.ts +409 -0
- package/src/utils/audioUtils.ts +54 -0
- package/src/utils/logger.ts +24 -7
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* 4. Parse tool call → execute (tap, type, navigate, done)
|
|
9
9
|
* 5. If not done, repeat from step 1 (re-dehydrate after UI change)
|
|
10
10
|
*/
|
|
11
|
-
import type { AIProvider, AgentConfig, ExecutionResult, ActionDefinition } from './types';
|
|
11
|
+
import type { AIProvider, AgentConfig, ExecutionResult, ToolDefinition, ActionDefinition } from './types';
|
|
12
12
|
export declare class AgentRuntime {
|
|
13
13
|
private provider;
|
|
14
14
|
private config;
|
|
@@ -37,7 +37,23 @@ export declare class AgentRuntime {
|
|
|
37
37
|
private getDeepestScreenName;
|
|
38
38
|
/** Maps a tool call to a user-friendly status label for the loading overlay. */
|
|
39
39
|
private getToolStatusLabel;
|
|
40
|
+
/**
|
|
41
|
+
* Captures the current screen as a base64 JPEG for Gemini vision.
|
|
42
|
+
* Uses react-native-view-shot as an optional peer dependency.
|
|
43
|
+
* Returns null if the library is not installed (graceful fallback).
|
|
44
|
+
*/
|
|
45
|
+
private captureScreenshot;
|
|
46
|
+
/**
|
|
47
|
+
* Get current screen context as formatted text.
|
|
48
|
+
* Used by voice mode: sent once at connect + after each tool call.
|
|
49
|
+
* Follows page-agent pattern: tree in user prompt, not system instructions.
|
|
50
|
+
*/
|
|
51
|
+
getScreenContext(): string;
|
|
40
52
|
private buildToolsForProvider;
|
|
53
|
+
/** Public accessor for voice mode — returns all registered tool definitions. */
|
|
54
|
+
getTools(): ToolDefinition[];
|
|
55
|
+
/** Execute a tool by name (for voice mode tool calls from WebSocket). */
|
|
56
|
+
executeTool(name: string, args: Record<string, any>): Promise<string>;
|
|
41
57
|
private getWalkConfig;
|
|
42
58
|
private getInstructions;
|
|
43
59
|
private observations;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AgentRuntime.d.ts","sourceRoot":"","sources":["../../../../src/core/AgentRuntime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAOH,OAAO,KAAK,EACV,UAAU,EACV,WAAW,EAEX,eAAe,
|
|
1
|
+
{"version":3,"file":"AgentRuntime.d.ts","sourceRoot":"","sources":["../../../../src/core/AgentRuntime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAOH,OAAO,KAAK,EACV,UAAU,EACV,WAAW,EAEX,eAAe,EACf,cAAc,EACd,gBAAgB,EAEjB,MAAM,SAAS,CAAC;AAMjB,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,KAAK,CAA0C;IACvD,OAAO,CAAC,OAAO,CAA4C;IAC3D,OAAO,CAAC,OAAO,CAAmB;IAClC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,mBAAmB,CAAuB;gBAGhD,QAAQ,EAAE,UAAU,EACpB,MAAM,EAAE,WAAW,EACnB,OAAO,EAAE,GAAG,EACZ,MAAM,EAAE,GAAG;IAyBb,OAAO,CAAC,oBAAoB;IAqL5B,cAAc,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK9C,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAMpC;;;OAGG;IACH,OAAO,CAAC,aAAa;IAWrB,OAAO,CAAC,iBAAiB;IAczB;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAiB5B,OAAO,CAAC,oBAAoB;IAW5B,gFAAgF;IAChF,OAAO,CAAC,kBAAkB;IAmB1B;;;;OAIG;YACW,iBAAiB;IA4B/B;;;;OAIG;IACI,gBAAgB,IAAI,MAAM;IAyBjC,OAAO,CAAC,qBAAqB;IA4B7B,gFAAgF;IACzE,QAAQ,IAAI,cAAc,EAAE;IAInC,yEAAyE;IAC5D,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBlF,OAAO,CAAC,aAAa;IASrB,OAAO,CAAC,eAAe;IAyBvB,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,cAAc,CAAc;IAEpC,OAAO,CAAC,kBAAkB;IAsB1B,OAAO,CAAC,kBAAkB;IAuDpB,OAAO,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IA+M5D,qDAAqD;IACrD,UAAU,CAAC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI;IAK3C,4CAA4C;IAC5C,YAAY,IAAI,OAAO;CAGxB"}
|
|
@@ -6,4 +6,12 @@
|
|
|
6
6
|
* to give the LLM clear, structured instructions.
|
|
7
7
|
*/
|
|
8
8
|
export declare function buildSystemPrompt(language: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Voice-optimized system prompt for the Gemini Live API.
|
|
11
|
+
*
|
|
12
|
+
* Includes the same screen format and tool semantics as text mode,
|
|
13
|
+
* but condensed for voice context and with guardrails against
|
|
14
|
+
* unprompted actions.
|
|
15
|
+
*/
|
|
16
|
+
export declare function buildVoiceSystemPrompt(language: string, userInstructions?: string): string;
|
|
9
17
|
//# sourceMappingURL=systemPrompt.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"systemPrompt.d.ts","sourceRoot":"","sources":["../../../../src/core/systemPrompt.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAyJ1D"}
|
|
1
|
+
{"version":3,"file":"systemPrompt.d.ts","sourceRoot":"","sources":["../../../../src/core/systemPrompt.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAyJ1D;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,MAAM,EAChB,gBAAgB,CAAC,EAAE,MAAM,GACxB,MAAM,CAkDR"}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Core types for the page-agent-style React Native AI SDK.
|
|
3
3
|
*/
|
|
4
|
+
export type AgentMode = 'text' | 'voice';
|
|
4
5
|
export type ElementType = 'pressable' | 'text-input' | 'switch' | 'scrollable';
|
|
5
6
|
export interface InteractiveElement {
|
|
6
7
|
/** Unique index assigned during tree walk */
|
|
@@ -98,6 +99,11 @@ export interface AgentConfig {
|
|
|
98
99
|
* Use this to show dynamic loading text (e.g., "Tapping 'Add'...").
|
|
99
100
|
*/
|
|
100
101
|
onStatusUpdate?: (status: string) => void;
|
|
102
|
+
/**
|
|
103
|
+
* Called after each step with token usage data.
|
|
104
|
+
* Use to track cost, enforce budgets, or display usage to the user.
|
|
105
|
+
*/
|
|
106
|
+
onTokenUsage?: (usage: TokenUsage) => void;
|
|
101
107
|
/**
|
|
102
108
|
* Callback for when agent needs user input (ask_user tool).
|
|
103
109
|
* Mirrors page-agent: the agent loop blocks until the user responds.
|
|
@@ -130,6 +136,8 @@ export interface ExecutionResult {
|
|
|
130
136
|
success: boolean;
|
|
131
137
|
message: string;
|
|
132
138
|
steps: AgentStep[];
|
|
139
|
+
/** Accumulated token usage for the entire task */
|
|
140
|
+
tokenUsage?: TokenUsage;
|
|
133
141
|
}
|
|
134
142
|
export interface ToolDefinition {
|
|
135
143
|
name: string;
|
|
@@ -158,6 +166,17 @@ export interface AgentReasoning {
|
|
|
158
166
|
/** The immediate next goal and why. */
|
|
159
167
|
plan: string;
|
|
160
168
|
}
|
|
169
|
+
/** Token usage metrics for cost tracking. */
|
|
170
|
+
export interface TokenUsage {
|
|
171
|
+
/** Tokens in the input prompt */
|
|
172
|
+
promptTokens: number;
|
|
173
|
+
/** Tokens generated by the model */
|
|
174
|
+
completionTokens: number;
|
|
175
|
+
/** Total tokens (prompt + completion) */
|
|
176
|
+
totalTokens: number;
|
|
177
|
+
/** Estimated cost in USD (based on model pricing) */
|
|
178
|
+
estimatedCostUSD: number;
|
|
179
|
+
}
|
|
161
180
|
/** Result from the AI provider's generateContent call. */
|
|
162
181
|
export interface ProviderResult {
|
|
163
182
|
/** Extracted action tool call (action_name + params). */
|
|
@@ -169,8 +188,12 @@ export interface ProviderResult {
|
|
|
169
188
|
reasoning: AgentReasoning;
|
|
170
189
|
/** Raw text response (if any). */
|
|
171
190
|
text?: string;
|
|
191
|
+
/** Token usage for this specific call */
|
|
192
|
+
tokenUsage?: TokenUsage;
|
|
172
193
|
}
|
|
173
194
|
export interface AIProvider {
|
|
174
|
-
generateContent(systemPrompt: string, userMessage: string, tools: ToolDefinition[], history: AgentStep[]
|
|
195
|
+
generateContent(systemPrompt: string, userMessage: string, tools: ToolDefinition[], history: AgentStep[],
|
|
196
|
+
/** Optional base64-encoded JPEG screenshot for vision */
|
|
197
|
+
screenshot?: string): Promise<ProviderResult>;
|
|
175
198
|
}
|
|
176
199
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,QAAQ,GAAG,YAAY,CAAC;AAE/E,MAAM,WAAW,kBAAkB;IACjC,6CAA6C;IAC7C,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,IAAI,EAAE,WAAW,CAAC;IAClB,gFAAgF;IAChF,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,EAAE,GAAG,CAAC;IACf,yBAAyB;IACzB,KAAK,EAAE;QACL,OAAO,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,IAAI,CAAC;QACnC,YAAY,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QACtC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,KAAK,IAAI,CAAC;QACzC,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B,CAAC;CACH;AAID,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB,mDAAmD;IACnD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,2CAA2C;IAC3C,YAAY,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,QAAQ,EAAE,kBAAkB,EAAE,CAAC;CAChC;AAID,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,cAAc,CAAC;IAC3B,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC3B,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAEvB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAIlB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;IAE9C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;IAI9C,qCAAqC;IACrC,YAAY,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE3D,sDAAsD;IACtD,WAAW,CAAC,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE7D,2CAA2C;IAC3C,YAAY,CAAC,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE1C,wDAAwD;IACxD,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,eAAe,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAIhE;;;;OAIG;IACH,sBAAsB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;IAIvE;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,
CAAC,MAAM,EAAE,cAAc,GAAG,IAAI,CAAC,CAAC;IAIpD,kDAAkD;IAClD,YAAY,CAAC,EAAE;QACb,8DAA8D;QAC9D,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB;;;;WAIG;QACH,qBAAqB,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,MAAM,GAAG,SAAS,GAAG,IAAI,CAAC;KAC3E,CAAC;IAEF,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IAInB;;;OAGG;IACH,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;IAE1C;;;;;OAKG;IACH,SAAS,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAIlD;;;OAGG;IACH,MAAM,CAAC,EAAE;QACP,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QAC7B,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QAChC,IAAI,EAAE,MAAM,IAAI,CAAC;KAClB,CAAC;IAEF;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAIlB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,SAAS,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;AAIzC,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,QAAQ,GAAG,YAAY,CAAC;AAE/E,MAAM,WAAW,kBAAkB;IACjC,6CAA6C;IAC7C,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,IAAI,EAAE,WAAW,CAAC;IAClB,gFAAgF;IAChF,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,EAAE,GAAG,CAAC;IACf,yBAAyB;IACzB,KAAK,EAAE;QACL,OAAO,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,IAAI,CAAC;QACnC,YAAY,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QACtC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,KAAK,IAAI,CAAC;QACzC,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B,CAAC;CACH;AAID,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB,mDAAmD;IACnD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,2CAA2C;IAC3C,YAAY,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,QAAQ,EAAE,kBAAkB,EAAE,CAAC;CAChC;AAID,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,cAAc,CAAC;IAC3B,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC3B,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IAEvB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAIlB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;IAE9C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;IAI9C,qCAAqC;IACrC,YAAY,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE3D,sDAAsD;IACtD,WAAW,CAAC,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE7D,2CAA2C;IAC3C,YAAY,CAAC,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAE1C,wDAAwD;IACxD,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,eAAe,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAIhE;;;;OAIG;IACH,sBAAsB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAA
M,CAAC;IAIvE;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,GAAG,IAAI,CAAC,CAAC;IAIpD,kDAAkD;IAClD,YAAY,CAAC,EAAE;QACb,8DAA8D;QAC9D,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB;;;;WAIG;QACH,qBAAqB,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,MAAM,GAAG,SAAS,GAAG,IAAI,CAAC;KAC3E,CAAC;IAEF,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IAInB;;;OAGG;IACH,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;IAE1C;;;OAGG;IACH,YAAY,CAAC,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,CAAC;IAE3C;;;;;OAKG;IACH,SAAS,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAIlD;;;OAGG;IACH,MAAM,CAAC,EAAE;QACP,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QAC7B,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;QAChC,IAAI,EAAE,MAAM,IAAI,CAAC;KAClB,CAAC;IAEF;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAIlB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,kDAAkD;IAClD,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB;AAID,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACtC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,QAAQ,GAAG,QAAQ,GAAG,SAAS,CAAC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAID,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,GAAG,CAAC;CAC7C;AAID,sEAAsE;AACtE,MAAM,WAAW,cAAc;IAC7B,qEAAqE;IACrE,gBAAgB,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAC;IACf,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;CACd;AAID,6CAA6C;AAC7C,MAAM,WAAW,UAAU;IACzB,iCAAiC;IACjC,YAAY,EAAE,MAAM,CAAC;IACrB,oCAAoC;IACpC,gBAAgB,EAAE,MAAM,CAAC;IACzB,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,0DAA0D;AAC1D,MAAM,WAAW,cAAc;IAC7B,yDAAyD;IACzD,SAAS,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAA
I,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAAE,CAAC,CAAC;IAC9D,2EAA2E;IAC3E,SAAS,EAAE,cAAc,CAAC;IAC1B,kCAAkC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yCAAyC;IACzC,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,eAAe,CACb,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,cAAc,EAAE,EACvB,OAAO,EAAE,SAAS,EAAE;IACpB,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,cAAc,CAAC,CAAC;CAC5B"}
|
|
@@ -6,5 +6,10 @@
|
|
|
6
6
|
*/
|
|
7
7
|
export { AIAgent } from './components/AIAgent';
|
|
8
8
|
export { useAction } from './hooks/useAction';
|
|
9
|
-
export
|
|
9
|
+
export { VoiceService } from './services/VoiceService';
|
|
10
|
+
export { AudioInputService } from './services/AudioInputService';
|
|
11
|
+
export { AudioOutputService } from './services/AudioOutputService';
|
|
12
|
+
export { logger } from './utils/logger';
|
|
13
|
+
export type { AgentConfig, AgentMode, ExecutionResult, InteractiveElement, DehydratedScreen, ToolDefinition, ActionDefinition, TokenUsage, } from './core/types';
|
|
14
|
+
export type { VoiceServiceConfig, VoiceServiceCallbacks, VoiceStatus, } from './services/VoiceService';
|
|
10
15
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAG/C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAG9C,YAAY,EACV,WAAW,EACX,eAAe,EACf,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,gBAAgB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAG/C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAG9C,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAGnE,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAGxC,YAAY,EACV,WAAW,EACX,SAAS,EACT,eAAe,EACf,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,gBAAgB,EAChB,UAAU,GACX,MAAM,cAAc,CAAC;AAEtB,YAAY,EACV,kBAAkB,EAClB,qBAAqB,EACrB,WAAW,GACZ,MAAM,yBAAyB,CAAC"}
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* GeminiProvider — Gemini API integration
|
|
2
|
+
* GeminiProvider — Gemini API integration via @google/genai SDK.
|
|
3
3
|
*
|
|
4
|
-
* Uses
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Uses the official Google GenAI SDK for:
|
|
5
|
+
* - generateContent with structured function calling (agent_step)
|
|
6
|
+
* - inlineData for vision (base64 screenshots)
|
|
7
|
+
* - System instructions
|
|
8
|
+
*
|
|
9
|
+
* Implements the AIProvider interface so it can be swapped
|
|
10
|
+
* with OpenAIProvider, AnthropicProvider, etc.
|
|
7
11
|
*/
|
|
8
12
|
import type { AIProvider, ToolDefinition, AgentStep, ProviderResult } from '../core/types';
|
|
9
13
|
export declare class GeminiProvider implements AIProvider {
|
|
10
|
-
private
|
|
14
|
+
private ai;
|
|
11
15
|
private model;
|
|
12
16
|
constructor(apiKey: string, model?: string);
|
|
13
|
-
generateContent(systemPrompt: string, userMessage: string, tools: ToolDefinition[], history: AgentStep[]): Promise<ProviderResult>;
|
|
17
|
+
generateContent(systemPrompt: string, userMessage: string, tools: ToolDefinition[], history: AgentStep[], screenshot?: string): Promise<ProviderResult>;
|
|
14
18
|
/**
|
|
15
19
|
* Builds a single `agent_step` function declaration that combines:
|
|
16
20
|
* - Structured reasoning fields (previous_goal_eval, memory, plan)
|
|
@@ -22,22 +26,22 @@ export declare class GeminiProvider implements AIProvider {
|
|
|
22
26
|
private buildAgentStepDeclaration;
|
|
23
27
|
private mapParamType;
|
|
24
28
|
/**
|
|
25
|
-
* Builds
|
|
26
|
-
*
|
|
27
|
-
* Each step is a STATELESS single-turn request (matching page-agent's approach):
|
|
28
|
-
* - System prompt has general instructions
|
|
29
|
-
* - User message contains full context: task, history, screen state
|
|
30
|
-
* - Model responds with agent_step function call
|
|
31
|
-
*
|
|
32
|
-
* History is embedded as text in assembleUserPrompt (via <agent_history>),
|
|
33
|
-
* NOT as functionCall/functionResponse pairs. This avoids Gemini's
|
|
34
|
-
* conversation format requirements and thought_signature complexity.
|
|
29
|
+
* Builds contents for the generateContent call.
|
|
30
|
+
* Single-turn: user message + optional screenshot as inlineData.
|
|
35
31
|
*/
|
|
36
32
|
private buildContents;
|
|
37
33
|
/**
|
|
38
|
-
* Parses the
|
|
39
|
-
* Extracts structured reasoning + action
|
|
34
|
+
* Parses the SDK response expecting a single agent_step function call.
|
|
35
|
+
* Extracts structured reasoning + action.
|
|
40
36
|
*/
|
|
41
37
|
private parseAgentStepResponse;
|
|
38
|
+
/**
|
|
39
|
+
* Extracts token usage from SDK response and calculates estimated cost.
|
|
40
|
+
*
|
|
41
|
+
* Pricing (Gemini 2.5 Flash):
|
|
42
|
+
* - Input: $0.30 / 1M tokens
|
|
43
|
+
* - Output: $2.50 / 1M tokens
|
|
44
|
+
*/
|
|
45
|
+
private extractTokenUsage;
|
|
42
46
|
}
|
|
43
47
|
//# sourceMappingURL=GeminiProvider.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GeminiProvider.d.ts","sourceRoot":"","sources":["../../../../src/providers/GeminiProvider.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"GeminiProvider.d.ts","sourceRoot":"","sources":["../../../../src/providers/GeminiProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,OAAO,KAAK,EAAE,UAAU,EAAE,cAAc,EAAE,SAAS,EAAE,cAAc,EAA8B,MAAM,eAAe,CAAC;AAWvH,qBAAa,cAAe,YAAW,UAAU;IAC/C,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE,MAAM,EAAE,KAAK,GAAE,MAA2B;IAKxD,eAAe,CACnB,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,cAAc,EAAE,EACvB,OAAO,EAAE,SAAS,EAAE,EACpB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,cAAc,CAAC;IAuD1B;;;;;;;OAOG;IACH,OAAO,CAAC,yBAAyB;IAsDjC,OAAO,CAAC,YAAY;IAYpB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAkBrB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IA8E9B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;CAkB1B"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AudioInputService — Real-time microphone capture for voice mode.
|
|
3
|
+
*
|
|
4
|
+
* Uses react-native-audio-api (Software Mansion) AudioRecorder for native
|
|
5
|
+
* PCM streaming from the microphone. Each chunk is converted from Float32
|
|
6
|
+
* to Int16 PCM and base64-encoded for the Gemini Live API.
|
|
7
|
+
*
|
|
8
|
+
* Requires: react-native-audio-api (development build only, not Expo Go)
|
|
9
|
+
*/
|
|
10
|
+
export interface AudioInputConfig {
|
|
11
|
+
sampleRate?: number;
|
|
12
|
+
/** Number of samples per callback buffer (default: 4096) */
|
|
13
|
+
bufferLength?: number;
|
|
14
|
+
/** Callback with base64 PCM audio chunk */
|
|
15
|
+
onAudioChunk: (base64Audio: string) => void;
|
|
16
|
+
onError?: (error: string) => void;
|
|
17
|
+
onPermissionDenied?: () => void;
|
|
18
|
+
}
|
|
19
|
+
type RecordingStatus = 'idle' | 'recording' | 'paused';
|
|
20
|
+
export declare class AudioInputService {
|
|
21
|
+
private config;
|
|
22
|
+
private status;
|
|
23
|
+
private recorder;
|
|
24
|
+
constructor(config: AudioInputConfig);
|
|
25
|
+
start(): Promise<boolean>;
|
|
26
|
+
stop(): Promise<void>;
|
|
27
|
+
get isRecording(): boolean;
|
|
28
|
+
get currentStatus(): RecordingStatus;
|
|
29
|
+
}
|
|
30
|
+
export {};
|
|
31
|
+
//# sourceMappingURL=AudioInputService.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioInputService.d.ts","sourceRoot":"","sources":["../../../../src/services/AudioInputService.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAOH,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,2CAA2C;IAC3C,YAAY,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;IAC5C,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAClC,kBAAkB,CAAC,EAAE,MAAM,IAAI,CAAC;CACjC;AAED,KAAK,eAAe,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;AAIvD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAmB;IACjC,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,EAAE,gBAAgB;IAM9B,KAAK,IAAI,OAAO,CAAC,OAAO,CAAC;IA0EzB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAmB3B,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,aAAa,IAAI,eAAe,CAEnC;CACF"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AudioOutputService — AI speech playback for voice mode.
|
|
3
|
+
*
|
|
4
|
+
* Uses react-native-audio-api (Software Mansion) for gapless, low-latency
|
|
5
|
+
* PCM playback. Decodes base64 PCM from Gemini Live API and queues it via
|
|
6
|
+
* AudioBufferQueueSourceNode for seamless streaming.
|
|
7
|
+
*
|
|
8
|
+
* Requires: react-native-audio-api (development build only, not Expo Go)
|
|
9
|
+
*/
|
|
10
|
+
export interface AudioOutputConfig {
|
|
11
|
+
sampleRate?: number;
|
|
12
|
+
onPlaybackStart?: () => void;
|
|
13
|
+
onPlaybackEnd?: () => void;
|
|
14
|
+
onError?: (error: string) => void;
|
|
15
|
+
}
|
|
16
|
+
export declare class AudioOutputService {
|
|
17
|
+
private config;
|
|
18
|
+
private audioContext;
|
|
19
|
+
private queueSourceNode;
|
|
20
|
+
private gainNode;
|
|
21
|
+
private muted;
|
|
22
|
+
private isStarted;
|
|
23
|
+
private chunkCount;
|
|
24
|
+
constructor(config?: AudioOutputConfig);
|
|
25
|
+
initialize(): Promise<boolean>;
|
|
26
|
+
/** Add a base64-encoded PCM chunk from Gemini to the playback queue */
|
|
27
|
+
enqueue(base64Audio: string): void;
|
|
28
|
+
mute(): void;
|
|
29
|
+
unmute(): void;
|
|
30
|
+
get isMuted(): boolean;
|
|
31
|
+
stop(): Promise<void>;
|
|
32
|
+
cleanup(): Promise<void>;
|
|
33
|
+
}
|
|
34
|
+
//# sourceMappingURL=AudioOutputService.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioOutputService.d.ts","sourceRoot":"","sources":["../../../../src/services/AudioOutputService.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAUH,MAAM,WAAW,iBAAiB;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7B,aAAa,CAAC,EAAE,MAAM,IAAI,CAAC;IAC3B,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAID,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,MAAM,CAAoB;IAClC,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,UAAU,CAAK;gBAEX,MAAM,GAAE,iBAAsB;IAMpC,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAsCpC,uEAAuE;IACvE,OAAO,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAmClC,IAAI,IAAI,IAAI;IAQZ,MAAM,IAAI,IAAI;IAQd,IAAI,OAAO,IAAI,OAAO,CAErB;IAIK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAerB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAa/B"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoiceService — WebSocket connection to Gemini Live API.
|
|
3
|
+
*
|
|
4
|
+
* Handles bidirectional audio streaming between the app and Gemini:
|
|
5
|
+
* - Sends PCM 16kHz 16-bit audio chunks (mic input)
|
|
6
|
+
* - Receives PCM 24kHz 16-bit audio chunks (AI responses)
|
|
7
|
+
* - Receives function calls (tap, navigate, etc.) for agentic actions
|
|
8
|
+
* - Sends screen context (DOM text + optional screenshot) for live mode
|
|
9
|
+
*
|
|
10
|
+
* Protocol: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent
|
|
11
|
+
*/
|
|
12
|
+
import type { ToolDefinition } from '../core/types';
|
|
13
|
+
export interface VoiceServiceConfig {
|
|
14
|
+
apiKey: string;
|
|
15
|
+
model?: string;
|
|
16
|
+
systemPrompt?: string;
|
|
17
|
+
tools?: ToolDefinition[];
|
|
18
|
+
/** Audio sample rate for mic input (default: 16000) */
|
|
19
|
+
inputSampleRate?: number;
|
|
20
|
+
/** Language for Gemini speech generation (e.g., 'en', 'ar') */
|
|
21
|
+
language?: string;
|
|
22
|
+
}
|
|
23
|
+
export interface VoiceServiceCallbacks {
|
|
24
|
+
onAudioResponse?: (base64Audio: string) => void;
|
|
25
|
+
onToolCall?: (toolCall: {
|
|
26
|
+
name: string;
|
|
27
|
+
args: Record<string, any>;
|
|
28
|
+
id: string;
|
|
29
|
+
}) => void;
|
|
30
|
+
onTranscript?: (text: string, isFinal: boolean, role: 'user' | 'model') => void;
|
|
31
|
+
onStatusChange?: (status: VoiceStatus) => void;
|
|
32
|
+
onError?: (error: string) => void;
|
|
33
|
+
/** Called when AI turn is complete (all audio sent) */
|
|
34
|
+
onTurnComplete?: () => void;
|
|
35
|
+
}
|
|
36
|
+
export type VoiceStatus = 'disconnected' | 'connecting' | 'connected' | 'error';
|
|
37
|
+
export declare class VoiceService {
|
|
38
|
+
private ws;
|
|
39
|
+
private config;
|
|
40
|
+
private callbacks;
|
|
41
|
+
private setupComplete;
|
|
42
|
+
private _status;
|
|
43
|
+
constructor(config: VoiceServiceConfig);
|
|
44
|
+
connect(callbacks: VoiceServiceCallbacks): void;
|
|
45
|
+
disconnect(): void;
|
|
46
|
+
get isConnected(): boolean;
|
|
47
|
+
get currentStatus(): VoiceStatus;
|
|
48
|
+
/** Send PCM audio chunk (base64 encoded) to Gemini */
|
|
49
|
+
private sendCount;
|
|
50
|
+
sendAudio(base64Audio: string): void;
|
|
51
|
+
/** Send text message via realtimeInput (same channel as audio) */
|
|
52
|
+
sendText(text: string): void;
|
|
53
|
+
/** Send DOM tree as passive context during live conversation.
|
|
54
|
+
*
|
|
55
|
+
* Uses `clientContent` with `turnComplete: false` to inject context
|
|
56
|
+
* WITHOUT triggering a model response. This is the "incremental content
|
|
57
|
+
* updates" pattern from the Gemini docs for establishing session context.
|
|
58
|
+
*
|
|
59
|
+
* Called once at connect + after each tool call (not on a timer).
|
|
60
|
+
* Screenshots are handled separately via the capture_screenshot tool.
|
|
61
|
+
*/
|
|
62
|
+
sendScreenContext(domText: string): void;
|
|
63
|
+
/** Send function call result back to Gemini */
|
|
64
|
+
sendFunctionResponse(name: string, id: string, result: any): void;
|
|
65
|
+
private sendSetup;
|
|
66
|
+
private handleMessage;
|
|
67
|
+
private handleBinaryMessage;
|
|
68
|
+
private processBinaryBytes;
|
|
69
|
+
private processMessage;
|
|
70
|
+
private setStatus;
|
|
71
|
+
private arrayBufferToBase64;
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=VoiceService.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VoiceService.d.ts","sourceRoot":"","sources":["../../../../src/services/VoiceService.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAIpD,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,cAAc,EAAE,CAAC;IACzB,uDAAuD;IACvD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,+DAA+D;IAC/D,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACpC,eAAe,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;IAChD,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAAC,EAAE,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;IACzF,YAAY,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,KAAK,IAAI,CAAC;IAChF,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAClC,uDAAuD;IACvD,cAAc,CAAC,EAAE,MAAM,IAAI,CAAC;CAC7B;AAED,MAAM,MAAM,WAAW,GAAG,cAAc,GAAG,YAAY,GAAG,WAAW,GAAG,OAAO,CAAC;AAWhF,qBAAa,YAAY;IACvB,OAAO,CAAC,EAAE,CAA0B;IACpC,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,SAAS,CAA6B;IAC9C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,OAAO,CAA+B;gBAElC,MAAM,EAAE,kBAAkB;IAMtC,OAAO,CAAC,SAAS,EAAE,qBAAqB,GAAG,IAAI;IAqC/C,UAAU,IAAI,IAAI;IAUlB,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,aAAa,IAAI,WAAW,CAE/B;IAID,sDAAsD;IACtD,OAAO,CAAC,SAAS,CAAK;IACtB,SAAS,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAsBpC,kEAAkE;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAU5B;;;;;;;;OAQG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAgBxC,+CAA+C;IAC/C,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI;IAmBjE,OAAO,CAAC,SAAS;IA6DjB,OAAO,CAAC,aAAa;IAsBrB,OAAO,CAAC,mBAAmB;IA0B3B,OAAO,CAAC,kBAAkB;IAmB1B,OAAO,CAAC,cAAc;IAoEtB,OAAO,CAAC,SAAS;IAKjB,OAAO,CAAC,mBAAmB;CAQ5B"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio utility functions for PCM conversion.
|
|
3
|
+
*
|
|
4
|
+
* Used by AudioInputService and AudioOutputService to convert between
|
|
5
|
+
* Float32 (Web Audio API) and Int16 (Gemini Live API) PCM formats.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Convert Float32Array PCM samples to Int16 PCM and encode as base64.
|
|
9
|
+
* Gemini Live API expects Int16 little-endian PCM.
|
|
10
|
+
*/
|
|
11
|
+
export declare function float32ToInt16Base64(float32Data: Float32Array): string;
|
|
12
|
+
/**
|
|
13
|
+
* Decode base64 Int16 PCM to Float32Array.
|
|
14
|
+
* Used for manual decoding when decodePCMInBase64 is unavailable.
|
|
15
|
+
*/
|
|
16
|
+
export declare function base64ToFloat32(base64: string): Float32Array;
|
|
17
|
+
//# sourceMappingURL=audioUtils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audioUtils.d.ts","sourceRoot":"","sources":["../../../../src/utils/audioUtils.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,WAAW,EAAE,YAAY,GAAG,MAAM,CAkBtE;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,YAAY,CAkB5D"}
|
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
export declare const logger: {
|
|
2
|
+
/** Enable or disable all SDK logging. */
|
|
3
|
+
setEnabled: (value: boolean) => void;
|
|
4
|
+
/** Check if logging is enabled. */
|
|
5
|
+
isEnabled: () => boolean;
|
|
2
6
|
info: (context: string, ...args: any[]) => void;
|
|
3
7
|
warn: (context: string, ...args: any[]) => void;
|
|
4
8
|
error: (context: string, ...args: any[]) => void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../../../src/utils/logger.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../../../src/utils/logger.ts"],"names":[],"mappings":"AAUA,eAAO,MAAM,MAAM;IACjB,yCAAyC;wBACrB,OAAO;IAI3B,mCAAmC;;oBAGnB,MAAM,WAAW,GAAG,EAAE;oBAItB,MAAM,WAAW,GAAG,EAAE;qBAIrB,MAAM,WAAW,GAAG,EAAE;qBAKtB,MAAM,WAAW,GAAG,EAAE;CAKxC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mobileai/react-native",
|
|
3
|
-
"version": "0.4.1",
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"description": "Build autonomous AI agents for React Native and Expo apps. Provides AI-native UI traversal, tool calling, and structured reasoning.",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"source": "./src/index.ts",
|
|
@@ -30,7 +30,6 @@
|
|
|
30
30
|
"!**/__tests__",
|
|
31
31
|
"!**/__fixtures__",
|
|
32
32
|
"!**/__mocks__",
|
|
33
|
-
"!**/__mocks__",
|
|
34
33
|
"!**/.*",
|
|
35
34
|
"!mcp-server"
|
|
36
35
|
],
|
|
@@ -85,21 +84,38 @@
|
|
|
85
84
|
"eslint-config-prettier": "^10.1.8",
|
|
86
85
|
"eslint-plugin-prettier": "^5.5.4",
|
|
87
86
|
"jest": "^29.7.0",
|
|
88
|
-
"prettier": "^
|
|
87
|
+
"prettier": "^3.5.0",
|
|
89
88
|
"react": "19.2.0",
|
|
90
89
|
"react-native": "0.83.2",
|
|
91
90
|
"react-native-builder-bob": "^0.40.18",
|
|
92
91
|
"turbo": "^2.5.6",
|
|
93
92
|
"typescript": "^5.9.2"
|
|
94
93
|
},
|
|
94
|
+
"dependencies": {
|
|
95
|
+
"@google/genai": "^1.0.0",
|
|
96
|
+
"react-native-audio-api": "^0.11.7"
|
|
97
|
+
},
|
|
95
98
|
"peerDependencies": {
|
|
96
99
|
"react": "*",
|
|
97
|
-
"react-native": "*"
|
|
100
|
+
"react-native": "*",
|
|
101
|
+
"react-native-audio-api": "*",
|
|
102
|
+
"react-native-view-shot": "*",
|
|
103
|
+
"expo-speech-recognition": "*"
|
|
104
|
+
},
|
|
105
|
+
"peerDependenciesMeta": {
|
|
106
|
+
"react-native-audio-api": {
|
|
107
|
+
"optional": true
|
|
108
|
+
},
|
|
109
|
+
"react-native-view-shot": {
|
|
110
|
+
"optional": true
|
|
111
|
+
},
|
|
112
|
+
"expo-speech-recognition": {
|
|
113
|
+
"optional": true
|
|
114
|
+
}
|
|
98
115
|
},
|
|
99
116
|
"workspaces": [
|
|
100
117
|
"example"
|
|
101
118
|
],
|
|
102
|
-
"packageManager": "yarn@4.11.0",
|
|
103
119
|
"react-native-builder-bob": {
|
|
104
120
|
"source": "src",
|
|
105
121
|
"output": "lib",
|
|
@@ -129,7 +145,8 @@
|
|
|
129
145
|
"preset": "react-native",
|
|
130
146
|
"modulePathIgnorePatterns": [
|
|
131
147
|
"<rootDir>/example/node_modules",
|
|
132
|
-
"<rootDir>/lib/"
|
|
148
|
+
"<rootDir>/lib/",
|
|
149
|
+
"<rootDir>/rntl-reference"
|
|
133
150
|
]
|
|
134
151
|
},
|
|
135
152
|
"create-react-native-library": {
|
|
@@ -140,6 +157,5 @@
|
|
|
140
157
|
"jest"
|
|
141
158
|
],
|
|
142
159
|
"version": "0.57.2"
|
|
143
|
-
},
|
|
144
|
-
"dependencies": {}
|
|
160
|
+
}
|
|
145
161
|
}
|