@mobileai/react-native 0.4.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -4
- package/lib/module/components/AIAgent.js +179 -38
- package/lib/module/components/AIAgent.js.map +1 -1
- package/lib/module/components/AgentChatBar.js +53 -29
- package/lib/module/components/AgentChatBar.js.map +1 -1
- package/lib/module/components/Icons.js +337 -0
- package/lib/module/components/Icons.js.map +1 -0
- package/lib/module/core/AgentRuntime.js +74 -3
- package/lib/module/core/AgentRuntime.js.map +1 -1
- package/lib/module/core/systemPrompt.js +57 -38
- package/lib/module/core/systemPrompt.js.map +1 -1
- package/lib/module/index.js +3 -9
- package/lib/module/index.js.map +1 -1
- package/lib/module/services/AudioInputService.js +73 -2
- package/lib/module/services/AudioInputService.js.map +1 -1
- package/lib/module/services/AudioOutputService.js +58 -5
- package/lib/module/services/AudioOutputService.js.map +1 -1
- package/lib/module/services/VoiceService.js +281 -275
- package/lib/module/services/VoiceService.js.map +1 -1
- package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
- package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
- package/lib/typescript/src/components/Icons.d.ts +43 -0
- package/lib/typescript/src/components/Icons.d.ts.map +1 -0
- package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
- package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
- package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +4 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
- package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
- package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
- package/lib/typescript/src/services/VoiceService.d.ts +38 -29
- package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/components/AIAgent.tsx +192 -39
- package/src/components/AgentChatBar.tsx +44 -25
- package/src/components/Icons.tsx +253 -0
- package/src/core/AgentRuntime.ts +70 -3
- package/src/core/systemPrompt.ts +57 -38
- package/src/index.ts +8 -8
- package/src/services/AudioInputService.ts +77 -2
- package/src/services/AudioOutputService.ts +59 -5
- package/src/services/VoiceService.ts +278 -290
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Icons — Zero-dependency, View-based icons for the AI Agent chat bar.
|
|
3
|
+
*
|
|
4
|
+
* Why not emoji? iOS Simulator 26+ has a bug where emoji renders as "?".
|
|
5
|
+
* Why not Unicode symbols? They look obscure and unprofessional.
|
|
6
|
+
* Why not icon libraries? This is a library — zero runtime dependencies.
|
|
7
|
+
*
|
|
8
|
+
* These icons are built purely from React Native View components,
|
|
9
|
+
* rendering identically on every platform and screen size.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { View } from 'react-native';
|
|
13
|
+
|
|
14
|
+
// ─── Mic Icon (pill + stem + base) ────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Microphone glyph assembled from plain Views, stacked top to bottom:
 * a rounded capsule (mic head), a U-shaped cradle, a thin stem and a flat base.
 *
 * @param size  Edge length of the square bounding box; every part is a fraction of it.
 * @param color Fill/border color shared by all four parts.
 */
export function MicIcon({ size = 20, color = '#fff' }: { size?: number; color?: string }) {
  // All dimensions are derived from `size` so the glyph scales uniformly.
  const capsule = { w: size * 0.4, h: size * 0.5 };
  const cradle = { w: size * 0.55, h: size * 0.35, stroke: size * 0.07 };
  const stem = { w: size * 0.08, h: size * 0.18 };
  const footW = size * 0.35;

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      {/* Mic head */}
      <View
        style={{
          width: capsule.w,
          height: capsule.h,
          borderRadius: capsule.w / 2,
          backgroundColor: color,
        }}
      />
      {/* Cradle: bordered box with no top edge, pulled up so it overlaps the head */}
      <View
        style={{
          width: cradle.w,
          height: cradle.h,
          borderBottomLeftRadius: cradle.w / 2,
          borderBottomRightRadius: cradle.w / 2,
          borderWidth: cradle.stroke,
          borderTopWidth: 0,
          borderColor: color,
          marginTop: -(capsule.h * 0.3),
        }}
      />
      {/* Stem, nudged up 1px so it touches the cradle */}
      <View
        style={{
          width: stem.w,
          height: stem.h,
          backgroundColor: color,
          marginTop: -1,
        }}
      />
      {/* Base: same thickness as the stem, rounded ends */}
      <View
        style={{
          width: footW,
          height: stem.w,
          backgroundColor: color,
          borderRadius: stem.w / 2,
        }}
      />
    </View>
  );
}
|
|
63
|
+
|
|
64
|
+
// ─── Speaker Icon (cone + sound waves) ────────────────────────
|
|
65
|
+
|
|
66
|
+
/**
 * Speaker glyph: a small rectangular body, a triangular cone built from
 * transparent borders, and either one sound-wave arc or a diagonal mute slash.
 *
 * @param size  Edge length of the square bounding box.
 * @param color Color of every visible part.
 * @param muted When true, the sound wave is replaced by a 45° slash over the icon.
 */
export function SpeakerIcon({ size = 20, color = '#fff', muted = false }: { size?: number; color?: string; muted?: boolean }) {
  const boxW = size * 0.25;
  const boxH = size * 0.3;
  const coneDepth = size * 0.2;

  // Absolutely positioned bar rotated 45° across the icon.
  const muteSlash = (
    <View
      style={{
        position: 'absolute',
        width: size * 0.08,
        height: size * 0.8,
        backgroundColor: color,
        borderRadius: size * 0.04,
        transform: [{ rotate: '45deg' }],
      }}
    />
  );

  // A bordered box with no left edge and rounded right corners reads as an arc.
  const soundWave = (
    <View style={{ marginLeft: size * 0.05 }}>
      <View
        style={{
          width: size * 0.15,
          height: size * 0.3,
          borderWidth: size * 0.05,
          borderColor: color,
          borderLeftWidth: 0,
          borderTopLeftRadius: 0,
          borderBottomLeftRadius: 0,
          borderTopRightRadius: size * 0.15,
          borderBottomRightRadius: size * 0.15,
        }}
      />
    </View>
  );

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row' }}>
      {/* Body */}
      <View
        style={{
          width: boxW,
          height: boxH,
          backgroundColor: color,
          borderRadius: size * 0.03,
        }}
      />
      {/* Cone: zero-size box whose only colored border is the left one */}
      <View
        style={{
          width: 0,
          height: 0,
          borderTopWidth: size * 0.25,
          borderTopColor: 'transparent',
          borderBottomWidth: size * 0.25,
          borderBottomColor: 'transparent',
          borderLeftWidth: coneDepth,
          borderLeftColor: color,
          marginLeft: -1,
        }}
      />
      {muted ? muteSlash : soundWave}
    </View>
  );
}
|
|
121
|
+
|
|
122
|
+
// ─── Send Arrow (right-pointing triangle) ─────────────────────
|
|
123
|
+
|
|
124
|
+
/**
 * Send glyph: a filled, right-pointing triangle centered in a square box.
 * The triangle is a zero-size View whose left border carries the color while
 * the top and bottom borders are transparent.
 *
 * @param size  Edge length of the square bounding box.
 * @param color Triangle fill color.
 */
export function SendArrowIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
  const triangleHeight = size * 0.55;
  const halfHeight = triangleHeight / 2;

  const triangle = {
    width: 0,
    height: 0,
    borderTopWidth: halfHeight,
    borderTopColor: 'transparent',
    borderBottomWidth: halfHeight,
    borderBottomColor: 'transparent',
    borderLeftWidth: triangleHeight * 0.85,
    borderLeftColor: color,
    // Small rightward shift applied on top of the flex centering.
    marginLeft: size * 0.1,
  };

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      <View style={triangle} />
    </View>
  );
}
|
|
143
|
+
|
|
144
|
+
// ─── Stop Icon (filled square) ────────────────────────────────
|
|
145
|
+
|
|
146
|
+
/**
 * Stop glyph: a filled square with slightly rounded corners, centered in a
 * square box.
 *
 * @param size  Edge length of the bounding box; the square is 45% of it.
 * @param color Square fill color.
 */
export function StopIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
  const side = size * 0.45;
  const square = {
    width: side,
    height: side,
    backgroundColor: color,
    borderRadius: size * 0.05,
  };

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      <View style={square} />
    </View>
  );
}
|
|
159
|
+
|
|
160
|
+
// ─── Recording Dot (static filled circle; no animation here) ──
|
|
161
|
+
|
|
162
|
+
/**
 * Recording indicator: a filled circle centered in a square box.
 * This component applies no animation of its own.
 *
 * @param size  Edge length of the bounding box; the circle's diameter is 45% of it.
 * @param color Circle fill color (defaults to a red tone).
 */
export function RecordingDot({ size = 18, color = '#FF3B30' }: { size?: number; color?: string }) {
  const diameter = size * 0.45;
  const circle = {
    width: diameter,
    height: diameter,
    borderRadius: diameter / 2,
    backgroundColor: color,
  };

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      <View style={circle} />
    </View>
  );
}
|
|
175
|
+
|
|
176
|
+
// ─── Loading Spinner (three dots) ─────────────────────────────
|
|
177
|
+
|
|
178
|
+
/**
 * Loading indicator: three small dots in a row with increasing opacity
 * (0.4, 0.7, 1). The dots are static; no animation is applied here.
 *
 * Spacing between dots is applied as a left margin on the second and third
 * dot rather than the flexbox `gap` style, so the row keeps its spacing on
 * React Native versions that do not support `gap`. With the row centered,
 * the resulting layout is the same as with `gap`.
 *
 * @param size  Edge length of the square bounding box; each dot is 15% of it.
 * @param color Dot fill color.
 */
export function LoadingDots({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
  const dotSize = size * 0.15;
  const spacing = dotSize * 0.8;

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row' }}>
      {[0.4, 0.7, 1].map((opacity, i) => (
        <View
          key={i}
          style={{
            width: dotSize,
            height: dotSize,
            borderRadius: dotSize / 2,
            backgroundColor: color,
            opacity,
            // Only dots after the first get the separator margin.
            marginLeft: i === 0 ? 0 : spacing,
          }}
        />
      ))}
    </View>
  );
}
|
|
194
|
+
|
|
195
|
+
// ─── Close / Dismiss (X mark) ─────────────────────────────────
|
|
196
|
+
|
|
197
|
+
/**
 * Close / dismiss glyph: two identical rounded bars stacked on top of each
 * other and rotated +45° and -45° to form an X.
 *
 * @param size  Edge length of the square bounding box.
 * @param color Bar color (defaults to a semi-transparent white).
 */
export function CloseIcon({ size = 14, color = 'rgba(255,255,255,0.6)' }: { size?: number; color?: string }) {
  const length = size * 0.7;
  const thickness = size * 0.12;

  // Builds one arm of the X; both arms share everything except the rotation.
  const arm = (angle: string) => (
    <View
      style={{
        position: 'absolute',
        width: length,
        height: thickness,
        backgroundColor: color,
        borderRadius: thickness,
        transform: [{ rotate: angle }],
      }}
    />
  );

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      {arm('45deg')}
      {arm('-45deg')}
    </View>
  );
}
|
|
221
|
+
|
|
222
|
+
// ─── AI Badge (for FAB) ───────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
/**
 * Chat-bubble badge: a rounded rectangle with a small triangular tail at its
 * bottom-left, centered in a square box.
 *
 * @param size  Edge length of the square bounding box.
 * @param color Fill color of the bubble and its tail. Optional; defaults to
 *              '#fff', which is what the badge always rendered before this
 *              prop existed, so existing callers are unaffected. Added for
 *              parity with the other icons in this file, which all accept
 *              a `color`.
 */
export function AIBadge({ size = 28, color = '#fff' }: { size?: number; color?: string }) {
  const bubbleW = size * 0.6;
  const bubbleH = size * 0.45;
  const tailSize = size * 0.12;

  return (
    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
      {/* Bubble body; the bottom margin lifts it to leave room for the tail */}
      <View
        style={{
          width: bubbleW,
          height: bubbleH,
          backgroundColor: color,
          borderRadius: size * 0.12,
          marginBottom: tailSize * 0.5,
        }}
      />
      {/* Tail: zero-size box with a colored top border and a transparent right border */}
      <View
        style={{
          position: 'absolute',
          bottom: size * 0.18,
          left: size * 0.22,
          width: 0,
          height: 0,
          borderTopWidth: tailSize,
          borderTopColor: color,
          borderRightWidth: tailSize,
          borderRightColor: 'transparent',
        }}
      />
    </View>
  );
}
|
package/src/core/AgentRuntime.ts
CHANGED
|
@@ -146,6 +146,9 @@ export class AgentRuntime {
|
|
|
146
146
|
}
|
|
147
147
|
try {
|
|
148
148
|
element.props.onChangeText(args.text);
|
|
149
|
+
// Wait for React to process the state update and re-render
|
|
150
|
+
// (same pattern as navigate tool's 500ms post-action delay)
|
|
151
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
149
152
|
return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
|
|
150
153
|
} catch (error: any) {
|
|
151
154
|
return `❌ Error typing: ${error.message}`;
|
|
@@ -174,7 +177,7 @@ export class AgentRuntime {
|
|
|
174
177
|
}
|
|
175
178
|
}
|
|
176
179
|
|
|
177
|
-
// React Navigation path: use navRef
|
|
180
|
+
// React Navigation path: use navRef
|
|
178
181
|
if (!this.navRef) {
|
|
179
182
|
return '❌ Navigation ref not available.';
|
|
180
183
|
}
|
|
@@ -188,10 +191,31 @@ export class AgentRuntime {
|
|
|
188
191
|
const params = args.params ? (typeof args.params === 'string' ? JSON.parse(args.params) : args.params) : undefined;
|
|
189
192
|
// Case-insensitive screen name matching
|
|
190
193
|
const availableRoutes = this.getRouteNames();
|
|
194
|
+
logger.info('AgentRuntime', `🧭 Navigate requested: "${args.screen}" | Available: [${availableRoutes.join(', ')}] | Params: ${JSON.stringify(params)}`);
|
|
191
195
|
const matchedScreen = availableRoutes.find(
|
|
192
196
|
r => r.toLowerCase() === args.screen.toLowerCase()
|
|
193
|
-
)
|
|
194
|
-
|
|
197
|
+
);
|
|
198
|
+
|
|
199
|
+
// Guard: screen must exist in the navigation tree
|
|
200
|
+
if (!matchedScreen) {
|
|
201
|
+
const errMsg = `❌ "${args.screen}" is not a screen — it may be content within a screen. Available screens: ${availableRoutes.join(', ')}. Look at the current screen context for "${args.screen}" as a section, category, or element, and scroll/tap to find it. If it's on a different screen, navigate to the correct screen first.`;
|
|
202
|
+
logger.warn('AgentRuntime', `🧭 Navigate REJECTED: ${errMsg}`);
|
|
203
|
+
return errMsg;
|
|
204
|
+
}
|
|
205
|
+
logger.info('AgentRuntime', `🧭 Navigate matched: "${args.screen}" → "${matchedScreen}"`);
|
|
206
|
+
|
|
207
|
+
// Find the path to the screen (handles nested navigators)
|
|
208
|
+
const screenPath = this.findScreenPath(matchedScreen);
|
|
209
|
+
if (screenPath.length > 1) {
|
|
210
|
+
// Nested screen: navigate using parent → { screen: child } pattern
|
|
211
|
+
// e.g. navigate('HomeTab', { screen: 'Home', params })
|
|
212
|
+
logger.info('AgentRuntime', `Nested navigation: ${screenPath.join(' → ')}`);
|
|
213
|
+
const nestedParams = this.buildNestedParams(screenPath, params);
|
|
214
|
+
this.navRef.navigate(screenPath[0], nestedParams);
|
|
215
|
+
} else {
|
|
216
|
+
// Top-level screen: direct navigate
|
|
217
|
+
this.navRef.navigate(matchedScreen, params);
|
|
218
|
+
}
|
|
195
219
|
await new Promise(resolve => setTimeout(resolve, 500));
|
|
196
220
|
return `✅ Navigated to "${matchedScreen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
|
|
197
221
|
} catch (error: any) {
|
|
@@ -289,6 +313,49 @@ export class AgentRuntime {
|
|
|
289
313
|
return [...new Set(names)];
|
|
290
314
|
}
|
|
291
315
|
|
|
316
|
+
/**
 * Find the chain of route names leading from the root navigator to a target screen.
 *
 * Returns [screen] when the target is a direct route of the root navigator,
 * and [parent, ..., screen] when it lives inside nested navigators, including
 * every intermediate navigator on the way. The full chain matters because the
 * result is fed to buildNestedParams, which emits one `{ screen, params }`
 * level per path element after the first.
 *
 * Examples:
 *   findScreenPath('Home')    → ['HomeTab', 'Home']
 *   findScreenPath('Details') → ['HomeTab', 'HomeStack', 'Details']
 *
 * Only navigators whose state is present in the navigation state tree are
 * searched. If the target is not found, or reading the state throws, the
 * method falls back to [targetScreen] so the caller attempts a direct navigate.
 *
 * @param targetScreen Route name to locate (already matched to its exact casing by the caller).
 * @returns Route names from the top-level route down to the target.
 */
private findScreenPath(targetScreen: string): string[] {
  // Depth-first search of one navigator state; returns the path relative to
  // that navigator, or null when the target is not reachable from it.
  const searchState = (navState: any): string[] | null => {
    const routes: any[] = Array.isArray(navState?.routes) ? navState.routes : [];

    // A direct child wins over anything deeper.
    if (routes.some((r: any) => r?.name === targetScreen)) {
      return [targetScreen];
    }

    // Descend into child navigators, keeping every hop in the path.
    for (const route of routes) {
      if (!route?.state) continue;
      const subPath = searchState(route.state);
      if (subPath) {
        return [route.name, ...subPath];
      }
    }

    // `routeNames`, when present, is treated as the navigator's declared
    // screens, which may include ones with no entry in `routes` yet.
    // NOTE(review): assumes collectRouteNames also reads routeNames — confirm.
    const declared: unknown = navState?.routeNames;
    if (Array.isArray(declared) && declared.includes(targetScreen)) {
      return [targetScreen];
    }

    return null;
  };

  try {
    const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
    if (!state?.routes) return [targetScreen];

    return searchState(state) || [targetScreen]; // Fallback: try direct
  } catch {
    return [targetScreen];
  }
}
|
|
344
|
+
|
|
345
|
+
/**
 * Turn a screen path into the nested `{ screen, params }` object passed as the
 * second argument to navigate() for the path's first element.
 *
 *   ['HomeTab', 'Home']        → { screen: 'Home', params }
 *   ['Tab', 'Stack', 'Screen'] → { screen: 'Stack', params: { screen: 'Screen', params } }
 *
 * The `params` key is omitted at any level whose inner value is `undefined`.
 * A path with fewer than two elements yields `leafParams` unchanged.
 *
 * @param path       Route names from the top-level route down to the target.
 * @param leafParams Params intended for the innermost screen, if any.
 * @returns The nested params object, or `leafParams` when there is nothing to nest.
 */
private buildNestedParams(path: string[], leafParams?: any): any {
  // Fold from the innermost screen outward; path[0] is the navigate() target
  // itself and therefore not part of the params object.
  return path.slice(1).reduceRight((inner: any, screenName: string) => {
    const level: any = { screen: screenName };
    if (inner !== undefined) {
      level.params = inner;
    }
    return level;
  }, leafParams);
}
|
|
358
|
+
|
|
292
359
|
/**
|
|
293
360
|
* Recursively find the deepest active screen name.
|
|
294
361
|
* For tabs: follows active tab → active screen inside that tab.
|
package/src/core/systemPrompt.ts
CHANGED
|
@@ -62,6 +62,12 @@ Available tools:
|
|
|
62
62
|
- ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
|
|
63
63
|
</tools>
|
|
64
64
|
|
|
65
|
+
<custom_actions>
|
|
66
|
+
In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
|
|
67
|
+
When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
|
|
68
|
+
If a UI element is hidden (aiIgnore) but a matching custom action exists, use the action.
|
|
69
|
+
</custom_actions>
|
|
70
|
+
|
|
65
71
|
<rules>
|
|
66
72
|
- There are 2 types of requests — always determine which type BEFORE acting:
|
|
67
73
|
1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
|
|
@@ -177,12 +183,9 @@ export function buildVoiceSystemPrompt(
|
|
|
177
183
|
): string {
|
|
178
184
|
const isArabic = language === 'ar';
|
|
179
185
|
|
|
180
|
-
let prompt = `You are a voice-controlled AI
|
|
186
|
+
let prompt = `You are a voice-controlled AI assistant for a React Native mobile app.
|
|
181
187
|
|
|
182
|
-
|
|
183
|
-
${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
|
|
184
|
-
- Use the same language as the user. Return in user's language.
|
|
185
|
-
</language_settings>
|
|
188
|
+
You always have access to the current screen context — it shows you exactly what the user sees on their phone. Use it to answer questions and execute actions when the user speaks a command. Wait for the user to speak a clear voice command before taking any action. Screen context updates arrive automatically as the UI changes.
|
|
186
189
|
|
|
187
190
|
<screen_state>
|
|
188
191
|
Interactive elements are listed as [index]<type attrs>label />
|
|
@@ -198,56 +201,72 @@ Pure text elements without [] are NOT interactive — they are informational con
|
|
|
198
201
|
<tools>
|
|
199
202
|
Available tools:
|
|
200
203
|
- tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
|
|
201
|
-
- type(index, text): Type text into a text-input element by its index.
|
|
202
|
-
- navigate(screen, params): Navigate to a
|
|
203
|
-
- done(text, success): Complete task
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
When you
|
|
204
|
+
- type(index, text): Type text into a text-input element by its index. ONLY works on text-input elements.
|
|
205
|
+
- navigate(screen, params): Navigate to a screen listed in Available Screens. ONLY use screen names from the Available Screens list — section titles, category names, or other visible text are content within a screen, not navigable screens.
|
|
206
|
+
- done(text, success): Complete task and respond to the user.
|
|
207
|
+
|
|
208
|
+
CRITICAL — tool call protocol:
|
|
209
|
+
When you decide to use a tool, emit the function call IMMEDIATELY as the first thing in your response — before any speech or audio output.
|
|
210
|
+
Speaking before a tool call causes a fatal connection error. Always: call the tool first, wait for the result, then speak about what happened.
|
|
211
|
+
Correct: [function call] → receive result → speak to user about the outcome.
|
|
212
|
+
Wrong: "Sure, let me tap on..." → [function call] → crash.
|
|
207
213
|
</tools>
|
|
208
214
|
|
|
209
|
-
<
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
- When you have NO voice command from the user, stay silent. Do NOT narrate the screen.
|
|
215
|
-
- When the user speaks, determine the request type BEFORE acting:
|
|
216
|
-
1. Information requests ("what's on screen?", "how much is X?"): Respond with spoken audio. Do NOT call any tools.
|
|
217
|
-
2. Action requests ("go to settings", "add pizza to cart"): Call the appropriate tool function directly (e.g. navigate, tap).
|
|
218
|
-
- After completing an action, speak a brief confirmation to the user.
|
|
219
|
-
- Keep all spoken responses concise — the user is listening, not reading.
|
|
220
|
-
</voice_interaction_rules>
|
|
215
|
+
<custom_actions>
|
|
216
|
+
In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
|
|
217
|
+
When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
|
|
218
|
+
If a UI element is hidden but a matching custom action exists, use the action.
|
|
219
|
+
</custom_actions>
|
|
221
220
|
|
|
222
221
|
<rules>
|
|
223
222
|
- There are 2 types of requests — always determine which type BEFORE acting:
|
|
224
223
|
1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
|
|
225
|
-
|
|
224
|
+
Read the screen content and answer by speaking. Do NOT perform any tap/type/navigate actions.
|
|
226
225
|
2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
|
|
227
226
|
Execute the required UI interactions using tap/type/navigate tools.
|
|
227
|
+
- For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
|
|
228
|
+
1. Specific instructions: Follow each step precisely, do not skip.
|
|
229
|
+
2. Open-ended tasks: Plan the steps yourself.
|
|
228
230
|
- Only interact with elements that have an [index].
|
|
229
|
-
-
|
|
230
|
-
-
|
|
231
|
-
-
|
|
232
|
-
- Do not
|
|
233
|
-
-
|
|
234
|
-
-
|
|
231
|
+
- After tapping an element, the screen may change. Wait for updated screen context before the next action.
|
|
232
|
+
- If the current screen doesn't have what you need, use navigate() to go to another screen from the Available Screens list.
|
|
233
|
+
- If a tap navigates to another screen, the next screen context update will show the new screen's elements.
|
|
234
|
+
- Do not repeat one action more than 3 times unless conditions changed.
|
|
235
|
+
- After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
|
|
236
|
+
- After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
|
|
237
|
+
- If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
|
|
238
|
+
- For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat — the user could be charged multiple times.
|
|
239
|
+
- SECURITY & PRIVACY: Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Ask the user verbally.
|
|
240
|
+
- SECURITY & PRIVACY: Do not fill in login/signup forms unless the user provides credentials.
|
|
241
|
+
- Do NOT ask for confirmation of actions the user explicitly requested. If they said "place my order", just do it.
|
|
235
242
|
</rules>
|
|
236
243
|
|
|
237
244
|
<capability>
|
|
245
|
+
- You can see the current screen context — use it to answer questions directly.
|
|
238
246
|
- It is ok to just provide information without performing any actions.
|
|
239
|
-
-
|
|
240
|
-
-
|
|
241
|
-
- The
|
|
247
|
+
- It is ok to fail the task. The user would rather you report failure than repeat failed actions endlessly.
|
|
248
|
+
- The user can be wrong. If the request is not achievable, tell them.
|
|
249
|
+
- The app can have bugs. If something is not working as expected, tell the user.
|
|
250
|
+
- Trying too hard can be harmful. If stuck, tell the user what you accomplished and what remains.
|
|
242
251
|
</capability>
|
|
243
252
|
|
|
244
|
-
<
|
|
245
|
-
-
|
|
253
|
+
<speech_rules>
|
|
254
|
+
- Keep spoken output to 1-2 short sentences.
|
|
255
|
+
- Speak naturally — no markdown, no headers, no bullet points.
|
|
256
|
+
- Only speak confirmations and answers. Do not narrate your reasoning.
|
|
257
|
+
- Confirm what you did: summarize the action result briefly (e.g., "Added to cart" or "Navigated to Settings").
|
|
246
258
|
- Be transparent about errors: If an action fails, explain what failed and why.
|
|
247
|
-
-
|
|
259
|
+
- Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
|
|
260
|
+
- Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
|
|
261
|
+
- When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in your spoken response.
|
|
248
262
|
- Suggest next steps: After completing an action, briefly suggest what the user might want to do next.
|
|
249
|
-
-
|
|
250
|
-
</
|
|
263
|
+
- Be concise: Users are on mobile — avoid long speech.
|
|
264
|
+
</speech_rules>
|
|
265
|
+
|
|
266
|
+
<language_settings>
|
|
267
|
+
${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
|
|
268
|
+
- Use the same language as the user.
|
|
269
|
+
</language_settings>`;
|
|
251
270
|
|
|
252
271
|
// Append user-provided instructions if any
|
|
253
272
|
if (userInstructions?.trim()) {
|
package/src/index.ts
CHANGED
|
@@ -12,9 +12,9 @@ export { AIAgent } from './components/AIAgent';
|
|
|
12
12
|
export { useAction } from './hooks/useAction';
|
|
13
13
|
|
|
14
14
|
// ─── Services ────────────────────────────────────────────────
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
export { VoiceService } from './services/VoiceService';
|
|
16
|
+
export { AudioInputService } from './services/AudioInputService';
|
|
17
|
+
export { AudioOutputService } from './services/AudioOutputService';
|
|
18
18
|
|
|
19
19
|
// ─── Utilities ───────────────────────────────────────────────
|
|
20
20
|
export { logger } from './utils/logger';
|
|
@@ -31,8 +31,8 @@ export type {
|
|
|
31
31
|
TokenUsage,
|
|
32
32
|
} from './core/types';
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
export type {
|
|
35
|
+
VoiceServiceConfig,
|
|
36
|
+
VoiceServiceCallbacks,
|
|
37
|
+
VoiceStatus,
|
|
38
|
+
} from './services/VoiceService';
|
|
@@ -5,6 +5,9 @@
|
|
|
5
5
|
* PCM streaming from the microphone. Each chunk is converted from Float32
|
|
6
6
|
* to Int16 PCM and base64-encoded for the Gemini Live API.
|
|
7
7
|
*
|
|
8
|
+
* Echo cancellation is handled at the OS/hardware level via
|
|
9
|
+
* react-native-incall-manager (VOICE_COMMUNICATION mode) — not in JS.
|
|
10
|
+
*
|
|
8
11
|
* Requires: react-native-audio-api (development build only, not Expo Go)
|
|
9
12
|
*/
|
|
10
13
|
|
|
@@ -32,6 +35,14 @@ export class AudioInputService {
|
|
|
32
35
|
private status: RecordingStatus = 'idle';
|
|
33
36
|
private recorder: any = null;
|
|
34
37
|
|
|
38
|
+
// Auto-recovery: detect when mic session dies after audio playback.
|
|
39
|
+
// This is a react-native-audio-api bug where AudioRecorder loses mic access
|
|
40
|
+
// after AudioBufferQueueSourceNode plays audio (audio session conflict).
|
|
41
|
+
private consecutiveSilentFrames = 0;
|
|
42
|
+
private isRecovering = false;
|
|
43
|
+
private static readonly SILENT_THRESHOLD = 0.01;
|
|
44
|
+
private static readonly SILENT_FRAMES_BEFORE_RESTART = 15;
|
|
45
|
+
|
|
35
46
|
constructor(config: AudioInputConfig) {
|
|
36
47
|
this.config = config;
|
|
37
48
|
}
|
|
@@ -71,6 +82,7 @@ export class AudioInputService {
|
|
|
71
82
|
|
|
72
83
|
// Create AudioRecorder
|
|
73
84
|
this.recorder = new audioApi.AudioRecorder();
|
|
85
|
+
this.consecutiveSilentFrames = 0;
|
|
74
86
|
|
|
75
87
|
const sampleRate = this.config.sampleRate || 16000;
|
|
76
88
|
const bufferLength = this.config.bufferLength || 4096;
|
|
@@ -84,9 +96,53 @@ export class AudioInputService {
|
|
|
84
96
|
try {
|
|
85
97
|
// event.buffer is an AudioBuffer — get Float32 channel data
|
|
86
98
|
const float32Data = event.buffer.getChannelData(0);
|
|
87
|
-
|
|
99
|
+
|
|
100
|
+
// Measure peak amplitude for diagnostics + silent detection
|
|
101
|
+
let maxAmp = 0;
|
|
102
|
+
for (let i = 0; i < float32Data.length; i++) {
|
|
103
|
+
const abs = Math.abs(float32Data[i] || 0);
|
|
104
|
+
if (abs > maxAmp) maxAmp = abs;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Diagnostic: log amplitude on first 5 frames, then every 10th
|
|
108
|
+
if (frameCount <= 5 || frameCount % 10 === 0) {
|
|
109
|
+
logger.info('AudioInput', `🔬 Frame #${frameCount}: maxAmp=${maxAmp.toFixed(6)}, samples=${float32Data.length}`);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// ─── Auto-Recovery: Silent mic detection ─────────────
|
|
113
|
+
// After audio playback, react-native-audio-api's AudioRecorder
|
|
114
|
+
// can lose its mic session (all-zero frames). Detect this and
|
|
115
|
+
// restart the recorder to re-acquire the audio session.
|
|
116
|
+
if (maxAmp < AudioInputService.SILENT_THRESHOLD) {
|
|
117
|
+
this.consecutiveSilentFrames++;
|
|
118
|
+
if (
|
|
119
|
+
this.consecutiveSilentFrames >= AudioInputService.SILENT_FRAMES_BEFORE_RESTART &&
|
|
120
|
+
!this.isRecovering
|
|
121
|
+
) {
|
|
122
|
+
this.isRecovering = true;
|
|
123
|
+
logger.warn('AudioInput', `⚠️ ${this.consecutiveSilentFrames} silent frames — restarting recorder...`);
|
|
124
|
+
this.restartRecorder().then(() => {
|
|
125
|
+
this.isRecovering = false;
|
|
126
|
+
this.consecutiveSilentFrames = 0;
|
|
127
|
+
logger.info('AudioInput', '✅ Recorder restarted — mic session re-acquired');
|
|
128
|
+
}).catch((err: any) => {
|
|
129
|
+
this.isRecovering = false;
|
|
130
|
+
logger.error('AudioInput', `❌ Recorder restart failed: ${err?.message || err}`);
|
|
131
|
+
});
|
|
132
|
+
return; // Skip this frame
|
|
133
|
+
}
|
|
134
|
+
} else {
|
|
135
|
+
// Got real audio — reset counter
|
|
136
|
+
if (this.consecutiveSilentFrames > 5) {
|
|
137
|
+
logger.info('AudioInput', `🎤 Mic recovered after ${this.consecutiveSilentFrames} silent frames`);
|
|
138
|
+
}
|
|
139
|
+
this.consecutiveSilentFrames = 0;
|
|
140
|
+
}
|
|
141
|
+
|
|
88
142
|
const base64Chunk = float32ToInt16Base64(float32Data);
|
|
89
|
-
|
|
143
|
+
if (frameCount <= 5 || frameCount % 10 === 0) {
|
|
144
|
+
logger.info('AudioInput', `🎤 Frame #${frameCount}: chunk=${base64Chunk.length} chars, calling onAudioChunk...`);
|
|
145
|
+
}
|
|
90
146
|
this.config.onAudioChunk(base64Chunk);
|
|
91
147
|
} catch (err: any) {
|
|
92
148
|
logger.error('AudioInput', `Frame processing error: ${err.message}`);
|
|
@@ -121,6 +177,7 @@ export class AudioInputService {
|
|
|
121
177
|
}
|
|
122
178
|
this.recorder = null;
|
|
123
179
|
this.status = 'idle';
|
|
180
|
+
this.consecutiveSilentFrames = 0;
|
|
124
181
|
logger.info('AudioInput', 'Streaming stopped');
|
|
125
182
|
} catch (error: any) {
|
|
126
183
|
logger.error('AudioInput', `Failed to stop: ${error.message}`);
|
|
@@ -129,6 +186,24 @@ export class AudioInputService {
|
|
|
129
186
|
}
|
|
130
187
|
}
|
|
131
188
|
|
|
189
|
+
// ─── Auto-Recovery ─────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
/**
 * Stop and restart the recorder so it re-acquires the audio session.
 *
 * Per the original author's note, this works around a react-native-audio-api
 * issue where AudioRecorder loses mic access after AudioBufferQueueSourceNode
 * plays audio.
 *
 * Sequence: stop() → 300 ms pause → start().
 *
 * @throws Error with message 'Recorder restart failed' when start() resolves to a falsy value.
 */
private async restartRecorder(): Promise<void> {
  logger.info('AudioInput', '🔄 Restarting recorder for mic recovery...');

  await this.stop();

  // Short delay so the audio system can release resources before we re-open.
  await new Promise(done => setTimeout(done, 300));

  const restarted = await this.start();
  if (restarted) {
    return;
  }
  throw new Error('Recorder restart failed');
}
|
|
206
|
+
|
|
132
207
|
// ─── Status ───────────────────────────────────────────────
|
|
133
208
|
|
|
134
209
|
get isRecording(): boolean {
|