@mobileai/react-native 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -3
- package/lib/module/components/AIAgent.js +216 -5
- package/lib/module/components/AIAgent.js.map +1 -1
- package/lib/module/components/AgentChatBar.js +358 -36
- package/lib/module/components/AgentChatBar.js.map +1 -1
- package/lib/module/core/AgentRuntime.js +122 -6
- package/lib/module/core/AgentRuntime.js.map +1 -1
- package/lib/module/core/systemPrompt.js +57 -0
- package/lib/module/core/systemPrompt.js.map +1 -1
- package/lib/module/index.js +8 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/providers/GeminiProvider.js +108 -85
- package/lib/module/providers/GeminiProvider.js.map +1 -1
- package/lib/module/services/AudioInputService.js +128 -0
- package/lib/module/services/AudioInputService.js.map +1 -0
- package/lib/module/services/AudioOutputService.js +154 -0
- package/lib/module/services/AudioOutputService.js.map +1 -0
- package/lib/module/services/VoiceService.js +361 -0
- package/lib/module/services/VoiceService.js.map +1 -0
- package/lib/module/utils/audioUtils.js +49 -0
- package/lib/module/utils/audioUtils.js.map +1 -0
- package/lib/module/utils/logger.js +21 -4
- package/lib/module/utils/logger.js.map +1 -1
- package/lib/typescript/babel.config.d.ts +10 -0
- package/lib/typescript/babel.config.d.ts.map +1 -0
- package/lib/typescript/eslint.config.d.mts +3 -0
- package/lib/typescript/eslint.config.d.mts.map +1 -0
- package/lib/typescript/fetch-models.d.mts +2 -0
- package/lib/typescript/fetch-models.d.mts.map +1 -0
- package/lib/typescript/list-all-models.d.mts +2 -0
- package/lib/typescript/list-all-models.d.mts.map +1 -0
- package/lib/typescript/list-models.d.mts +2 -0
- package/lib/typescript/list-models.d.mts.map +1 -0
- package/lib/typescript/src/components/AIAgent.d.ts +8 -2
- package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
- package/lib/typescript/src/components/AgentChatBar.d.ts +19 -2
- package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
- package/lib/typescript/src/core/AgentRuntime.d.ts +17 -1
- package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
- package/lib/typescript/src/core/systemPrompt.d.ts +8 -0
- package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
- package/lib/typescript/src/core/types.d.ts +24 -1
- package/lib/typescript/src/core/types.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +6 -1
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/providers/GeminiProvider.d.ts +22 -18
- package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
- package/lib/typescript/src/services/AudioInputService.d.ts +31 -0
- package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -0
- package/lib/typescript/src/services/AudioOutputService.d.ts +34 -0
- package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -0
- package/lib/typescript/src/services/VoiceService.d.ts +73 -0
- package/lib/typescript/src/services/VoiceService.d.ts.map +1 -0
- package/lib/typescript/src/utils/audioUtils.d.ts +17 -0
- package/lib/typescript/src/utils/audioUtils.d.ts.map +1 -0
- package/lib/typescript/src/utils/logger.d.ts +4 -0
- package/lib/typescript/src/utils/logger.d.ts.map +1 -1
- package/package.json +24 -8
- package/src/components/AIAgent.tsx +222 -3
- package/src/components/AgentChatBar.tsx +487 -42
- package/src/core/AgentRuntime.ts +131 -2
- package/src/core/systemPrompt.ts +62 -0
- package/src/core/types.ts +30 -0
- package/src/index.ts +16 -0
- package/src/providers/GeminiProvider.ts +105 -89
- package/src/services/AudioInputService.ts +141 -0
- package/src/services/AudioOutputService.ts +167 -0
- package/src/services/VoiceService.ts +407 -0
- package/src/utils/audioUtils.ts +54 -0
- package/src/utils/logger.ts +24 -7
|
@@ -1,40 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* GeminiProvider — Gemini API integration
|
|
2
|
+
* GeminiProvider — Gemini API integration via @google/genai SDK.
|
|
3
3
|
*
|
|
4
|
-
* Uses
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Uses the official Google GenAI SDK for:
|
|
5
|
+
* - generateContent with structured function calling (agent_step)
|
|
6
|
+
* - inlineData for vision (base64 screenshots)
|
|
7
|
+
* - System instructions
|
|
8
|
+
*
|
|
9
|
+
* Implements the AIProvider interface so it can be swapped
|
|
10
|
+
* with OpenAIProvider, AnthropicProvider, etc.
|
|
7
11
|
*/
|
|
8
12
|
|
|
13
|
+
import { GoogleGenAI, FunctionCallingConfigMode, Type } from '@google/genai';
|
|
9
14
|
import { logger } from '../utils/logger';
|
|
10
|
-
import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning } from '../core/types';
|
|
15
|
+
import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning, TokenUsage } from '../core/types';
|
|
11
16
|
|
|
12
17
|
// ─── Constants ─────────────────────────────────────────────────
|
|
13
18
|
|
|
14
19
|
const AGENT_STEP_FN = 'agent_step';
|
|
15
20
|
|
|
16
|
-
// Reasoning fields
|
|
21
|
+
// Reasoning fields always present in the agent_step schema
|
|
17
22
|
const REASONING_FIELDS = ['previous_goal_eval', 'memory', 'plan'] as const;
|
|
18
23
|
|
|
19
|
-
// ─── Gemini API Types ──────────────────────────────────────────
|
|
20
|
-
|
|
21
|
-
interface GeminiContent {
|
|
22
|
-
role: 'user' | 'model';
|
|
23
|
-
parts: Array<{
|
|
24
|
-
text?: string;
|
|
25
|
-
functionCall?: { name: string; args: any };
|
|
26
|
-
functionResponse?: { name: string; response: any };
|
|
27
|
-
}>;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
24
|
// ─── Provider ──────────────────────────────────────────────────
|
|
31
25
|
|
|
32
26
|
export class GeminiProvider implements AIProvider {
|
|
33
|
-
private
|
|
27
|
+
private ai: GoogleGenAI;
|
|
34
28
|
private model: string;
|
|
35
29
|
|
|
36
30
|
constructor(apiKey: string, model: string = 'gemini-2.5-flash') {
|
|
37
|
-
this.
|
|
31
|
+
this.ai = new GoogleGenAI({ apiKey });
|
|
38
32
|
this.model = model;
|
|
39
33
|
}
|
|
40
34
|
|
|
@@ -43,59 +37,56 @@ export class GeminiProvider implements AIProvider {
|
|
|
43
37
|
userMessage: string,
|
|
44
38
|
tools: ToolDefinition[],
|
|
45
39
|
history: AgentStep[],
|
|
40
|
+
screenshot?: string,
|
|
46
41
|
): Promise<ProviderResult> {
|
|
47
42
|
|
|
48
|
-
logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}`);
|
|
43
|
+
logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}${screenshot ? ', with screenshot' : ''}`);
|
|
49
44
|
|
|
50
45
|
// Build single agent_step function declaration
|
|
51
46
|
const agentStepDeclaration = this.buildAgentStepDeclaration(tools);
|
|
52
47
|
|
|
53
|
-
// Build
|
|
54
|
-
const contents = this.buildContents(userMessage, history);
|
|
55
|
-
|
|
56
|
-
// Make API request
|
|
57
|
-
const url = `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:generateContent?key=${this.apiKey}`;
|
|
58
|
-
|
|
59
|
-
const body: any = {
|
|
60
|
-
contents,
|
|
61
|
-
tools: [{ functionDeclarations: [agentStepDeclaration] }],
|
|
62
|
-
systemInstruction: { parts: [{ text: systemPrompt }] },
|
|
63
|
-
// Force the model to always call agent_step
|
|
64
|
-
tool_config: {
|
|
65
|
-
function_calling_config: {
|
|
66
|
-
mode: 'ANY',
|
|
67
|
-
allowed_function_names: [AGENT_STEP_FN],
|
|
68
|
-
},
|
|
69
|
-
},
|
|
70
|
-
generationConfig: {
|
|
71
|
-
temperature: 0.2,
|
|
72
|
-
maxOutputTokens: 2048,
|
|
73
|
-
},
|
|
74
|
-
};
|
|
48
|
+
// Build contents (user message + optional screenshot)
|
|
49
|
+
const contents = this.buildContents(userMessage, history, screenshot);
|
|
75
50
|
|
|
76
51
|
const startTime = Date.now();
|
|
77
52
|
|
|
78
53
|
try {
|
|
79
|
-
const response = await
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
54
|
+
const response = await this.ai.models.generateContent({
|
|
55
|
+
model: this.model,
|
|
56
|
+
contents,
|
|
57
|
+
config: {
|
|
58
|
+
systemInstruction: systemPrompt,
|
|
59
|
+
tools: [{ functionDeclarations: [agentStepDeclaration] }],
|
|
60
|
+
toolConfig: {
|
|
61
|
+
functionCallingConfig: {
|
|
62
|
+
mode: FunctionCallingConfigMode.ANY,
|
|
63
|
+
allowedFunctionNames: [AGENT_STEP_FN],
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
temperature: 0.2,
|
|
67
|
+
maxOutputTokens: 2048,
|
|
68
|
+
},
|
|
83
69
|
});
|
|
84
70
|
|
|
85
71
|
const elapsed = Date.now() - startTime;
|
|
86
72
|
logger.info('GeminiProvider', `Response received in ${elapsed}ms`);
|
|
87
73
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
74
|
+
// Extract token usage from SDK response
|
|
75
|
+
const tokenUsage = this.extractTokenUsage(response);
|
|
76
|
+
if (tokenUsage) {
|
|
77
|
+
logger.info('GeminiProvider', `Tokens: ${tokenUsage.promptTokens} in / ${tokenUsage.completionTokens} out / $${tokenUsage.estimatedCostUSD.toFixed(6)}`);
|
|
92
78
|
}
|
|
93
79
|
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
return
|
|
80
|
+
const result = this.parseAgentStepResponse(response, tools);
|
|
81
|
+
result.tokenUsage = tokenUsage;
|
|
82
|
+
return result;
|
|
97
83
|
} catch (error: any) {
|
|
98
84
|
logger.error('GeminiProvider', 'Request failed:', error.message);
|
|
85
|
+
|
|
86
|
+
// Preserve HTTP error format for backward compatibility with tests
|
|
87
|
+
if (error.status) {
|
|
88
|
+
throw new Error(`Gemini API error ${error.status}: ${error.message}`);
|
|
89
|
+
}
|
|
99
90
|
throw error;
|
|
100
91
|
}
|
|
101
92
|
}
|
|
@@ -117,7 +108,6 @@ export class GeminiProvider implements AIProvider {
|
|
|
117
108
|
const actionProperties: Record<string, any> = {};
|
|
118
109
|
for (const tool of tools) {
|
|
119
110
|
for (const [paramName, param] of Object.entries(tool.parameters)) {
|
|
120
|
-
// Skip if already added (shared field names like 'text', 'index')
|
|
121
111
|
if (actionProperties[paramName]) continue;
|
|
122
112
|
actionProperties[paramName] = {
|
|
123
113
|
type: this.mapParamType(param.type),
|
|
@@ -139,28 +129,25 @@ export class GeminiProvider implements AIProvider {
|
|
|
139
129
|
name: AGENT_STEP_FN,
|
|
140
130
|
description: `Execute one agent step. Choose an action and provide reasoning.\n\nAvailable actions:\n${toolDescriptions}`,
|
|
141
131
|
parameters: {
|
|
142
|
-
type:
|
|
132
|
+
type: Type.OBJECT,
|
|
143
133
|
properties: {
|
|
144
|
-
// ── Reasoning fields ──
|
|
145
134
|
previous_goal_eval: {
|
|
146
|
-
type:
|
|
135
|
+
type: Type.STRING,
|
|
147
136
|
description: 'One-sentence assessment of your last action. State success, failure, or uncertain. Skip on first step.',
|
|
148
137
|
},
|
|
149
138
|
memory: {
|
|
150
|
-
type:
|
|
139
|
+
type: Type.STRING,
|
|
151
140
|
description: 'Key facts to remember for future steps: progress made, items found, counters, field values already collected.',
|
|
152
141
|
},
|
|
153
142
|
plan: {
|
|
154
|
-
type:
|
|
143
|
+
type: Type.STRING,
|
|
155
144
|
description: 'Your immediate next goal — what action you will take and why.',
|
|
156
145
|
},
|
|
157
|
-
// ── Action selection ──
|
|
158
146
|
action_name: {
|
|
159
|
-
type:
|
|
147
|
+
type: Type.STRING,
|
|
160
148
|
description: 'Which action to execute.',
|
|
161
149
|
enum: toolNames,
|
|
162
150
|
},
|
|
163
|
-
// ── Action parameters (flat) ──
|
|
164
151
|
...actionProperties,
|
|
165
152
|
},
|
|
166
153
|
required: ['plan', 'action_name'],
|
|
@@ -170,43 +157,46 @@ export class GeminiProvider implements AIProvider {
|
|
|
170
157
|
|
|
171
158
|
private mapParamType(type: string): string {
|
|
172
159
|
switch (type) {
|
|
173
|
-
case 'number': return
|
|
174
|
-
case 'integer': return
|
|
175
|
-
case 'boolean': return
|
|
160
|
+
case 'number': return Type.NUMBER;
|
|
161
|
+
case 'integer': return Type.INTEGER;
|
|
162
|
+
case 'boolean': return Type.BOOLEAN;
|
|
176
163
|
case 'string':
|
|
177
|
-
default: return
|
|
164
|
+
default: return Type.STRING;
|
|
178
165
|
}
|
|
179
166
|
}
|
|
180
167
|
|
|
181
168
|
// ─── Build Contents ────────────────────────────────────────
|
|
182
169
|
|
|
183
170
|
/**
|
|
184
|
-
* Builds
|
|
185
|
-
*
|
|
186
|
-
* Each step is a STATELESS single-turn request (matching page-agent's approach):
|
|
187
|
-
* - System prompt has general instructions
|
|
188
|
-
* - User message contains full context: task, history, screen state
|
|
189
|
-
* - Model responds with agent_step function call
|
|
190
|
-
*
|
|
191
|
-
* History is embedded as text in assembleUserPrompt (via <agent_history>),
|
|
192
|
-
* NOT as functionCall/functionResponse pairs. This avoids Gemini's
|
|
193
|
-
* conversation format requirements and thought_signature complexity.
|
|
171
|
+
* Builds contents for the generateContent call.
|
|
172
|
+
* Single-turn: user message + optional screenshot as inlineData.
|
|
194
173
|
*/
|
|
195
|
-
private buildContents(userMessage: string, _history: AgentStep[]):
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
174
|
+
private buildContents(userMessage: string, _history: AgentStep[], screenshot?: string): any[] {
|
|
175
|
+
const parts: any[] = [{ text: userMessage }];
|
|
176
|
+
|
|
177
|
+
// Append screenshot as inlineData for Gemini vision
|
|
178
|
+
if (screenshot) {
|
|
179
|
+
parts.push({
|
|
180
|
+
inlineData: {
|
|
181
|
+
mimeType: 'image/jpeg',
|
|
182
|
+
data: screenshot,
|
|
183
|
+
},
|
|
184
|
+
});
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return [{ role: 'user', parts }];
|
|
200
188
|
}
|
|
201
189
|
|
|
202
190
|
// ─── Parse Response ────────────────────────────────────────
|
|
203
191
|
|
|
204
192
|
/**
|
|
205
|
-
* Parses the
|
|
206
|
-
* Extracts structured reasoning + action
|
|
193
|
+
* Parses the SDK response expecting a single agent_step function call.
|
|
194
|
+
* Extracts structured reasoning + action.
|
|
207
195
|
*/
|
|
208
|
-
private parseAgentStepResponse(
|
|
209
|
-
|
|
196
|
+
private parseAgentStepResponse(response: any, tools: ToolDefinition[]): ProviderResult {
|
|
197
|
+
const candidates = response.candidates || [];
|
|
198
|
+
|
|
199
|
+
if (candidates.length === 0) {
|
|
210
200
|
logger.warn('GeminiProvider', 'No candidates in response');
|
|
211
201
|
return {
|
|
212
202
|
toolCalls: [{ name: 'done', args: { text: 'No response generated.', success: false } }],
|
|
@@ -215,7 +205,7 @@ export class GeminiProvider implements AIProvider {
|
|
|
215
205
|
};
|
|
216
206
|
}
|
|
217
207
|
|
|
218
|
-
const candidate =
|
|
208
|
+
const candidate = candidates[0];
|
|
219
209
|
const parts = candidate.content?.parts || [];
|
|
220
210
|
|
|
221
211
|
// Find the function call part
|
|
@@ -251,11 +241,10 @@ export class GeminiProvider implements AIProvider {
|
|
|
251
241
|
};
|
|
252
242
|
}
|
|
253
243
|
|
|
254
|
-
// Build action args:
|
|
244
|
+
// Build action args: extract only the params that belong to the matched tool
|
|
255
245
|
const actionArgs: Record<string, any> = {};
|
|
256
246
|
const reservedKeys = new Set([...REASONING_FIELDS, 'action_name']);
|
|
257
247
|
|
|
258
|
-
// Find the matching tool to know which params belong to it
|
|
259
248
|
const matchedTool = tools.find(t => t.name === actionName);
|
|
260
249
|
if (matchedTool) {
|
|
261
250
|
for (const paramName of Object.keys(matchedTool.parameters)) {
|
|
@@ -264,7 +253,6 @@ export class GeminiProvider implements AIProvider {
|
|
|
264
253
|
}
|
|
265
254
|
}
|
|
266
255
|
} else {
|
|
267
|
-
// Custom/registered tool — grab all non-reserved fields
|
|
268
256
|
for (const [key, value] of Object.entries(args)) {
|
|
269
257
|
if (!reservedKeys.has(key)) {
|
|
270
258
|
actionArgs[key] = value;
|
|
@@ -280,4 +268,32 @@ export class GeminiProvider implements AIProvider {
|
|
|
280
268
|
text: textPart?.text,
|
|
281
269
|
};
|
|
282
270
|
}
|
|
271
|
+
|
|
272
|
+
// ─── Token Usage Extraction ─────────────────────────────────
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Extracts token usage from SDK response and calculates estimated cost.
|
|
276
|
+
*
|
|
277
|
+
* Pricing (Gemini 2.5 Flash):
|
|
278
|
+
* - Input: $0.30 / 1M tokens
|
|
279
|
+
* - Output: $2.50 / 1M tokens
|
|
280
|
+
*/
|
|
281
|
+
private extractTokenUsage(response: any): TokenUsage | undefined {
|
|
282
|
+
const meta = response?.usageMetadata;
|
|
283
|
+
if (!meta) return undefined;
|
|
284
|
+
|
|
285
|
+
const promptTokens = meta.promptTokenCount ?? 0;
|
|
286
|
+
const completionTokens = meta.candidatesTokenCount ?? 0;
|
|
287
|
+
const totalTokens = meta.totalTokenCount ?? (promptTokens + completionTokens);
|
|
288
|
+
|
|
289
|
+
// Cost estimation based on Gemini 2.5 Flash pricing
|
|
290
|
+
const INPUT_COST_PER_M = 0.30;
|
|
291
|
+
const OUTPUT_COST_PER_M = 2.50;
|
|
292
|
+
|
|
293
|
+
const estimatedCostUSD =
|
|
294
|
+
(promptTokens / 1_000_000) * INPUT_COST_PER_M +
|
|
295
|
+
(completionTokens / 1_000_000) * OUTPUT_COST_PER_M;
|
|
296
|
+
|
|
297
|
+
return { promptTokens, completionTokens, totalTokens, estimatedCostUSD };
|
|
298
|
+
}
|
|
283
299
|
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AudioInputService — Real-time microphone capture for voice mode.
|
|
3
|
+
*
|
|
4
|
+
* Uses react-native-audio-api (Software Mansion) AudioRecorder for native
|
|
5
|
+
* PCM streaming from the microphone. Each chunk is converted from Float32
|
|
6
|
+
* to Int16 PCM and base64-encoded for the Gemini Live API.
|
|
7
|
+
*
|
|
8
|
+
* Requires: react-native-audio-api (development build only, not Expo Go)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { logger } from '../utils/logger';
|
|
12
|
+
import { float32ToInt16Base64 } from '../utils/audioUtils';
|
|
13
|
+
|
|
14
|
+
// ─── Types ─────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
export interface AudioInputConfig {
|
|
17
|
+
sampleRate?: number;
|
|
18
|
+
/** Number of samples per callback buffer (default: 4096) */
|
|
19
|
+
bufferLength?: number;
|
|
20
|
+
/** Callback with base64 PCM audio chunk */
|
|
21
|
+
onAudioChunk: (base64Audio: string) => void;
|
|
22
|
+
onError?: (error: string) => void;
|
|
23
|
+
onPermissionDenied?: () => void;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
type RecordingStatus = 'idle' | 'recording' | 'paused';
|
|
27
|
+
|
|
28
|
+
// ─── Service ───────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
export class AudioInputService {
|
|
31
|
+
private config: AudioInputConfig;
|
|
32
|
+
private status: RecordingStatus = 'idle';
|
|
33
|
+
private recorder: any = null;
|
|
34
|
+
|
|
35
|
+
constructor(config: AudioInputConfig) {
|
|
36
|
+
this.config = config;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// ─── Lifecycle ─────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
async start(): Promise<boolean> {
|
|
42
|
+
try {
|
|
43
|
+
// Lazy-load react-native-audio-api (optional peer dependency)
|
|
44
|
+
let audioApi: any;
|
|
45
|
+
try {
|
|
46
|
+
audioApi = require('react-native-audio-api');
|
|
47
|
+
} catch {
|
|
48
|
+
const msg =
|
|
49
|
+
'Voice mode requires react-native-audio-api. Install with: npm install react-native-audio-api';
|
|
50
|
+
logger.error('AudioInput', msg);
|
|
51
|
+
this.config.onError?.(msg);
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Request mic permission (Android)
|
|
56
|
+
try {
|
|
57
|
+
const { PermissionsAndroid, Platform } = require('react-native');
|
|
58
|
+
if (Platform.OS === 'android') {
|
|
59
|
+
const result = await PermissionsAndroid.request(
|
|
60
|
+
PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
|
|
61
|
+
);
|
|
62
|
+
if (result !== PermissionsAndroid.RESULTS.GRANTED) {
|
|
63
|
+
logger.warn('AudioInput', 'Microphone permission denied');
|
|
64
|
+
this.config.onPermissionDenied?.();
|
|
65
|
+
return false;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
} catch {
|
|
69
|
+
// Permission check failed — continue and let native layer handle it
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Create AudioRecorder
|
|
73
|
+
this.recorder = new audioApi.AudioRecorder();
|
|
74
|
+
|
|
75
|
+
const sampleRate = this.config.sampleRate || 16000;
|
|
76
|
+
const bufferLength = this.config.bufferLength || 4096;
|
|
77
|
+
|
|
78
|
+
// Register audio data callback
|
|
79
|
+
let frameCount = 0;
|
|
80
|
+
this.recorder.onAudioReady(
|
|
81
|
+
{ sampleRate, bufferLength, channelCount: 1 },
|
|
82
|
+
(event: any) => {
|
|
83
|
+
frameCount++;
|
|
84
|
+
try {
|
|
85
|
+
// event.buffer is an AudioBuffer — get Float32 channel data
|
|
86
|
+
const float32Data = event.buffer.getChannelData(0);
|
|
87
|
+
// Convert Float32 → Int16 → base64 for Gemini
|
|
88
|
+
const base64Chunk = float32ToInt16Base64(float32Data);
|
|
89
|
+
logger.debug('AudioInput', `🎤 Frame #${frameCount}: size=${base64Chunk.length}`);
|
|
90
|
+
this.config.onAudioChunk(base64Chunk);
|
|
91
|
+
} catch (err: any) {
|
|
92
|
+
logger.error('AudioInput', `Frame processing error: ${err.message}`);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
);
|
|
96
|
+
|
|
97
|
+
// Register error callback
|
|
98
|
+
this.recorder.onError((error: any) => {
|
|
99
|
+
logger.error('AudioInput', `Recorder error: ${error.message || error}`);
|
|
100
|
+
this.config.onError?.(error.message || String(error));
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Start recording
|
|
104
|
+
this.recorder.start();
|
|
105
|
+
this.status = 'recording';
|
|
106
|
+
logger.info('AudioInput', `Streaming started (${sampleRate}Hz, bufLen=${bufferLength})`);
|
|
107
|
+
return true;
|
|
108
|
+
} catch (error: any) {
|
|
109
|
+
logger.error('AudioInput', `Failed to start: ${error.message}`);
|
|
110
|
+
this.config.onError?.(error.message);
|
|
111
|
+
return false;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
async stop(): Promise<void> {
|
|
116
|
+
try {
|
|
117
|
+
if (this.recorder && this.status !== 'idle') {
|
|
118
|
+
this.recorder.clearOnAudioReady();
|
|
119
|
+
this.recorder.clearOnError();
|
|
120
|
+
this.recorder.stop();
|
|
121
|
+
}
|
|
122
|
+
this.recorder = null;
|
|
123
|
+
this.status = 'idle';
|
|
124
|
+
logger.info('AudioInput', 'Streaming stopped');
|
|
125
|
+
} catch (error: any) {
|
|
126
|
+
logger.error('AudioInput', `Failed to stop: ${error.message}`);
|
|
127
|
+
this.recorder = null;
|
|
128
|
+
this.status = 'idle';
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// ─── Status ───────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
get isRecording(): boolean {
|
|
135
|
+
return this.status === 'recording';
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
get currentStatus(): RecordingStatus {
|
|
139
|
+
return this.status;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AudioOutputService — AI speech playback for voice mode.
|
|
3
|
+
*
|
|
4
|
+
* Uses react-native-audio-api (Software Mansion) for gapless, low-latency
|
|
5
|
+
* PCM playback. Decodes base64 PCM from Gemini Live API and queues it via
|
|
6
|
+
* AudioBufferQueueSourceNode for seamless streaming.
|
|
7
|
+
*
|
|
8
|
+
* Requires: react-native-audio-api (development build only, not Expo Go)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { logger } from '../utils/logger';
|
|
12
|
+
import { base64ToFloat32 } from '../utils/audioUtils';
|
|
13
|
+
|
|
14
|
+
// ─── Types ─────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/** Gemini Live API outputs 24kHz 16-bit mono PCM */
|
|
17
|
+
const GEMINI_OUTPUT_SAMPLE_RATE = 24000;
|
|
18
|
+
|
|
19
|
+
export interface AudioOutputConfig {
|
|
20
|
+
sampleRate?: number;
|
|
21
|
+
onPlaybackStart?: () => void;
|
|
22
|
+
onPlaybackEnd?: () => void;
|
|
23
|
+
onError?: (error: string) => void;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// ─── Service ───────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
export class AudioOutputService {
|
|
29
|
+
private config: AudioOutputConfig;
|
|
30
|
+
private audioContext: any = null;
|
|
31
|
+
private queueSourceNode: any = null;
|
|
32
|
+
private gainNode: any = null;
|
|
33
|
+
private muted = false;
|
|
34
|
+
private isStarted = false;
|
|
35
|
+
private chunkCount = 0;
|
|
36
|
+
|
|
37
|
+
constructor(config: AudioOutputConfig = {}) {
|
|
38
|
+
this.config = config;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// ─── Lifecycle ─────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
async initialize(): Promise<boolean> {
|
|
44
|
+
try {
|
|
45
|
+
let audioApi: any;
|
|
46
|
+
try {
|
|
47
|
+
audioApi = require('react-native-audio-api');
|
|
48
|
+
} catch {
|
|
49
|
+
const msg =
|
|
50
|
+
'react-native-audio-api is required for audio output. Install with: npm install react-native-audio-api';
|
|
51
|
+
logger.error('AudioOutput', msg);
|
|
52
|
+
this.config.onError?.(msg);
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const sampleRate = this.config.sampleRate || GEMINI_OUTPUT_SAMPLE_RATE;
|
|
57
|
+
|
|
58
|
+
// Create AudioContext at Gemini's output sample rate
|
|
59
|
+
this.audioContext = new audioApi.AudioContext({ sampleRate });
|
|
60
|
+
|
|
61
|
+
// Create GainNode for mute control
|
|
62
|
+
this.gainNode = this.audioContext.createGain();
|
|
63
|
+
this.gainNode.gain.value = 1.0;
|
|
64
|
+
this.gainNode.connect(this.audioContext.destination);
|
|
65
|
+
|
|
66
|
+
// Create AudioBufferQueueSourceNode for gapless streaming
|
|
67
|
+
this.queueSourceNode = this.audioContext.createBufferQueueSource();
|
|
68
|
+
this.queueSourceNode.connect(this.gainNode);
|
|
69
|
+
|
|
70
|
+
logger.info('AudioOutput', `Initialized (${sampleRate}Hz, AudioBufferQueueSourceNode)`);
|
|
71
|
+
return true;
|
|
72
|
+
} catch (error: any) {
|
|
73
|
+
logger.error('AudioOutput', `Failed to initialize: ${error.message}`);
|
|
74
|
+
this.config.onError?.(error.message);
|
|
75
|
+
return false;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// ─── Enqueue Audio ─────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
/** Add a base64-encoded PCM chunk from Gemini to the playback queue */
|
|
82
|
+
enqueue(base64Audio: string): void {
|
|
83
|
+
if (this.muted || !this.audioContext || !this.queueSourceNode) return;
|
|
84
|
+
|
|
85
|
+
try {
|
|
86
|
+
this.chunkCount++;
|
|
87
|
+
|
|
88
|
+
// Decode base64 Int16 PCM → Float32
|
|
89
|
+
const float32Data = base64ToFloat32(base64Audio);
|
|
90
|
+
const sampleRate = this.config.sampleRate || GEMINI_OUTPUT_SAMPLE_RATE;
|
|
91
|
+
|
|
92
|
+
// Create an AudioBuffer and fill it with PCM data
|
|
93
|
+
const audioBuffer = this.audioContext.createBuffer(1, float32Data.length, sampleRate);
|
|
94
|
+
audioBuffer.copyToChannel(float32Data, 0);
|
|
95
|
+
|
|
96
|
+
// Enqueue the buffer for gapless playback
|
|
97
|
+
this.queueSourceNode.enqueueBuffer(audioBuffer);
|
|
98
|
+
|
|
99
|
+
// Start playback on first enqueue
|
|
100
|
+
if (!this.isStarted) {
|
|
101
|
+
this.queueSourceNode.start();
|
|
102
|
+
this.isStarted = true;
|
|
103
|
+
this.config.onPlaybackStart?.();
|
|
104
|
+
logger.info('AudioOutput', '▶️ Playback started');
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
if (this.chunkCount % 20 === 0) {
|
|
108
|
+
logger.debug('AudioOutput', `Queued chunk #${this.chunkCount}`);
|
|
109
|
+
}
|
|
110
|
+
} catch (error: any) {
|
|
111
|
+
logger.error('AudioOutput', `Enqueue error: ${error.message}`);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// ─── Mute/Unmute ──────────────────────────────────────────
|
|
116
|
+
|
|
117
|
+
mute(): void {
|
|
118
|
+
this.muted = true;
|
|
119
|
+
if (this.gainNode) {
|
|
120
|
+
this.gainNode.gain.value = 0;
|
|
121
|
+
}
|
|
122
|
+
logger.info('AudioOutput', 'Speaker muted');
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
unmute(): void {
|
|
126
|
+
this.muted = false;
|
|
127
|
+
if (this.gainNode) {
|
|
128
|
+
this.gainNode.gain.value = 1.0;
|
|
129
|
+
}
|
|
130
|
+
logger.info('AudioOutput', 'Speaker unmuted');
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
get isMuted(): boolean {
|
|
134
|
+
return this.muted;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// ─── Stop & Cleanup ───────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
async stop(): Promise<void> {
|
|
140
|
+
try {
|
|
141
|
+
if (this.queueSourceNode && this.isStarted) {
|
|
142
|
+
this.queueSourceNode.stop();
|
|
143
|
+
this.queueSourceNode.clearBuffers();
|
|
144
|
+
}
|
|
145
|
+
this.isStarted = false;
|
|
146
|
+
this.chunkCount = 0;
|
|
147
|
+
this.config.onPlaybackEnd?.();
|
|
148
|
+
logger.info('AudioOutput', 'Playback stopped');
|
|
149
|
+
} catch (error: any) {
|
|
150
|
+
logger.error('AudioOutput', `Stop error: ${error.message}`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
async cleanup(): Promise<void> {
|
|
155
|
+
await this.stop();
|
|
156
|
+
try {
|
|
157
|
+
if (this.audioContext) {
|
|
158
|
+
await this.audioContext.close();
|
|
159
|
+
}
|
|
160
|
+
} catch {
|
|
161
|
+
// Non-critical
|
|
162
|
+
}
|
|
163
|
+
this.audioContext = null;
|
|
164
|
+
this.queueSourceNode = null;
|
|
165
|
+
this.gainNode = null;
|
|
166
|
+
}
|
|
167
|
+
}
|