voicelayer-rn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,566 @@
1
+ /**
2
+ * VoiceLayerButton — zero-config floating voice assistant for React Native.
3
+ *
4
+ * ┌─ Minimal integration (3 lines total) ────────────────────────────────────┐
5
+ * │ │
6
+ * │ // 1. In your navigation file (you probably have this already): │
7
+ * │ export const navigationRef = createNavigationContainerRef() │
8
+ * │ │
9
+ * │ // 2. Wire the ref: │
10
+ * │ <NavigationContainer ref={navigationRef}> │
11
+ * │ │
12
+ * │ // 3. Drop the button anywhere inside NavigationContainer: │
13
+ * │ <VoiceLayerButton navigationRef={navigationRef} apiKey="vl-xxx" /> │
14
+ * │ │
15
+ * └───────────────────────────────────────────────────────────────────────────┘
16
+ *
17
+ * Props:
18
+ * navigationRef NavigationContainerRef REQUIRED — your app's ref
19
+ * apiKey string REQUIRED — get one at voicelayer.dev
20
+ * screenNames? string[] push-only or parameterised screens
21
+ * e.g. ['CheckoutScreen','OrderScreen:orderId']
22
+ * appId? string for analytics grouping (default: 'app')
23
+ * language? string 'hi' | 'en' | 'auto' (default: 'hi')
24
+ * serverUrl? string override for self-hosting
25
+ *
26
+ * Intelligence layers (all automatic):
27
+ * 1. Screen auto-discovery — reads navigation state
28
+ * 2. Current screen context — tells Claude where user is
29
+ * 3. Param extraction — 'OrderScreen:orderId' → navigate with params
30
+ * 4. Context-aware chips — always relevant to current screen
31
+ * 5. Navigation history — powers "wapas jaao"
32
+ * 6. useVoiceLayerScreen — optional per-screen hints
33
+ * 7. Multi-turn conversation — pronoun resolution across commands
34
+ */
35
+
36
+ import React, {
37
+ useState, useRef, useCallback, useEffect,
38
+ } from 'react';
39
+ import {
40
+ View, Text, StyleSheet, TouchableOpacity,
41
+ Modal, TextInput, ActivityIndicator,
42
+ Animated, KeyboardAvoidingView, Platform,
43
+ Pressable, ScrollView,
44
+ } from 'react-native';
45
+ import { useNavigationState } from '@react-navigation/native';
46
+
47
+ import { useVoiceIntent, setVoiceLayerServer } from './useVoiceIntent';
48
+ import { screenRegistry } from './screenRegistry';
49
+ import { navigationHistory } from './navigationHistory';
50
+ import { conversationStore } from './conversationStore';
51
+
52
// Accent colour shared by the floating button, send button, listening label
// and the active waveform bars.
const ACCENT = '#4CAF50';

// Background colour of the bottom sheet.
const DARK_BG = '#161616';
54
+
55
+ // ── Route utilities ───────────────────────────────────────────────────────────
56
+
57
/**
 * Flattens a navigation state tree into a list of leaf routes.
 *
 * A route that carries a non-empty nested `state.routes` is not emitted
 * itself; its children are emitted instead, tagged with that route's name as
 * their `container`. Every emitted entry starts with an empty `paramKeys`.
 *
 * @param {object} [state] - Navigation state with a `routes` array.
 * @param {string|null} [containerName=null] - Name of the route that owns `state`.
 * @returns {Array<{name: string, container: string|null, paramKeys: string[]}>}
 */
function extractRoutes(state, containerName = null) {
  const routes = state?.routes;
  if (!routes) return [];

  return routes.flatMap((route) => {
    const nested = route.state;
    if (nested?.routes?.length > 0) {
      // Descend: the current route becomes the container of its children.
      return extractRoutes(nested, route.name);
    }
    return [{ name: route.name, container: containerName, paramKeys: [] }];
  });
}
69
+
70
/**
 * Parses a screen hint of the form `'Name'` or `'Name:paramA,paramB'`.
 *
 * Only the text between the first and second `:` is read as the parameter
 * list. Non-string hints are stringified and get no parameters.
 *
 * @param {*} hint - Screen hint, normally a string.
 * @returns {{name: string, paramKeys: string[]}}
 */
function parseScreenHint(hint) {
  if (typeof hint !== 'string') {
    return { name: String(hint), paramKeys: [] };
  }

  const segments = hint.split(':');
  const rawName = segments[0];
  const paramSpec = segments[1] === undefined ? '' : segments[1];

  const paramKeys = [];
  if (paramSpec) {
    for (const piece of paramSpec.split(',')) {
      const key = piece.trim();
      if (key) paramKeys.push(key);
    }
  }

  return { name: rawName.trim(), paramKeys };
}
78
+
79
/**
 * Builds the full route list: routes discovered from the live navigation
 * state, enriched with parameter keys from `screenNames` hints, followed by
 * hinted screens that are not currently present in the navigation state.
 *
 * Each hint is parsed once. When the same screen name is hinted more than
 * once, the last hint's parameter keys win and the screen appears only once
 * in the result.
 *
 * @param {object} navRef - Navigation container ref (`isReady`, `getRootState`).
 * @param {string[]} [screenNames=[]] - Hints such as `'OrderScreen:orderId'`.
 * @returns {Array<{name: string, container: string|null, paramKeys: string[]}>}
 *   Empty when the ref is missing or not ready.
 */
function buildRoutes(navRef, screenNames = []) {
  if (!navRef?.isReady()) return [];

  const discovered = extractRoutes(navRef.getRootState());
  const discoveredNames = new Set(discovered.map((r) => r.name));

  // One entry per hinted screen name; Map insertion order keeps the order of
  // first appearance while a later duplicate overrides the parameter keys.
  const hintMap = new Map();
  for (const hint of screenNames ?? []) {
    const { name, paramKeys } = parseScreenHint(hint);
    hintMap.set(name, paramKeys);
  }

  const merged = discovered.map((r) => ({
    ...r,
    paramKeys: hintMap.get(r.name) ?? [],
  }));

  // Hinted screens that auto-discovery did not find (e.g. not yet pushed).
  const extras = [];
  for (const [name, paramKeys] of hintMap) {
    if (!discoveredNames.has(name)) {
      extras.push({ name, container: null, paramKeys });
    }
  }

  return [...merged, ...extras];
}
102
+
103
/**
 * Turns a route name into a human-readable chip label.
 *
 * Strips one trailing `Screen` / `Navigator` / `Tab` suffix (case-insensitive),
 * turns `_` and `-` separators into spaces, splits camelCase words while
 * keeping acronym runs together (`HTMLViewer` → `HTML Viewer`), and
 * capitalises each word. Falls back to the raw name when nothing is left
 * (e.g. a route literally called `Screen`).
 *
 * @param {string} name - Route name.
 * @returns {string} Display label.
 */
function routeToLabel(name) {
  const label = name
    .replace(/Screen$|Navigator$|Tab$/i, '')
    .replace(/[_-]+/g, ' ')
    // lower/digit followed by upper: "myOrders" -> "my Orders"
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    // end of an acronym run followed by a capitalised word: "HTMLViewer" -> "HTML Viewer"
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/\s+/g, ' ')
    .trim()
    .replace(/\b\w/g, (c) => c.toUpperCase());

  return label || name;
}
110
+
111
/**
 * Builds up to six quick-action chip labels for the current screen.
 *
 * Priority order:
 *   1. Up to three per-screen hints from `screenMeta.hints` (first letter
 *      upper-cased); non-string or blank hints are skipped.
 *   2. Up to four sibling screens (same `container` as the current screen).
 *   3. Any remaining screens, until six chips are collected.
 *
 * Labels are de-duplicated case-insensitively. Duplicates never consume a
 * slot, so the list is filled to six whenever enough distinct labels exist.
 *
 * @param {string|null} currentScreenName - Name of the focused route.
 * @param {Array<{name: string, container: string|null}>} allRoutes
 * @param {{hints?: string[]}|null|undefined} screenMeta - Optional per-screen metadata.
 * @returns {string[]} At most six chip labels.
 */
function buildContextualChips(currentScreenName, allRoutes, screenMeta) {
  const MAX_CHIPS = 6;
  const MAX_HINTS = 3;
  const MAX_SIBLINGS = 4;

  const routes = allRoutes ?? [];
  const chips = [];
  const used = new Set();

  const add = (label) => {
    if (chips.length >= MAX_CHIPS) return;
    const key = label.toLowerCase();
    if (used.has(key)) return;
    used.add(key);
    chips.push(label);
  };

  // 1. Per-screen hints (most specific)
  const hints = Array.isArray(screenMeta?.hints) ? screenMeta.hints : [];
  for (const hint of hints.slice(0, MAX_HINTS)) {
    if (typeof hint !== 'string' || hint.trim() === '') continue;
    add(hint.charAt(0).toUpperCase() + hint.slice(1));
  }

  // 2. Sibling screens (same tab/container)
  const others = routes.filter((r) => r.name !== currentScreenName);
  const currentContainer =
    routes.find((r) => r.name === currentScreenName)?.container ?? null;
  others
    .filter((r) => r.container === currentContainer)
    .slice(0, MAX_SIBLINGS)
    .forEach((r) => add(routeToLabel(r.name)));

  // 3. Fill with remaining screens; `add` rejects labels already used, so we
  //    simply walk the list until the chip budget is exhausted.
  for (const route of others) {
    if (chips.length >= MAX_CHIPS) break;
    add(routeToLabel(route.name));
  }

  return chips;
}
141
+
142
+ // ── Waveform animation ────────────────────────────────────────────────────────
143
+
144
+ function Waveform({ active }) {
145
+ const bars = [
146
+ useRef(new Animated.Value(0.4)).current,
147
+ useRef(new Animated.Value(0.4)).current,
148
+ useRef(new Animated.Value(0.4)).current,
149
+ useRef(new Animated.Value(0.4)).current,
150
+ useRef(new Animated.Value(0.4)).current,
151
+ ];
152
+
153
+ useEffect(() => {
154
+ if (!active) {
155
+ bars.forEach(b =>
156
+ Animated.timing(b, { toValue: 0.4, duration: 200, useNativeDriver: true }).start()
157
+ );
158
+ return;
159
+ }
160
+ const anims = bars.map((b, i) =>
161
+ Animated.loop(
162
+ Animated.sequence([
163
+ Animated.delay(i * 80),
164
+ Animated.timing(b, { toValue: 1, duration: 300, useNativeDriver: true }),
165
+ Animated.timing(b, { toValue: 0.4, duration: 300, useNativeDriver: true }),
166
+ ])
167
+ )
168
+ );
169
+ anims.forEach(a => a.start());
170
+ return () => anims.forEach(a => a.stop());
171
+ }, [active]);
172
+
173
+ return (
174
+ <View style={waveStyles.row}>
175
+ {bars.map((b, i) => (
176
+ <Animated.View
177
+ key={i}
178
+ style={[
179
+ waveStyles.bar,
180
+ { transform: [{ scaleY: b }], backgroundColor: active ? ACCENT : '#555' },
181
+ ]}
182
+ />
183
+ ))}
184
+ </View>
185
+ );
186
+ }
187
+
188
// Layout for the waveform: a horizontal row of thin rounded bars.
const waveStyles = StyleSheet.create({
  row: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 4,
    height: 32,
  },
  bar: {
    width: 4,
    height: 28,
    borderRadius: 2,
  },
});
192
+
193
+ // ── Main component ────────────────────────────────────────────────────────────
194
+
195
+ export default function VoiceLayerButton({
196
+ navigationRef,
197
+ apiKey,
198
+ screenNames = [],
199
+ appId = 'app',
200
+ language = 'hi',
201
+ serverUrl, // optional: overrides default https://api.voicelayer.dev
202
+ }) {
203
+ // Apply server URL override once, synchronously
204
+ if (serverUrl) setVoiceLayerServer(serverUrl);
205
+
206
+ if (!navigationRef) {
207
+ console.error('[VoiceLayer] navigationRef prop is required.');
208
+ }
209
+ if (!apiKey) {
210
+ console.warn('[VoiceLayer] apiKey prop missing. Get one at voicelayer.dev.');
211
+ }
212
+
213
+ const [sheetVisible, setSheetVisible] = useState(false);
214
+ const [inputText, setInputText] = useState('');
215
+ const [listening, setListening] = useState(false);
216
+ const [toastState, setToastState] = useState(null);
217
+ const [chips, setChips] = useState([]);
218
+
219
+ const inputRef = useRef(null);
220
+ const fabPulse = useRef(new Animated.Value(1)).current;
221
+ const loopAnim = useRef(null);
222
+
223
+ const { resolve, loading } = useVoiceIntent({ apiKey, appId, language });
224
+
225
+ // [5] Init navigation history on mount (with retry until nav is ready)
226
+ useEffect(() => {
227
+ if (!navigationRef) return;
228
+ const tryInit = () => {
229
+ if (navigationRef.isReady()) {
230
+ navigationHistory.init(navigationRef);
231
+ } else {
232
+ const t = setTimeout(tryInit, 200);
233
+ return () => clearTimeout(t);
234
+ }
235
+ };
236
+ tryInit();
237
+ }, [navigationRef]);
238
+
239
+ // Build full context snapshot at command-submit time (freshest possible)
240
+ const buildContext = useCallback(() => {
241
+ const allRoutes = buildRoutes(navigationRef, screenNames);
242
+ const currentRoute = navigationRef?.isReady() ? navigationRef.getCurrentRoute() : null;
243
+ const currentName = currentRoute?.name ?? null;
244
+ const screenMeta = currentName ? screenRegistry.get(currentName) : null;
245
+ const recentScreens = navigationHistory.getRecent(6);
246
+ const contextChips = buildContextualChips(currentName, allRoutes, screenMeta);
247
+ return { allRoutes, currentRoute, currentName, screenMeta, recentScreens, contextChips };
248
+ }, [navigationRef, screenNames]);
249
+
250
+ const openSheet = useCallback(() => {
251
+ const ctx = buildContext();
252
+ setChips(ctx.contextChips);
253
+ setInputText('');
254
+ setListening(false);
255
+ setSheetVisible(true);
256
+ }, [buildContext]);
257
+
258
+ // FAB pulse while waiting for response
259
+ const startPulse = useCallback(() => {
260
+ loopAnim.current = Animated.loop(
261
+ Animated.sequence([
262
+ Animated.timing(fabPulse, { toValue: 1.18, duration: 500, useNativeDriver: true }),
263
+ Animated.timing(fabPulse, { toValue: 1, duration: 500, useNativeDriver: true }),
264
+ ])
265
+ );
266
+ loopAnim.current.start();
267
+ }, [fabPulse]);
268
+
269
+ const stopPulse = useCallback(() => {
270
+ loopAnim.current?.stop();
271
+ fabPulse.setValue(1);
272
+ }, [fabPulse]);
273
+
274
+ const showToast = useCallback((kind, text, ms = 3500) => {
275
+ setToastState({ kind, text });
276
+ setTimeout(() => setToastState(null), ms);
277
+ }, []);
278
+
279
+ // Navigate — with canGoBack guard + case-insensitive name matching
280
+ const executeRoute = useCallback(({ name, container }, params = {}) => {
281
+ if (!navigationRef?.isReady()) return false;
282
+
283
+ if (name === '__back__') {
284
+ if (navigationRef.canGoBack()) {
285
+ navigationRef.goBack();
286
+ } else {
287
+ showToast('info', 'Pehle koi screen nahi hai');
288
+ }
289
+ return true;
290
+ }
291
+
292
+ // Case-insensitive fallback if Claude returns slight name variation
293
+ const state = navigationRef.getRootState();
294
+ const allKnown = extractRoutes(state).map(r => r.name);
295
+ const exact = allKnown.find(n => n === name);
296
+ const fuzzy = !exact && allKnown.find(n => n.toLowerCase() === name.toLowerCase());
297
+ const resolved = exact ?? fuzzy ?? name;
298
+
299
+ if (container) {
300
+ navigationRef.navigate(container, { screen: resolved, params });
301
+ } else {
302
+ navigationRef.navigate(resolved, Object.keys(params).length > 0 ? params : undefined);
303
+ }
304
+ return true;
305
+ }, [navigationRef, showToast]);
306
+
307
+ // Core submit — build context, call server, navigate
308
+ const submit = useCallback(async (text) => {
309
+ const cmd = text.trim();
310
+ if (!cmd) return;
311
+
312
+ const ctx = buildContext();
313
+ setSheetVisible(false);
314
+ setInputText('');
315
+ setListening(false);
316
+ startPulse();
317
+ showToast('loading', 'Samajh raha hoon…');
318
+
319
+ // [7] Add to conversation history before resolving
320
+ conversationStore.addUser(cmd);
321
+
322
+ try {
323
+ const result = await resolve(cmd, ctx);
324
+ stopPulse();
325
+
326
+ const hasRoute = !!result.route?.name;
327
+ showToast(hasRoute ? 'success' : 'info', result.speak || 'Samajh nahi aaya');
328
+
329
+ if (result.speak) conversationStore.addAssistant(result.speak);
330
+ if (hasRoute) executeRoute(result.route, result.params ?? {});
331
+ } catch {
332
+ stopPulse();
333
+ conversationStore.clear(); // stale context could mislead next command
334
+ showToast('error', 'Server se connect nahi ho paya. API key sahi hai?', 5000);
335
+ }
336
+ }, [resolve, buildContext, executeRoute, startPulse, stopPulse, showToast]);
337
+
338
+ const handleMicPress = useCallback(() => {
339
+ if (listening) {
340
+ setListening(false);
341
+ if (inputText.trim()) submit(inputText);
342
+ } else {
343
+ setListening(true);
344
+ // Wire @react-native-voice/voice here for real STT on physical devices.
345
+ // Falls back to text input focus on emulator — works for demos.
346
+ setTimeout(() => inputRef.current?.focus(), 100);
347
+ }
348
+ }, [listening, inputText, submit]);
349
+
350
+ // ── Render ──────────────────────────────────────────────────────────────────
351
+ return (
352
+ <>
353
+ {/* Floating mic button */}
354
+ <Animated.View style={[styles.fab, { transform: [{ scale: fabPulse }] }]}>
355
+ <TouchableOpacity
356
+ style={[styles.fabInner, loading && styles.fabInnerActive]}
357
+ onPress={openSheet}
358
+ activeOpacity={0.85}
359
+ >
360
+ {loading
361
+ ? <ActivityIndicator color="#fff" size="small" />
362
+ : <Text style={styles.fabIcon}>🎙</Text>}
363
+ </TouchableOpacity>
364
+ </Animated.View>
365
+
366
+ {/* Response toast */}
367
+ {toastState && (
368
+ <View style={[
369
+ styles.toast,
370
+ toastState.kind === 'error' && styles.toastError,
371
+ toastState.kind === 'success' && styles.toastSuccess,
372
+ ]}>
373
+ {toastState.kind === 'loading'
374
+ ? <ActivityIndicator size="small" color="#fff" style={{ marginRight: 8 }} />
375
+ : <Text style={styles.toastIcon}>
376
+ {toastState.kind === 'success' ? '✓' : toastState.kind === 'error' ? '✕' : 'ℹ'}
377
+ </Text>
378
+ }
379
+ <Text style={styles.toastText} numberOfLines={3}>{toastState.text}</Text>
380
+ </View>
381
+ )}
382
+
383
+ {/* Bottom sheet */}
384
+ <Modal
385
+ visible={sheetVisible}
386
+ transparent
387
+ animationType="slide"
388
+ onRequestClose={() => setSheetVisible(false)}
389
+ >
390
+ <KeyboardAvoidingView
391
+ behavior={Platform.OS === 'ios' ? 'padding' : 'height'}
392
+ style={styles.overlay}
393
+ >
394
+ <Pressable style={StyleSheet.absoluteFill} onPress={() => setSheetVisible(false)} />
395
+
396
+ <View style={styles.sheet}>
397
+ <View style={styles.handle} />
398
+
399
+ {/* Header */}
400
+ <View style={styles.sheetHeader}>
401
+ <View style={styles.headerLeft}>
402
+ <Text style={styles.headerIcon}>🎙</Text>
403
+ <Text style={styles.headerTitle}>VoiceLayer</Text>
404
+ </View>
405
+ <TouchableOpacity
406
+ onPress={() => setSheetVisible(false)}
407
+ hitSlop={{ top: 10, bottom: 10, left: 10, right: 10 }}
408
+ >
409
+ <Text style={styles.closeBtn}>✕</Text>
410
+ </TouchableOpacity>
411
+ </View>
412
+
413
+ {/* Context-aware chips */}
414
+ {chips.length > 0 && (
415
+ <>
416
+ <Text style={styles.sectionLabel}>QUICK ACTIONS</Text>
417
+ <ScrollView
418
+ horizontal
419
+ showsHorizontalScrollIndicator={false}
420
+ style={styles.chipsScroll}
421
+ contentContainerStyle={styles.chipsContent}
422
+ >
423
+ {chips.map(chip => (
424
+ <TouchableOpacity
425
+ key={chip}
426
+ style={styles.chip}
427
+ onPress={() => submit(chip)}
428
+ activeOpacity={0.7}
429
+ >
430
+ <Text style={styles.chipIcon}>⚡</Text>
431
+ <Text style={styles.chipText}>{chip}</Text>
432
+ </TouchableOpacity>
433
+ ))}
434
+ </ScrollView>
435
+ </>
436
+ )}
437
+
438
+ {/* Input row */}
439
+ <View style={styles.inputRow}>
440
+ <TextInput
441
+ ref={inputRef}
442
+ style={styles.input}
443
+ value={inputText}
444
+ onChangeText={setInputText}
445
+ placeholder="Ya type karo kuch bhi…"
446
+ placeholderTextColor="#555"
447
+ returnKeyType="send"
448
+ onSubmitEditing={() => submit(inputText)}
449
+ />
450
+ <TouchableOpacity
451
+ style={[styles.micBtn, listening && styles.micBtnActive]}
452
+ onPress={handleMicPress}
453
+ activeOpacity={0.8}
454
+ >
455
+ <Text style={styles.micIcon}>{listening ? '■' : '🎙'}</Text>
456
+ </TouchableOpacity>
457
+ {inputText.trim().length > 0 && (
458
+ <TouchableOpacity
459
+ style={styles.sendBtn}
460
+ onPress={() => submit(inputText)}
461
+ activeOpacity={0.8}
462
+ >
463
+ <Text style={styles.sendIcon}>↑</Text>
464
+ </TouchableOpacity>
465
+ )}
466
+ </View>
467
+
468
+ {listening && (
469
+ <View style={styles.waveRow}>
470
+ <Waveform active />
471
+ <Text style={styles.listeningLabel}>Sun raha hoon…</Text>
472
+ </View>
473
+ )}
474
+ </View>
475
+ </KeyboardAvoidingView>
476
+ </Modal>
477
+ </>
478
+ );
479
+ }
480
+
481
+ // ── Styles ────────────────────────────────────────────────────────────────────
482
// Styles for the floating button, toast and bottom sheet.
const styles = StyleSheet.create({
  // Floating action button
  fab: {
    position: 'absolute',
    bottom: 90,
    left: 20,
    zIndex: 100,
    elevation: 5,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 3 },
    shadowOpacity: 0.25,
    shadowRadius: 5,
  },
  fabInner: {
    width: 52,
    height: 52,
    borderRadius: 26,
    backgroundColor: ACCENT,
    alignItems: 'center',
    justifyContent: 'center',
  },
  fabInnerActive: {
    backgroundColor: '#388E3C',
  },
  fabIcon: {
    fontSize: 22,
  },

  // Toast
  toast: {
    position: 'absolute',
    bottom: 155,
    left: 16,
    right: 80,
    backgroundColor: 'rgba(20,20,20,0.93)',
    borderRadius: 12,
    paddingHorizontal: 14,
    paddingVertical: 10,
    flexDirection: 'row',
    alignItems: 'center',
    zIndex: 99,
    elevation: 8,
  },
  toastError: {
    backgroundColor: 'rgba(180,0,0,0.9)',
  },
  toastSuccess: {
    backgroundColor: 'rgba(30,100,30,0.93)',
  },
  toastIcon: {
    color: '#fff',
    marginRight: 8,
    fontSize: 15,
    fontWeight: '700',
  },
  toastText: {
    color: '#fff',
    fontSize: 13,
    flex: 1,
    lineHeight: 19,
  },

  // Bottom sheet container
  overlay: {
    flex: 1,
    backgroundColor: 'rgba(0,0,0,0.55)',
    justifyContent: 'flex-end',
  },
  sheet: {
    backgroundColor: DARK_BG,
    borderTopLeftRadius: 22,
    borderTopRightRadius: 22,
    paddingHorizontal: 20,
    paddingBottom: 32,
    paddingTop: 12,
  },
  handle: {
    alignSelf: 'center',
    width: 38,
    height: 4,
    borderRadius: 2,
    backgroundColor: '#333',
    marginBottom: 14,
  },

  // Sheet header
  sheetHeader: {
    flexDirection: 'row',
    alignItems: 'center',
    justifyContent: 'space-between',
    marginBottom: 20,
  },
  headerLeft: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 8,
  },
  headerIcon: {
    fontSize: 18,
  },
  headerTitle: {
    color: '#fff',
    fontSize: 16,
    fontWeight: '700',
  },
  closeBtn: {
    color: '#666',
    fontSize: 18,
    fontWeight: '600',
  },

  // Quick-action chips
  sectionLabel: {
    color: '#555',
    fontSize: 11,
    fontWeight: '600',
    letterSpacing: 0.8,
    marginBottom: 10,
  },
  chipsScroll: {
    marginBottom: 16,
  },
  chipsContent: {
    gap: 8,
    paddingRight: 4,
  },
  chip: {
    flexDirection: 'row',
    alignItems: 'center',
    backgroundColor: '#222',
    borderRadius: 20,
    paddingHorizontal: 14,
    paddingVertical: 8,
    borderWidth: 1,
    borderColor: '#2e2e2e',
  },
  chipIcon: {
    fontSize: 11,
    marginRight: 4,
  },
  chipText: {
    color: '#ccc',
    fontSize: 13,
  },

  // Input row
  inputRow: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 8,
  },
  input: {
    flex: 1,
    backgroundColor: '#222',
    borderRadius: 12,
    borderWidth: 1,
    borderColor: '#2e2e2e',
    color: '#fff',
    fontSize: 15,
    paddingHorizontal: 14,
    paddingVertical: 12,
  },
  micBtn: {
    width: 46,
    height: 46,
    borderRadius: 23,
    backgroundColor: '#333',
    alignItems: 'center',
    justifyContent: 'center',
  },
  micBtnActive: {
    backgroundColor: '#c0392b',
  },
  micIcon: {
    fontSize: 18,
  },
  sendBtn: {
    width: 46,
    height: 46,
    borderRadius: 23,
    backgroundColor: ACCENT,
    alignItems: 'center',
    justifyContent: 'center',
  },
  sendIcon: {
    color: '#fff',
    fontSize: 20,
    fontWeight: '800',
  },

  // Listening indicator
  waveRow: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 12,
    marginTop: 16,
  },
  listeningLabel: {
    color: ACCENT,
    fontSize: 13,
  },
});
package/src/api.ts ADDED
@@ -0,0 +1,94 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — /api/voice server client
3
+ //
4
+ // Called only on cache MISS. Sends the transcript (on-device STT already done)
5
+ // or raw audio (if on-device STT not ready yet) to the VoiceLayer server.
6
+ //
7
+ // Server resolves intent via Claude, returns { actionId, params, speak }.
8
+ // ─────────────────────────────────────────────────────────────────────────────
9
+
10
+ import type { VLAction, VoiceResult } from './types'
11
+
12
+ export interface VoiceApiOptions {
13
+ proxyUrl: string
14
+ appId: string
15
+ screenName?: string
16
+ language?: string
17
+ apiKey?: string
18
+ }
19
+
20
/**
 * Builds the auth header map for a request.
 *
 * @param apiKey Optional API key.
 * @returns `{ Authorization: 'Bearer <apiKey>' }` when a non-empty key is
 *   given, otherwise an empty object.
 */
function authHeaders(apiKey?: string): Record<string, string> {
  const headers: Record<string, string> = {}
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`
  }
  return headers
}
23
+
24
/**
 * Resolve a transcript to an action via the VoiceLayer server.
 * Used when the LearningStore doesn't have a match.
 *
 * POSTs JSON to `<proxyUrl>/api/voice`. Trailing slashes on `proxyUrl` are
 * ignored, so `https://host` and `https://host/` hit the same endpoint.
 *
 * @param transcript Recognised text of the command.
 * @param actions Available actions; only `id`, `description` and `params`
 *   (default `[]`) are sent.
 * @param opts Server location, app/screen context and optional API key.
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not OK; the message contains the
 *   status code and the first 100 characters of the response body.
 */
export async function resolveViaServer(
  transcript: string,
  actions: VLAction[],
  opts: VoiceApiOptions,
): Promise<VoiceResult> {
  const { proxyUrl, appId, screenName, language, apiKey } = opts

  // Avoid "https://host//api/voice" when the configured base ends with "/".
  const endpoint = `${proxyUrl.replace(/\/+$/, '')}/api/voice`

  const res = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', ...authHeaders(apiKey) },
    body: JSON.stringify({
      transcript,
      appId,
      screenName: screenName ?? 'unknown',
      language: language ?? 'hi',
      actions: actions.map((a) => ({
        id: a.id,
        description: a.description,
        params: a.params ?? [],
      })),
    }),
  })

  if (!res.ok) {
    // Best effort: an unreadable error body must not mask the status code.
    const body = await res.text().catch(() => '')
    throw new Error(`VoiceLayer server error ${res.status}: ${body.slice(0, 100)}`)
  }

  return res.json() as Promise<VoiceResult>
}
58
+
59
/**
 * Send raw audio to the server for STT + intent resolution in one call.
 * Used when on-device Whisper isn't loaded yet (first launch).
 *
 * POSTs JSON to `<proxyUrl>/api/voice`. Trailing slashes on `proxyUrl` are
 * ignored, so `https://host` and `https://host/` hit the same endpoint.
 *
 * @param audioBase64 Audio payload, sent as the `audio` field.
 * @param audioFormat Container format of the audio.
 * @param actions Available actions; only `id`, `description` and `params`
 *   (default `[]`) are sent.
 * @param opts Server location, app/screen context and optional API key.
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not OK; the message contains the
 *   status code and the first 100 characters of the response body.
 */
export async function resolveViaServerWithAudio(
  audioBase64: string,
  audioFormat: 'wav' | 'm4a' | 'webm',
  actions: VLAction[],
  opts: VoiceApiOptions,
): Promise<VoiceResult & { transcript: string }> {
  const { proxyUrl, appId, screenName, language, apiKey } = opts

  // Avoid "https://host//api/voice" when the configured base ends with "/".
  const endpoint = `${proxyUrl.replace(/\/+$/, '')}/api/voice`

  const res = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', ...authHeaders(apiKey) },
    body: JSON.stringify({
      audio: audioBase64,
      audioFormat,
      appId,
      screenName: screenName ?? 'unknown',
      language: language ?? 'hi',
      actions: actions.map((a) => ({
        id: a.id,
        description: a.description,
        params: a.params ?? [],
      })),
    }),
  })

  if (!res.ok) {
    // Best effort: an unreadable error body must not mask the status code.
    const body = await res.text().catch(() => '')
    throw new Error(`VoiceLayer server error ${res.status}: ${body.slice(0, 100)}`)
  }

  return res.json() as Promise<VoiceResult & { transcript: string }>
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * conversationStore — rolling window of recent voice exchanges.
3
+ *
4
+ * Stores spoken text only (not raw JSON payloads) so Claude can resolve
5
+ * pronouns and references across commands: "unhe sort karo" → previous subject.
6
+ *
7
+ * MAX_TURNS = 3 means up to 3 user + 3 assistant messages (6 total).
8
+ */
9
+
10
// Window size in exchanges; the store keeps at most MAX_TURNS * 2 entries.
const MAX_TURNS = 3;

// Rolling list of { role, content } entries, oldest first.
let _turns = [];
12
+
13
/**
 * Module-level store of recent user / assistant messages.
 *
 * Entries are `{ role: 'user' | 'assistant', content: string }` and are
 * appended through `_push`, which enforces the rolling window.
 */
export const conversationStore = {
  /** Records what the user said. */
  addUser(transcript) {
    _push({ role: 'user', content: transcript });
  },

  /** Records the assistant's spoken reply; empty replies are ignored. */
  addAssistant(spokenResponse) {
    if (spokenResponse) _push({ role: 'assistant', content: spokenResponse });
  },

  /**
   * Returns prior turns for Claude's messages array.
   * The current in-flight user message is excluded — it's sent separately.
   *
   * The returned history never opens with an assistant reply: once the
   * rolling window has trimmed away a user message, the reply that answered
   * it is dropped too, so the list starts with a user turn.
   *
   * @returns {Array<{role: string, content: string}>} A fresh array; the
   *   stored turns are not modified.
   */
  getHistory() {
    const history = [..._turns];

    // Drop the last entry if it's an in-flight user message
    if (history.length > 0 && history[history.length - 1].role === 'user') {
      history.pop();
    }

    // Drop leading replies whose user prompt fell out of the window.
    while (history.length > 0 && history[0].role !== 'user') {
      history.shift();
    }

    return history;
  },

  /** Forgets all stored turns. */
  clear() {
    _turns = [];
  },
};
39
+
40
/**
 * Appends a turn and trims the store to the newest `MAX_TURNS * 2` entries.
 *
 * @param {{role: string, content: string}} turn - Entry to append.
 */
function _push(turn) {
  const limit = 2 * MAX_TURNS;
  _turns.push(turn);
  if (_turns.length <= limit) return;
  // Over budget: keep only the most recent `limit` entries.
  _turns = _turns.slice(_turns.length - limit);
}