voicelayer-rn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * voicelayer-rn — Voice intelligence for React Native
3
+ *
4
+ * Main exports:
5
+ * VoiceLayerButton — drop-in floating voice assistant component
6
+ * useVoiceLayerScreen — optional hook for per-screen context hints
7
+ * setVoiceLayerServer — override server URL (for self-hosting)
8
+ */
9
+
10
+ export { default as VoiceLayerButton } from './VoiceLayerButton';
11
+ export { useVoiceLayerScreen } from './useVoiceLayerScreen';
12
+ export { setVoiceLayerServer } from './useVoiceIntent';
package/src/index.ts ADDED
@@ -0,0 +1,14 @@
1
+ // VoiceLayer RN — public exports
2
+
3
+ export { VoiceLayer } from './VoiceLayer'
4
+ export { useVoiceLayer } from './useVoiceLayer'
5
+ export { LearningStore } from './LearningStore'
6
+ export { Transcriber } from './Transcriber'
7
+ export { TTSPlayer } from './TTSPlayer'
8
+ export type {
9
+ VLAction,
10
+ StoredCommand,
11
+ VoiceResult,
12
+ VoiceLayerProps,
13
+ VoiceState,
14
+ } from './types'
@@ -0,0 +1,52 @@
1
+ /**
2
+ * navigationHistory — tracks screen transitions for "wapas jaao" and pronoun resolution.
3
+ *
4
+ * Call navigationHistory.init(navigationRef) once after NavigationContainer is ready.
5
+ * VoiceLayerButton does this automatically on mount.
6
+ */
7
+
8
+ const MAX_HISTORY = 12;
9
+ let _history = [];
10
+ let _listener = null;
11
+
12
+ export const navigationHistory = {
13
+ /**
14
+ * Start listening to navigation state changes.
15
+ * Safe to call multiple times — only registers once.
16
+ * @param {NavigationContainerRef} navRef
17
+ */
18
+ init(navRef) {
19
+ if (_listener || !navRef?.isReady()) return;
20
+
21
+ const handler = () => {
22
+ const route = navRef.getCurrentRoute();
23
+ if (!route?.name) return;
24
+ // Deduplicate consecutive same-screen entries
25
+ if (_history[_history.length - 1] === route.name) return;
26
+ _history.push(route.name);
27
+ if (_history.length > MAX_HISTORY) _history.shift();
28
+ };
29
+
30
+ _listener = navRef.addListener('state', handler);
31
+ handler(); // seed with current screen immediately
32
+ },
33
+
34
+ /** Most recent n screen names, oldest first. */
35
+ getRecent(n = 6) {
36
+ return _history.slice(-n);
37
+ },
38
+
39
+ /** The screen visited before the current one, or null. */
40
+ getPrevious() {
41
+ return _history.length >= 2 ? _history[_history.length - 2] : null;
42
+ },
43
+
44
+ canGoBack() {
45
+ return _history.length >= 2;
46
+ },
47
+
48
+ reset() {
49
+ _history = [];
50
+ _listener = null;
51
+ },
52
+ };
@@ -0,0 +1,23 @@
1
+ /**
2
+ * screenRegistry — global store for per-screen context from useVoiceLayerScreen.
3
+ *
4
+ * Screens register on mount, unregister on unmount. VoiceLayerButton reads
5
+ * the current screen's registration at command-submit time to enrich Claude's context.
6
+ */
7
+
8
+ const _registry = new Map();
9
+
10
+ export const screenRegistry = {
11
+ register(screenName, { hints = [], elements = [] } = {}) {
12
+ _registry.set(screenName, { hints: [...hints], elements: [...elements] });
13
+ },
14
+ unregister(screenName) {
15
+ _registry.delete(screenName);
16
+ },
17
+ get(screenName) {
18
+ return _registry.get(screenName) ?? null;
19
+ },
20
+ clear() {
21
+ _registry.clear();
22
+ },
23
+ };
package/src/types.ts ADDED
@@ -0,0 +1,102 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — shared types
3
+ // ─────────────────────────────────────────────────────────────────────────────
4
+
5
+ /** A single action the user can trigger by voice on the current screen; results are matched to it by `id`. */
6
+ export interface VLAction {
7
+ /** Unique stable ID. Stored in LearningStore — never change after shipping. */
8
+ id: string
9
+ /** Plain English/Hindi description used by Claude for intent matching. */
10
+ description: string
11
+ /**
12
+ * Parameter names Claude can extract from the voice command.
13
+ * e.g. ['name', 'phone', 'plan'] for a registration action.
14
+ * If empty / omitted, the action takes no parameters.
15
+ */
16
+ params?: string[]
17
+ /**
18
+ * Called when this action is resolved — either from cache (instant)
19
+ * or from the server (first time); useVoiceLayer invokes it after the spoken confirmation finishes.
20
+ * Receives extracted params as a key→value map.
21
+ */
22
+ onTrigger: (params: Record<string, string>) => void
23
+ }
24
+
25
+ /** One learned command stored in AsyncStorage. */
26
+ export interface StoredCommand {
27
+ /** Normalised transcript used for fuzzy matching. */
28
+ transcript: string
29
+ /** Original transcript as spoken (for debugging). */
30
+ original: string
31
+ actionId: string // compared against VLAction.id when a cached command is replayed
32
+ params: Record<string, string> // passed unchanged to VLAction.onTrigger on a cache hit
33
+ speak: string // phrase spoken via TTS before the action is triggered
34
+ hitCount: number // presumably how often this entry has been matched — TODO confirm in LearningStore
35
+ lastUsed: number // presumably a timestamp of the last match; unit not visible here — TODO confirm
36
+ confidence: number // presumably the server's confidence for this mapping; scale not visible here
37
+ }
38
+
39
+ /** Response from the /api/voice server endpoint. */
40
+ export interface VoiceResult {
41
+ actionId: string // ID of the resolved action; when no registered VLAction has this ID the caller speaks `speak` as a clarification
42
+ params: Record<string, string> // extracted parameters, handed to VLAction.onTrigger
43
+ speak: string // phrase to speak back to the user
44
+ confidence: number // NOTE(review): scale/range is not visible in this file — confirm against the server
45
+ }
46
+
47
+ /** Props for the <VoiceLayer /> component (also the argument of the useVoiceLayer hook). */
48
+ export interface VoiceLayerProps {
49
+ /**
50
+ * Base URL of your VoiceLayer analytics server.
51
+ * e.g. "https://your-server.com"
52
+ * Used for the /api/voice fallback when a command isn't in local cache.
53
+ */
54
+ proxyUrl: string
55
+
56
+ /**
57
+ * SDK API key — must match SDK_API_KEY in your server's .env.
58
+ * Passed as Authorization: Bearer <apiKey> on every server call.
59
+ * Leave empty if SDK_API_KEY is not set on the server (open dev mode).
60
+ */
61
+ apiKey?: string
62
+
63
+ /** App identifier — scopes the LearningStore so commands don't bleed between apps. */
64
+ appId: string
65
+
66
+ /** Voice commands registered for the current screen. */
67
+ actions: VLAction[]
68
+
69
+ /**
70
+ * Current screen / route name — helps the server resolve ambiguous commands.
71
+ * e.g. "CustomerList", "Dashboard", "OrderDetail"
72
+ */
73
+ screenName?: string
74
+
75
+ /** Primary language of your users. Default: 'hi' */
76
+ language?: 'hi' | 'en' | 'auto'
77
+
78
+ /**
79
+ * Location of the ggml-tiny.bin Whisper model: either a file path (string) or a bundled asset reference (number).
80
+ * Download it once via: npx whisper.rn download tiny
81
+ * Then pass e.g.: modelPath={require('../assets/ggml-tiny.bin')} (a bundled asset reference, not a path string)
82
+ *
83
+ * If omitted, STT falls back to the server (audio is sent to /api/voice).
84
+ * The server path works but adds ~200ms latency.
85
+ */
86
+ modelPath?: number | string
87
+
88
+ /** Floating button position. Default: 'bottom-right'. Not read by the useVoiceLayer hook itself. */
89
+ position?: 'bottom-right' | 'bottom-left' | 'bottom-center'
90
+
91
+ /** Tint color for the mic button. Default: '#6C63FF'. Not read by the useVoiceLayer hook itself. */
92
+ color?: string
93
+
94
+ /** Log debug output to console. Default: false */
95
+ debug?: boolean
96
+
97
+ /** Called on unrecoverable errors (permission denied, server down, etc.) */
98
+ onError?: (error: Error) => void
99
+ }
100
+
101
+ /** States the mic button and overlay can be in. useVoiceLayer moves idle → listening → processing → speaking → idle; 'error' is shown briefly before returning to idle. */
102
+ export type VoiceState = 'idle' | 'listening' | 'processing' | 'speaking' | 'error'
@@ -0,0 +1,91 @@
1
+ /**
2
+ * useVoiceIntent — sends voice context to VoiceLayer server and returns navigation intent.
3
+ *
4
+ * Default server: https://voicelayer-sdk-production.up.railway.app
5
+ * Override: pass serverUrl prop on VoiceLayerButton, or call setVoiceLayerServer()
6
+ * e.g. for local dev: setVoiceLayerServer('http://10.0.2.2:3001')
7
+ */
8
+
9
+ import { useState, useCallback } from 'react';
10
+ import { conversationStore } from './conversationStore';
11
+
12
+ // ── Server URL ─────────────────────────────────────────────────────────────
13
+ let _serverUrl = 'https://voicelayer-sdk-production.up.railway.app';
14
+
15
+ /**
16
+ * Override the VoiceLayer server URL.
17
+ * Call once in App.js before any voice commands are issued.
18
+ *
19
+ * Examples:
20
+ * setVoiceLayerServer('http://10.0.2.2:3001') // Android emulator → local server
21
+ * setVoiceLayerServer('http://localhost:3001') // iOS Simulator → local server
22
+ * setVoiceLayerServer('http://192.168.1.5:3001') // Real device → local server
23
+ * setVoiceLayerServer('https://api.voicelayer.dev') // Default (no call needed)
24
+ */
25
+ export function setVoiceLayerServer(url) {
26
+ _serverUrl = url.replace(/\/$/, '');
27
+ }
28
+
29
+ // ── Hook ───────────────────────────────────────────────────────────────────
30
/**
 * React hook that resolves voice transcripts against the VoiceLayer server.
 *
 * @param {{ apiKey?: string, appId?: string, language?: string }} [options]
 *   apiKey   — sent as `Authorization: Bearer <apiKey>` when provided
 *   appId    — forwarded to the server (default 'app')
 *   language — forwarded to the server (default 'hi')
 * @returns {{ resolve: Function, loading: boolean, error: string | null }}
 */
export function useVoiceIntent({ apiKey, appId = 'app', language = 'hi' } = {}) {
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState(null);

  /**
   * Resolve a voice transcript to a navigation route.
   * POSTs the transcript plus context to `<server>/api/voice` and returns the parsed JSON body.
   * Rejects (after recording the message in `error`) on network failure or a non-2xx response.
   * @param {string} transcript
   * @param {object} ctx — context snapshot from VoiceLayerButton
   */
  const resolve = useCallback(async (transcript, ctx = {}) => {
    setLoading(true);
    setError(null);

    try {
      // Everything that can throw lives inside the try so `loading` is always
      // reset by the finally block, even when context gathering fails.
      const {
        allRoutes = [],
        currentRoute = null,
        screenMeta = null,
        recentScreens = [],
      } = ctx ?? {};

      const conversationHistory = conversationStore.getHistory();

      const headers = {
        'Content-Type': 'application/json',
        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
      };

      const res = await fetch(`${_serverUrl}/api/voice`, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          transcript,
          routes: allRoutes,
          currentScreen: currentRoute?.name ?? null,
          currentScreenParams: currentRoute?.params ?? {},
          screenHints: screenMeta?.hints ?? [],
          screenElements: screenMeta?.elements ?? [],
          recentScreens,
          conversationHistory,
          appId,
          language,
        }),
      });

      if (!res.ok) {
        // The error body is optional; fall back to the status code when it is not JSON.
        const body = await res.json().catch(() => ({}));
        throw new Error(body.error || `VoiceLayer error ${res.status}`);
      }

      return await res.json();
    } catch (e) {
      // Non-Error throwables have no `.message`; stringify them instead of storing undefined.
      setError(e instanceof Error ? e.message : String(e));
      throw e;
    } finally {
      setLoading(false);
    }
  }, [apiKey, appId, language]);

  return { resolve, loading, error };
}
@@ -0,0 +1,269 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — useVoiceLayer hook
3
+ //
4
+ // Orchestrates the full voice pipeline:
5
+ // record → STT (on-device) → LearningStore → server fallback → execute
6
+ //
7
+ // Used internally by <VoiceLayer /> but exported so advanced users can
8
+ // build custom UIs on top of it.
9
+ // ─────────────────────────────────────────────────────────────────────────────
10
+
11
+ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
12
+ import { Audio } from 'expo-av'
13
+ import * as FileSystem from 'expo-file-system'
14
+
15
+ import { LearningStore } from './LearningStore'
16
+ import { Transcriber } from './Transcriber'
17
+ import { TTSPlayer } from './TTSPlayer'
18
+ import { resolveViaServer, resolveViaServerWithAudio } from './api'
19
+ import type { VLAction, VoiceState, VoiceLayerProps } from './types'
20
+
21
+ // ── Recording options ─────────────────────────────────────────────────────────
22
+ // 16kHz mono WAV is what Whisper expects. Both platforms request 16 kHz, 1 channel and a .wav extension; the container/encoder settings differ per platform (see notes below).
23
+ const RECORDING_OPTIONS: Audio.RecordingOptions = {
24
+ android: {
25
+ extension: '.wav',
26
+ outputFormat: Audio.AndroidOutputFormat.DEFAULT, // NOTE(review): DEFAULT may not produce a PCM WAV container on Android — confirm the file really is WAV
27
+ audioEncoder: Audio.AndroidAudioEncoder.DEFAULT, // NOTE(review): same concern as outputFormat — verify against the Expo AV docs
28
+ sampleRate: 16000,
29
+ numberOfChannels: 1,
30
+ bitRate: 128_000,
31
+ },
32
+ ios: {
33
+ extension: '.wav',
34
+ outputFormat: Audio.IOSOutputFormat.LINEARPCM,
35
+ audioQuality: Audio.IOSAudioQuality.HIGH,
36
+ sampleRate: 16000,
37
+ numberOfChannels: 1,
38
+ bitRate: 128_000,
39
+ linearPCMBitDepth: 16, // 16-bit little-endian integer PCM
40
+ linearPCMIsBigEndian: false,
41
+ linearPCMIsFloat: false,
42
+ },
43
+ web: {}, // not used — this is the RN SDK
44
+ isMeteringEnabled: false,
45
+ }
46
+
47
+ const MAX_RECORD_MS = 10_000 // hard cap: recording is auto-stopped 10 s after it starts (no silence detection)
48
+ const SILENCE_PAUSE = 1_500 // intended 1.5 s silence threshold for future VAD; not referenced by the code visible here
49
+
50
+ export interface UseVoiceLayerReturn { // value returned by the useVoiceLayer hook
51
+ state: VoiceState // current pipeline state
52
+ transcript: string // latest transcript (on-device or from the server); cleared when processing starts
53
+ message: string // TTS message shown in overlay
54
+ toggle: () => Promise<void> // start or stop listening; no-op while processing or speaking
55
+ isReady: boolean // true once LearningStore has hydrated and the transcriber's init() has resolved
56
+ }
57
+
58
/**
 * Orchestrates the voice pipeline for one screen:
 * record → on-device STT → LearningStore cache → server fallback → speak → trigger action.
 *
 * Every failure path returns the hook to `'idle'` so the mic button can never
 * get stuck in `'listening'`, `'processing'` or `'error'`.
 *
 * @param props - Same props as `<VoiceLayer />`; `position` and `color` are not used here.
 * @returns Current state, transcript, overlay message, the `toggle` control and readiness flag.
 */
export function useVoiceLayer({
  proxyUrl,
  appId,
  actions,
  screenName,
  language = 'hi',
  modelPath,
  apiKey,
  debug = false,
  onError,
}: VoiceLayerProps): UseVoiceLayerReturn {
  const [state, setState] = useState<VoiceState>('idle')
  const [transcript, setTranscript] = useState('')
  const [message, setMessage] = useState('')
  const [isReady, setReady] = useState(false)

  const recordingRef = useRef<Audio.Recording | null>(null)
  const autoStopRef = useRef<ReturnType<typeof setTimeout> | null>(null)
  const errorResetRef = useRef<ReturnType<typeof setTimeout> | null>(null)
  // True while startListening is awaiting permission / recorder setup, so a
  // second tap cannot prepare a second Recording in parallel.
  const startingRef = useRef(false)
  // Latest onError without making the init effect depend on its identity.
  const onErrorRef = useRef(onError)
  onErrorRef.current = onError

  // ── Stable singletons (don't recreate on every render) ───────────────────
  const store = useMemo(() => new LearningStore(appId), [appId])

  const transcriber = useMemo(
    () => new Transcriber(language, modelPath, debug),
    [language, modelPath, debug],
  )

  const tts = useMemo(() => new TTSPlayer(language), [language])

  // ── Initialise on mount (and whenever store / transcriber are recreated) ──
  useEffect(() => {
    let cancelled = false
    setReady(false)

    Promise.all([
      store.ready(),
      transcriber.init(), // warm up Whisper — downloads model if needed
    ])
      .then(() => {
        if (!cancelled) setReady(true)
      })
      .catch((err: unknown) => {
        if (cancelled) return
        const error = err instanceof Error ? err : new Error(String(err))
        if (debug) console.error('[VoiceLayer] init failed:', error)
        onErrorRef.current?.(error)
      })

    return () => {
      cancelled = true
    }
    // `debug` is already covered: it is a dependency of `transcriber`.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [store, transcriber])

  // ── Small state helpers ───────────────────────────────────────────────────

  /** Return to the resting state and clear the overlay message. */
  const settle = useCallback(() => {
    setState('idle')
    setMessage('')
  }, [])

  /** Show `text` in the overlay, switch to `phase`, and wait until TTS has spoken it. */
  const announce = useCallback(async (text: string, phase: VoiceState) => {
    setMessage(text)
    setState(phase)
    await tts.speak(text)
  }, [tts])

  /** Leave the transient 'error' state after 2 s unless something else already replaced it. */
  const scheduleErrorReset = useCallback(() => {
    if (errorResetRef.current) clearTimeout(errorResetRef.current)
    errorResetRef.current = setTimeout(() => {
      errorResetRef.current = null
      setState((prev) => (prev === 'error' ? 'idle' : prev))
    }, 2000)
  }, [])

  // ── Core pipeline ─────────────────────────────────────────────────────────

  const processAudio = useCallback(async (audioUri: string) => {
    setState('processing')
    setTranscript('')

    try {
      // ── Step 1: STT ────────────────────────────────────────────────────────
      let resolvedTranscript: string | null = null

      if (transcriber.isReady) {
        try {
          // On-device Whisper — fast, no network
          resolvedTranscript = await transcriber.transcribe(audioUri)
          if (debug) console.log('[VoiceLayer] on-device transcript:', resolvedTranscript)
        } catch (sttErr) {
          // A local STT failure is recoverable: the server can transcribe the audio.
          if (debug) console.warn('[VoiceLayer] on-device STT failed, using server STT:', sttErr)
        }
      }

      // ── Step 2: LearningStore (cache lookup) ───────────────────────────────
      if (resolvedTranscript) {
        setTranscript(resolvedTranscript)

        const hit = store.findMatch(resolvedTranscript, screenName)
        if (hit) {
          if (debug) console.log('[VoiceLayer] cache HIT:', hit)

          const cachedAction = actions.find((a) => a.id === hit.actionId)
          if (cachedAction) {
            await announce(hit.speak, 'speaking')
            cachedAction.onTrigger(hit.params)
            settle()
            return
          }
          // Cached action is not registered on this screen — ask the server.
        }
      }

      // ── Step 3: Server fallback ────────────────────────────────────────────
      const opts = { proxyUrl, appId, screenName, language, apiKey }

      let result: Awaited<ReturnType<typeof resolveViaServer>>
      let serverTranscript = resolvedTranscript ?? ''

      if (resolvedTranscript) {
        result = await resolveViaServer(resolvedTranscript, actions, opts)
      } else {
        // No usable on-device transcript — upload the audio; server does STT + intent in one shot
        const audioBase64 = await FileSystem.readAsStringAsync(audioUri, {
          encoding: FileSystem.EncodingType.Base64,
        })
        const audioResult = await resolveViaServerWithAudio(audioBase64, 'wav', actions, opts)
        result = audioResult
        serverTranscript = audioResult.transcript ?? ''
        setTranscript(serverTranscript)
      }

      if (debug) console.log('[VoiceLayer] server result:', result)

      // Cache for next time. A failed cache write must not stop the action from running.
      if (serverTranscript) {
        try {
          await store.store(serverTranscript, result, screenName)
        } catch (cacheErr) {
          if (debug) console.warn('[VoiceLayer] could not cache command:', cacheErr)
        }
      }

      const action = actions.find((a) => a.id === result.actionId)
      if (!action) {
        // Clarify — no matching action
        const clarify = result.speak ?? "Samajh nahi aaya, kya aap dobara bol sakte hain?"
        await announce(clarify, 'speaking')
        settle()
        return
      }

      await announce(result.speak, 'speaking')
      action.onTrigger(result.params)
      settle()
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))
      if (debug) console.error('[VoiceLayer] pipeline error:', error)
      onError?.(error)

      try {
        await announce('Kuch problem ho gayi. Please try again.', 'error')
      } catch (ttsErr) {
        // TTS itself may be what failed; never let that block recovery.
        if (debug) console.warn('[VoiceLayer] could not speak error message:', ttsErr)
      } finally {
        settle()
      }
    }
  }, [
    transcriber, store, tts, actions, proxyUrl, appId,
    screenName, language, apiKey, debug, onError, announce, settle,
  ])

  const stopAndProcess = useCallback(async () => {
    const rec = recordingRef.current
    if (!rec) return
    recordingRef.current = null

    if (autoStopRef.current) {
      clearTimeout(autoStopRef.current)
      autoStopRef.current = null
    }

    try {
      await rec.stopAndUnloadAsync()
    } catch { /* already stopped */ }

    try {
      await Audio.setAudioModeAsync({ allowsRecordingIOS: false })
    } catch (modeErr) {
      // Not fatal: the recorded file can still be processed.
      if (debug) console.warn('[VoiceLayer] could not reset audio mode:', modeErr)
    }

    const uri = rec.getURI()
    if (!uri) {
      // Nothing was recorded — leave 'listening' instead of getting stuck there.
      setState('idle')
      return
    }

    await processAudio(uri)
  }, [processAudio, debug])

  // The auto-stop timer fires up to 10 s later; read through a ref so it runs
  // the latest pipeline (current actions / props) rather than a stale closure.
  const stopAndProcessRef = useRef(stopAndProcess)
  stopAndProcessRef.current = stopAndProcess

  const startListening = useCallback(async () => {
    if (startingRef.current || recordingRef.current) return
    startingRef.current = true

    let recording: Audio.Recording | null = null
    try {
      // Request mic permission
      const { granted } = await Audio.requestPermissionsAsync()
      if (!granted) {
        onError?.(new Error('Microphone permission denied'))
        setState('error')
        scheduleErrorReset()
        return
      }

      await Audio.setAudioModeAsync({
        allowsRecordingIOS: true,
        playsInSilentModeIOS: true,
      })

      recording = new Audio.Recording()
      await recording.prepareToRecordAsync(RECORDING_OPTIONS)
      await recording.startAsync()
      recordingRef.current = recording
      setState('listening')

      // Auto-stop after MAX_RECORD_MS
      autoStopRef.current = setTimeout(() => {
        void stopAndProcessRef.current()
      }, MAX_RECORD_MS)
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))
      if (debug) console.error('[VoiceLayer] could not start recording:', error)
      // Release a half-prepared recorder so the next attempt can prepare a new one.
      recording?.stopAndUnloadAsync().catch(() => undefined)
      recordingRef.current = null
      onError?.(error)
      setState('error')
      scheduleErrorReset()
    } finally {
      startingRef.current = false
    }
  }, [onError, debug, scheduleErrorReset])

  const stopListening = useCallback(async () => {
    await stopAndProcess()
  }, [stopAndProcess])

  const toggle = useCallback(async () => {
    if (state === 'listening') {
      await stopListening()
    } else if (state === 'idle' || state === 'error') {
      await startListening()
    }
    // 'processing' and 'speaking' ignore taps.
  }, [state, startListening, stopListening])

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      if (autoStopRef.current) clearTimeout(autoStopRef.current)
      if (errorResetRef.current) clearTimeout(errorResetRef.current)
      recordingRef.current?.stopAndUnloadAsync().catch(() => undefined)
    }
  }, [])

  return { state, transcript, message, toggle, isReady }
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * useVoiceLayerScreen — optional hook for per-screen voice context.
3
+ *
4
+ * Call this inside any screen component to give VoiceLayer richer context.
5
+ * The SDK works perfectly without it — this hook makes intent resolution
6
+ * and chips smarter for that specific screen.
7
+ *
8
+ * Usage (minimal):
9
+ * useVoiceLayerScreen({ hints: ['add customer', 'search', 'export'] })
10
+ *
11
+ * Usage (full):
12
+ * useVoiceLayerScreen({
13
+ * hints: ['filter orders', 'mark delivered', 'view details'],
14
+ * elements: ['Pending (12)', 'Delivered (8)', 'Search bar'],
15
+ * })
16
+ *
17
+ * Rules:
18
+ * hints — 3–6 short action phrases for this screen
19
+ * elements — labels of visible interactive elements (optional)
20
+ * Arrays can be defined inline — the hook handles ref stability.
21
+ */
22
+
23
import { useEffect, useRef } from 'react';
import { useNavigationState, useRoute } from '@react-navigation/native';
import { screenRegistry } from './screenRegistry';
26
+
27
/**
 * Register this screen's voice context (hints / visible elements) in the
 * screenRegistry for as long as the calling component is mounted.
 *
 * @param {{ hints?: string[], elements?: string[] }} [options]
 */
export function useVoiceLayerScreen({ hints = [], elements = [] } = {}) {
  // Name of the route THIS component is rendered in. The focused route of the
  // containing navigator is not suitable: every mounted screen (e.g. all tabs)
  // would then register and unregister under the focused screen's name.
  const route = useRoute();
  const routeName = route?.name ?? null;

  // Refs avoid re-firing the effect when arrays are redefined inline
  const hintsRef = useRef(hints);
  const elementsRef = useRef(elements);
  hintsRef.current = hints;
  elementsRef.current = elements;

  // Changes with the arrays' contents, not their identity, so inline arrays
  // stay cheap while real updates (e.g. "Pending (12)" → "Pending (13)")
  // are re-registered.
  const contentKey = JSON.stringify([hints, elements]);

  useEffect(() => {
    if (!routeName) return;
    screenRegistry.register(routeName, {
      hints: hintsRef.current,
      elements: elementsRef.current,
    });
    return () => screenRegistry.unregister(routeName);
  }, [routeName, contentKey]);
}