npm - voicelayer-rn - Versions diffs - 0.1.0 - Mend

voicelayer-rn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/package.json +18 -0
package/src/LearningStore.ts +170 -0
package/src/MicButton.tsx +184 -0
package/src/TTSPlayer.ts +38 -0
package/src/Transcriber.ts +117 -0
package/src/VoiceLayer.tsx +51 -0
package/src/VoiceLayerButton.jsx +566 -0
package/src/api.ts +94 -0
package/src/conversationStore.js +44 -0
package/src/index.js +12 -0
package/src/index.ts +14 -0
package/src/navigationHistory.js +52 -0
package/src/screenRegistry.js +23 -0
package/src/types.ts +102 -0
package/src/useVoiceIntent.js +91 -0
package/src/useVoiceLayer.ts +269 -0
package/src/useVoiceLayerScreen.js +47 -0

package/src/index.js ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * voicelayer-rn — Voice intelligence for React Native
+ *
+ * Main exports:
+ *   VoiceLayerButton     — drop-in floating voice assistant component
+ *   useVoiceLayerScreen  — optional hook for per-screen context hints
+ *   setVoiceLayerServer  — override server URL (for self-hosting)
+ */
+export { default as VoiceLayerButton } from './VoiceLayerButton';
+export { useVoiceLayerScreen }         from './useVoiceLayerScreen';
+export { setVoiceLayerServer }         from './useVoiceIntent';

package/src/index.ts ADDED Viewed

@@ -0,0 +1,14 @@
+// VoiceLayer RN — public exports
+export { VoiceLayer }        from './VoiceLayer'
+export { useVoiceLayer }     from './useVoiceLayer'
+export { LearningStore }     from './LearningStore'
+export { Transcriber }       from './Transcriber'
+export { TTSPlayer }         from './TTSPlayer'
+export type {
+  VLAction,
+  StoredCommand,
+  VoiceResult,
+  VoiceLayerProps,
+  VoiceState,
+} from './types'

package/src/navigationHistory.js ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * navigationHistory — tracks screen transitions for "wapas jaao" (Hindi for "go back") and pronoun resolution.
+ *
+ * Call navigationHistory.init(navigationRef) once after NavigationContainer is ready.
+ * VoiceLayerButton does this automatically on mount.
+ */
+const MAX_HISTORY = 12;
+let _history  = [];
+let _listener = null;
+export const navigationHistory = {
+  /**
+   * Start listening to navigation state changes.
+   * Safe to call multiple times — only registers once.
+   * @param {NavigationContainerRef} navRef
+   */
+  init(navRef) {
+    if (_listener || !navRef?.isReady()) return;
+    const handler = () => {
+      const route = navRef.getCurrentRoute();
+      if (!route?.name) return;
+      // Deduplicate consecutive same-screen entries
+      if (_history[_history.length - 1] === route.name) return;
+      _history.push(route.name);
+      if (_history.length > MAX_HISTORY) _history.shift();
+    };
+    _listener = navRef.addListener('state', handler);
+    handler(); // seed with current screen immediately
+  },
+  /** Most recent n screen names, oldest first. */
+  getRecent(n = 6) {
+    return _history.slice(-n);
+  },
+  /** The screen visited before the current one, or null. */
+  getPrevious() {
+    return _history.length >= 2 ? _history[_history.length - 2] : null;
+  },
+  canGoBack() {
+    return _history.length >= 2;
+  },
+  reset() {
+    _history  = [];
+    _listener = null;
+  },
+};

package/src/screenRegistry.js ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * screenRegistry — global store for per-screen context from useVoiceLayerScreen.
+ *
+ * Screens register on mount, unregister on unmount. VoiceLayerButton reads
+ * the current screen's registration at command-submit time to enrich Claude's context.
+ */
+const _registry = new Map();
+export const screenRegistry = {
+  register(screenName, { hints = [], elements = [] } = {}) {
+    _registry.set(screenName, { hints: [...hints], elements: [...elements] });
+  },
+  unregister(screenName) {
+    _registry.delete(screenName);
+  },
+  get(screenName) {
+    return _registry.get(screenName) ?? null;
+  },
+  clear() {
+    _registry.clear();
+  },
+};

package/src/types.ts ADDED Viewed

@@ -0,0 +1,102 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// VoiceLayer RN — shared types
+// ─────────────────────────────────────────────────────────────────────────────
+/**
+ * A single action the user can trigger by voice on the current screen.
+ * The hook looks an action up by comparing `id` with the `actionId` of a
+ * cached command or of a server result.
+ */
+export interface VLAction {
+  /** Unique stable ID. Stored in LearningStore — never change after shipping. */
+  id: string
+  /** Plain English/Hindi description used by Claude for intent matching. */
+  description: string
+  /**
+   * Parameter names Claude can extract from the voice command.
+   * e.g. ['name', 'phone', 'plan'] for a registration action.
+   * If empty / omitted, the action takes no parameters.
+   */
+  params?: string[]
+  /**
+   * Called when this action is resolved — either from cache (instant)
+   * or from the server (first time). useVoiceLayer invokes it after the
+   * spoken confirmation has finished.
+   * Receives extracted params as a key→value map.
+   */
+  onTrigger: (params: Record<string, string>) => void
+}
+/**
+ * One learned command stored in AsyncStorage.
+ * useVoiceLayer reads `actionId`, `params` and `speak` from a cache hit;
+ * the remaining bookkeeping fields are maintained by LearningStore, whose
+ * implementation is not part of this file.
+ */
+export interface StoredCommand {
+  /** Normalised transcript used for fuzzy matching. */
+  transcript: string
+  /** Original transcript as spoken (for debugging). */
+  original: string
+  actionId: string                 // compared with VLAction.id when a cached command is replayed
+  params: Record<string, string>   // passed to VLAction.onTrigger on a cache hit
+  speak: string                    // confirmation text shown and spoken on a cache hit
+  hitCount: number                 // presumably how often this entry matched — TODO confirm in LearningStore
+  lastUsed: number                 // presumably a timestamp — TODO confirm unit in LearningStore
+  confidence: number               // presumably the confidence reported when learned — TODO confirm range
+}
+/**
+ * Response from the /api/voice server endpoint.
+ * When `actionId` matches no registered action, useVoiceLayer only speaks
+ * `speak` as a clarification and triggers nothing.
+ */
+export interface VoiceResult {
+  actionId: string                 // compared with VLAction.id to pick the action to run
+  params: Record<string, string>   // passed to the matched action's onTrigger
+  speak: string                    // text shown in the overlay and spoken via TTS
+  confidence: number               // NOTE(review): scale/range not visible here — confirm against the server
+}
+/** Props for the <VoiceLayer /> component; the useVoiceLayer hook accepts the same object. */
+export interface VoiceLayerProps {
+  /**
+   * Base URL of your VoiceLayer analytics server.
+   * e.g. "https://your-server.com"
+   * Used for the /api/voice fallback when a command isn't in local cache.
+   */
+  proxyUrl: string
+  /**
+   * SDK API key — must match SDK_API_KEY in your server's .env.
+   * Passed as Authorization: Bearer <apiKey> on every server call.
+   * Leave empty if SDK_API_KEY is not set on the server (open dev mode).
+   */
+  apiKey?: string
+  /** App identifier — scopes the LearningStore so commands don't bleed between apps. */
+  appId: string
+  /** Voice commands registered for the current screen. */
+  actions: VLAction[]
+  /**
+   * Current screen / route name — helps the server resolve ambiguous commands.
+   * e.g. "CustomerList", "Dashboard", "OrderDetail"
+   * It is also passed to the LearningStore lookup and store calls.
+   */
+  screenName?: string
+  /** Primary language of your users. Default: 'hi' */
+  language?: 'hi' | 'en' | 'auto'
+  /**
+   * The ggml-tiny.bin Whisper model: either a file path string or a bundled
+   * asset reference (the number returned by require()).
+   * Download it once via: npx whisper.rn download tiny
+   * Then pass: modelPath={require('../assets/ggml-tiny.bin')}
+   *
+   * If omitted, STT falls back to the server (audio is sent to /api/voice).
+   * The server path works but adds ~200ms latency.
+   */
+  modelPath?: number | string
+  /** Floating button position. Default: 'bottom-right' */
+  position?: 'bottom-right' | 'bottom-left' | 'bottom-center'
+  /** Tint color for the mic button. Default: '#6C63FF' */
+  color?: string
+  /** Log debug output to console. Default: false */
+  debug?: boolean
+  /** Called on unrecoverable errors (permission denied, server down, etc.) */
+  onError?: (error: Error) => void
+}
+/**
+ * States the mic button and overlay can be in.
+ * As driven by useVoiceLayer: 'idle' (waiting), 'listening' (recording),
+ * 'processing' (recognising / resolving the command), 'speaking' (TTS reply
+ * playing) and 'error' (shown briefly after a failure or a denied permission).
+ */
+export type VoiceState = 'idle' | 'listening' | 'processing' | 'speaking' | 'error'

package/src/useVoiceIntent.js ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * useVoiceIntent — sends voice context to VoiceLayer server and returns navigation intent.
+ *
+ * Default server: https://voicelayer-sdk-production.up.railway.app
+ * Override:       pass serverUrl prop on VoiceLayerButton, or call setVoiceLayerServer()
+ *                 e.g. for local dev: setVoiceLayerServer('http://10.0.2.2:3001')
+ */
+import { useState, useCallback } from 'react';
+import { conversationStore }     from './conversationStore';
+// ── Server URL ─────────────────────────────────────────────────────────────
+let _serverUrl = 'https://voicelayer-sdk-production.up.railway.app';
+/**
+ * Override the VoiceLayer server URL.
+ * Call once in App.js before any voice commands are issued.
+ *
+ * Examples:
+ *   setVoiceLayerServer('http://10.0.2.2:3001')     // Android emulator → local server
+ *   setVoiceLayerServer('http://localhost:3001')      // iOS Simulator  → local server
+ *   setVoiceLayerServer('http://192.168.1.5:3001')   // Real device    → local server
+ *   setVoiceLayerServer('https://api.voicelayer.dev') // Default (no call needed)
+ */
+export function setVoiceLayerServer(url) {
+  _serverUrl = url.replace(/\/$/, '');
+}
+// ── Hook ───────────────────────────────────────────────────────────────────
+/**
+ * @param {{ apiKey: string, appId?: string, language?: string }}
+ */
+export function useVoiceIntent({ apiKey, appId = 'app', language = 'hi' }) {
+  const [loading, setLoading] = useState(false);
+  const [error,   setError]   = useState(null);
+  /**
+   * Resolve a voice transcript to a navigation route.
+   * @param {string} transcript
+   * @param {object} ctx — context snapshot from VoiceLayerButton
+   */
+  const resolve = useCallback(async (transcript, ctx = {}) => {
+    setLoading(true);
+    setError(null);
+    const {
+      allRoutes     = [],
+      currentRoute  = null,
+      screenMeta    = null,
+      recentScreens = [],
+    } = ctx;
+    const conversationHistory = conversationStore.getHistory();
+    try {
+      const headers = { 'Content-Type': 'application/json' };
+      if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;
+      const res = await fetch(`${_serverUrl}/api/voice`, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify({
+          transcript,
+          routes:              allRoutes,
+          currentScreen:       currentRoute?.name   ?? null,
+          currentScreenParams: currentRoute?.params ?? {},
+          screenHints:         screenMeta?.hints    ?? [],
+          screenElements:      screenMeta?.elements ?? [],
+          recentScreens,
+          conversationHistory,
+          appId,
+          language,
+        }),
+      });
+      if (!res.ok) {
+        const body = await res.json().catch(() => ({}));
+        throw new Error(body.error || `VoiceLayer error ${res.status}`);
+      }
+      return await res.json();
+    } catch (e) {
+      setError(e.message);
+      throw e;
+    } finally {
+      setLoading(false);
+    }
+  }, [apiKey, appId, language]);
+  return { resolve, loading, error };
+}

package/src/useVoiceLayer.ts ADDED Viewed

@@ -0,0 +1,269 @@
+// ─────────────────────────────────────────────────────────────────────────────
+// VoiceLayer RN — useVoiceLayer hook
+//
+// Orchestrates the full voice pipeline:
+//   record → STT (on-device) → LearningStore → server fallback → execute
+//
+// Used internally by <VoiceLayer /> but exported so advanced users can
+// build custom UIs on top of it.
+// ─────────────────────────────────────────────────────────────────────────────
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
+import { Audio } from 'expo-av'
+import * as FileSystem from 'expo-file-system'
+import { LearningStore }         from './LearningStore'
+import { Transcriber }           from './Transcriber'
+import { TTSPlayer }             from './TTSPlayer'
+import { resolveViaServer, resolveViaServerWithAudio } from './api'
+import type { VLAction, VoiceState, VoiceLayerProps } from './types'
+// ── Recording options ─────────────────────────────────────────────────────────
+/* Requests 16 kHz mono audio with a .wav extension on both platforms — the
+ * format the original author states Whisper expects. iOS asks for 16-bit
+ * little-endian integer linear PCM; Android uses the DEFAULT output format
+ * and encoder.
+ * NOTE(review): confirm that Android's DEFAULT format really produces WAV/PCM data. */
+const RECORDING_OPTIONS: Audio.RecordingOptions = {
+  android: {
+    extension:    '.wav',
+    outputFormat: Audio.AndroidOutputFormat.DEFAULT,
+    audioEncoder: Audio.AndroidAudioEncoder.DEFAULT,
+    sampleRate:   16000,
+    numberOfChannels: 1,
+    bitRate:      128_000,
+  },
+  ios: {
+    extension:    '.wav',
+    outputFormat: Audio.IOSOutputFormat.LINEARPCM,
+    audioQuality: Audio.IOSAudioQuality.HIGH,
+    sampleRate:   16000,
+    numberOfChannels: 1,
+    bitRate:      128_000,
+    linearPCMBitDepth:    16,
+    linearPCMIsBigEndian: false,
+    linearPCMIsFloat:     false,
+  },
+  web: {},   // not used — this is the RN SDK
+  isMeteringEnabled: false,
+}
+const MAX_RECORD_MS  = 10_000   // hard cap: recording is auto-stopped 10 s after it starts
+const SILENCE_PAUSE  = 1_500    // reserved for future silence detection (VAD); not referenced in the code shown here
+export interface UseVoiceLayerReturn {
+  state:      VoiceState   // current phase of the voice pipeline
+  transcript: string       // latest recognised text; cleared when a new clip starts processing
+  message:    string   // TTS message shown in overlay; '' when nothing is being spoken
+  toggle:     () => Promise<void>   // start or stop listening; ignored while processing or speaking
+  isReady:    boolean  // true once LearningStore has hydrated and the transcriber's init() has resolved
+}
+/**
+ * Orchestrates the voice pipeline for one screen:
+ * record → on-device STT → LearningStore lookup → server fallback → run action.
+ *
+ * Every failure inside the pipeline is reported through `onError`, announced
+ * to the user, and followed by a return to the 'idle' state, so the mic
+ * button can never get stuck in 'processing' or 'listening'.
+ *
+ * @param props  See VoiceLayerProps. `position` and `color` are not used by the hook.
+ * @returns Current state, transcript, overlay message, a `toggle` function
+ *          that starts/stops listening, and the `isReady` flag.
+ */
+export function useVoiceLayer({
+  proxyUrl,
+  appId,
+  actions,
+  screenName,
+  language = 'hi',
+  modelPath,
+  apiKey,
+  debug = false,
+  onError,
+}: VoiceLayerProps): UseVoiceLayerReturn {
+  const [state,      setState]      = useState<VoiceState>('idle')
+  const [transcript, setTranscript] = useState('')
+  const [message,    setMessage]    = useState('')
+  const [isReady,    setReady]      = useState(false)
+  const recordingRef   = useRef<Audio.Recording | null>(null)
+  const autoStopRef    = useRef<ReturnType<typeof setTimeout> | null>(null)
+  // True while startListening() is still setting up a recording (re-entrancy guard).
+  const startingRef    = useRef(false)
+  // Latest onError, readable from the init effect without making it re-run.
+  const onErrorRef     = useRef(onError)
+  onErrorRef.current = onError
+  // ── Stable singletons (don't recreate on every render) ───────────────────
+  const store = useMemo(() => new LearningStore(appId), [appId])
+  const transcriber = useMemo(
+    () => new Transcriber(language, modelPath, debug),
+    [language, modelPath, debug],
+  )
+  const tts = useMemo(() => new TTSPlayer(language), [language])
+  // ── Initialise on mount ───────────────────────────────────────────────────
+  useEffect(() => {
+    // Ignore results that arrive after unmount or after store/transcriber changed.
+    let cancelled = false
+    Promise.all([
+      store.ready(),
+      transcriber.init(),   // warm up Whisper — downloads model if needed
+    ])
+      .then(() => {
+        if (!cancelled) setReady(true)
+      })
+      .catch((err: unknown) => {
+        // Without this handler a failed init is an unhandled promise rejection.
+        if (cancelled) return
+        onErrorRef.current?.(err instanceof Error ? err : new Error(String(err)))
+      })
+    return () => {
+      cancelled = true
+    }
+  }, [store, transcriber])
+  // ── Core pipeline ─────────────────────────────────────────────────────────
+  const processAudio = useCallback(async (audioUri: string) => {
+    setState('processing')
+    setTranscript('')
+    // The whole pipeline lives in one try block: a failure in STT, the file
+    // read, TTS or the action itself must end in 'idle', not a stuck state.
+    try {
+      // ── Step 1: STT ────────────────────────────────────────────────────────
+      let resolvedTranscript: string | null = null
+      if (transcriber.isReady) {
+        // On-device Whisper — fast, no network
+        resolvedTranscript = await transcriber.transcribe(audioUri)
+        if (debug) console.log('[VoiceLayer] on-device transcript:', resolvedTranscript)
+      }
+      // ── Step 2: LearningStore (cache lookup) ───────────────────────────────
+      if (resolvedTranscript) {
+        setTranscript(resolvedTranscript)
+        const hit = store.findMatch(resolvedTranscript, screenName)
+        if (hit) {
+          if (debug) console.log('[VoiceLayer] cache HIT:', hit)
+          const cached = actions.find((a) => a.id === hit.actionId)
+          // A hit for an action that is not registered on this screen falls
+          // through to the server.
+          if (cached) {
+            setMessage(hit.speak)
+            setState('speaking')
+            await tts.speak(hit.speak)
+            cached.onTrigger(hit.params)
+            setState('idle')
+            setMessage('')
+            return
+          }
+        }
+      }
+      // ── Step 3: Server fallback ────────────────────────────────────────────
+      const opts = { proxyUrl, appId, screenName, language, apiKey }
+      let result: Awaited<ReturnType<typeof resolveViaServer>>
+      let serverTranscript = resolvedTranscript ?? ''
+      if (resolvedTranscript) {
+        result = await resolveViaServer(resolvedTranscript, actions, opts)
+      } else {
+        // No on-device transcript — send audio; server does STT + intent in one shot
+        const audioBase64 = await FileSystem.readAsStringAsync(audioUri, {
+          encoding: FileSystem.EncodingType.Base64,
+        })
+        const r = await resolveViaServerWithAudio(audioBase64, 'wav', actions, opts)
+        result = r
+        serverTranscript = r.transcript ?? ''
+        setTranscript(serverTranscript)
+      }
+      if (debug) console.log('[VoiceLayer] server result:', result)
+      // Cache for next time. Best-effort: a failed cache write must not
+      // discard a command the server already resolved.
+      if (serverTranscript) {
+        try {
+          await store.store(serverTranscript, result, screenName)
+        } catch (cacheErr) {
+          if (debug) console.warn('[VoiceLayer] cache write failed:', cacheErr)
+        }
+      }
+      const action = actions.find((a) => a.id === result.actionId)
+      if (!action) {
+        // Clarify — no matching action
+        const clarify = result.speak
+          || "Samajh nahi aaya, kya aap dobara bol sakte hain?"
+        setMessage(clarify)
+        setState('speaking')
+        await tts.speak(clarify)
+        setState('idle')
+        setMessage('')
+        return
+      }
+      setMessage(result.speak)
+      setState('speaking')
+      await tts.speak(result.speak)
+      action.onTrigger(result.params)
+      setState('idle')
+      setMessage('')
+    } catch (err) {
+      const error = err instanceof Error ? err : new Error(String(err))
+      if (debug) console.error('[VoiceLayer] pipeline error:', error)
+      onError?.(error)
+      const msg = 'Kuch problem ho gayi. Please try again.'
+      setMessage(msg)
+      setState('error')
+      try {
+        await tts.speak(msg)
+      } catch {
+        /* the spoken error is best-effort; the overlay already shows it */
+      }
+      setState('idle')
+      setMessage('')
+    }
+  }, [
+    transcriber, store, tts, actions, proxyUrl, appId,
+    screenName, language, apiKey, debug, onError,
+  ])
+  const stopAndProcess = useCallback(async () => {
+    if (!recordingRef.current) return
+    if (autoStopRef.current) {
+      clearTimeout(autoStopRef.current)
+      autoStopRef.current = null
+    }
+    const rec = recordingRef.current
+    recordingRef.current = null
+    try {
+      await rec.stopAndUnloadAsync()
+    } catch { /* already stopped */ }
+    try {
+      await Audio.setAudioModeAsync({ allowsRecordingIOS: false })
+    } catch (err) {
+      // Restoring the audio mode is best-effort; keep processing the clip.
+      if (debug) console.warn('[VoiceLayer] could not reset audio mode:', err)
+    }
+    const uri = rec.getURI()
+    if (!uri) {
+      // Nothing was recorded — leave 'listening' so the button stays usable.
+      setState('idle')
+      return
+    }
+    await processAudio(uri)
+  }, [processAudio, debug])
+  const startListening = useCallback(async () => {
+    // Ignore a second tap while a recording is being set up or is running;
+    // only one Recording can be prepared at a time.
+    if (startingRef.current || recordingRef.current) return
+    startingRef.current = true
+    // Show 'error' for 2 s, then go back to 'idle' unless the state moved on.
+    const flashError = () => {
+      setState('error')
+      setTimeout(() => setState((s) => (s === 'error' ? 'idle' : s)), 2000)
+    }
+    let recording: Audio.Recording | null = null
+    try {
+      // Request mic permission
+      const { granted } = await Audio.requestPermissionsAsync()
+      if (!granted) {
+        onError?.(new Error('Microphone permission denied'))
+        flashError()
+        return
+      }
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS:  true,
+        playsInSilentModeIOS: true,
+      })
+      recording = new Audio.Recording()
+      await recording.prepareToRecordAsync(RECORDING_OPTIONS)
+      await recording.startAsync()
+      recordingRef.current = recording
+      setState('listening')
+      // Auto-stop after MAX_RECORD_MS
+      autoStopRef.current = setTimeout(stopAndProcess, MAX_RECORD_MS)
+    } catch (err) {
+      const error = err instanceof Error ? err : new Error(String(err))
+      if (debug) console.error('[VoiceLayer] could not start recording:', error)
+      // Release a half-prepared recording so the next attempt can prepare a new one.
+      await recording?.stopAndUnloadAsync().catch(() => undefined)
+      onError?.(error)
+      flashError()
+    } finally {
+      startingRef.current = false
+    }
+  }, [onError, stopAndProcess, debug])
+  const stopListening = useCallback(async () => {
+    await stopAndProcess()
+  }, [stopAndProcess])
+  const toggle = useCallback(async () => {
+    if (state === 'listening') {
+      await stopListening()
+    } else if (state === 'idle' || state === 'error') {
+      await startListening()
+    }
+    // 'processing' and 'speaking' ignore taps until the pipeline finishes.
+  }, [state, startListening, stopListening])
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      if (autoStopRef.current) clearTimeout(autoStopRef.current)
+      recordingRef.current?.stopAndUnloadAsync().catch(() => undefined)
+    }
+  }, [])
+  return { state, transcript, message, toggle, isReady }
+}

package/src/useVoiceLayerScreen.js ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * useVoiceLayerScreen — optional hook for per-screen voice context.
+ *
+ * Call this inside any screen component to give VoiceLayer richer context.
+ * The SDK works perfectly without it — this hook makes intent resolution
+ * and chips smarter for that specific screen.
+ *
+ * Usage (minimal):
+ *   useVoiceLayerScreen({ hints: ['add customer', 'search', 'export'] })
+ *
+ * Usage (full):
+ *   useVoiceLayerScreen({
+ *     hints:    ['filter orders', 'mark delivered', 'view details'],
+ *     elements: ['Pending (12)', 'Delivered (8)', 'Search bar'],
+ *   })
+ *
+ * Rules:
+ *   hints    — 3–6 short action phrases for this screen
+ *   elements — labels of visible interactive elements (optional)
+ *   Arrays can be defined inline — the hook handles ref stability.
+ */
+import { useEffect, useRef } from 'react';
+import { useNavigationState, useRoute } from '@react-navigation/native';
+import { screenRegistry }    from './screenRegistry';
+export function useVoiceLayerScreen({ hints = [], elements = [] } = {}) {
+  // Auto-detect current screen name from navigation state
+  const routeName = useNavigationState(state =>
+    state?.routes[state.index]?.name ?? null
+  );
+  // Refs avoid re-firing the effect when arrays are redefined inline
+  const hintsRef    = useRef(hints);
+  const elementsRef = useRef(elements);
+  hintsRef.current    = hints;
+  elementsRef.current = elements;
+  useEffect(() => {
+    if (!routeName) return;
+    screenRegistry.register(routeName, {
+      hints:    hintsRef.current,
+      elements: elementsRef.current,
+    });
+    return () => screenRegistry.unregister(routeName);
+  }, [routeName]);
+}