voicelayer-rn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "voicelayer-rn",
3
+ "version": "0.1.0",
4
+ "description": "Voice intelligence for React Native — Hindi, Hinglish, English. Zero config.",
5
+ "main": "src/index.js",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/voicelayer/voicelayer-sdk"
10
+ },
11
+ "keywords": ["react-native", "voice", "navigation", "hindi", "hinglish", "sdk"],
12
+ "peerDependencies": {
13
+ "@react-navigation/native": ">=6.0.0",
14
+ "react": ">=18.0.0",
15
+ "react-native": ">=0.70.0"
16
+ },
17
+ "files": ["src/"]
18
+ }
@@ -0,0 +1,170 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — LearningStore
3
+ //
4
+ // Persists resolved voice commands in AsyncStorage (→ UserDefaults on iOS,
5
+ // SharedPreferences on Android). Uses the same Levenshtein fuzzy matching
6
+ // as the web SDK so learned commands survive app restarts.
7
+ //
8
+ // Fast path: findMatch() — fully in-memory, synchronous after hydration
9
+ // Slow path: store() — writes to AsyncStorage in the background
10
+ // ─────────────────────────────────────────────────────────────────────────────
11
+
12
+ import AsyncStorage from '@react-native-async-storage/async-storage'
13
+ import type { StoredCommand, VoiceResult } from './types'
14
+
15
/** Max edit distance for a fuzzy match, as a fraction of the normalised transcript length. */
const MATCH_THRESHOLD = 0.20
/** Subtracted from MATCH_THRESHOLD whenever findMatch() is given a screen name. */
const SAME_SCREEN_BONUS = 0.04
/** Max edit distance (fraction of length) at which a new transcript counts as a duplicate. */
const DEDUPE_THRESHOLD = 0.05
/** Upper bound on the number of cached commands kept by prune(). */
const MAX_ENTRIES = 300
/** Entries whose lastUsed is older than this many days are dropped by prune(). */
const DECAY_DAYS = 60

/**
 * In-memory cache of resolved voice commands, mirrored to AsyncStorage under
 * a per-app key. Lookups are fuzzy (Levenshtein distance on normalised text).
 */
export class LearningStore {
  private cache: StoredCommand[] = []
  private readonly key: string
  private readonly readyPromise: Promise<void>

  /**
   * @param appId Namespaces the AsyncStorage key so several apps do not share a cache.
   */
  constructor(appId: string) {
    this.key = `vl_learning_rn_${appId}`
    this.readyPromise = this.hydrate()
  }

  /** Resolves once the persisted cache has been loaded (or found unusable). */
  ready(): Promise<void> {
    return this.readyPromise
  }

  // ── Public API ──────────────────────────────────────────────────────────────

  /**
   * Finds the closest cached command for a transcript. Synchronous; only
   * meaningful after ready() has resolved.
   *
   * A hit bumps the entry's hitCount / lastUsed and schedules a background
   * write.
   *
   * @param transcript Raw recognised text.
   * @param screenName When provided, the allowed edit distance is tightened by
   *   SAME_SCREEN_BONUS. Stored entries carry no screen, so this applies to
   *   every candidate.
   * @returns The best match, or null when nothing is within the threshold.
   */
  findMatch(transcript: string, screenName?: string): StoredCommand | null {
    const normalised = LearningStore.normalise(transcript)
    // An empty or all-filler utterance carries no signal; matching on it would
    // fire whatever entry happens to have an empty transcript.
    if (normalised.length === 0) return null

    // Depends only on the query, so compute it once rather than per candidate.
    const threshold = Math.floor(
      normalised.length * (MATCH_THRESHOLD - (screenName ? SAME_SCREEN_BONUS : 0)),
    )

    let best: StoredCommand | null = null
    let bestDist = Infinity
    for (const cmd of this.cache) {
      const dist = LearningStore.levenshtein(normalised, cmd.transcript)
      if (dist <= threshold && dist < bestDist) {
        best = cmd
        bestDist = dist
      }
    }
    if (best === null) return null

    best.hitCount++
    best.lastUsed = Date.now()
    void this.persist() // fire-and-forget; persist() never rejects

    return best
  }

  /**
   * Stores a newly resolved command, skipping near-duplicates and transcripts
   * that normalise to nothing, then prunes and persists.
   *
   * @param originalTranscript Raw recognised text.
   * @param result Resolved action to replay on a future match.
   * @param screenName Accepted for API compatibility; currently not stored.
   */
  async store(
    originalTranscript: string,
    result: VoiceResult,
    screenName?: string,
  ): Promise<void> {
    // Writing before hydration finishes would persist a near-empty cache over
    // the saved history, and hydrate() would then discard the new entry.
    await this.readyPromise
    if (this.insert(originalTranscript, result)) await this.persist()
  }

  /**
   * Seeds several variant transcripts for one action (from VariantGenerator).
   * Variants shorter than 4 characters after trimming are ignored. The cache
   * is written to storage once, after all variants have been processed.
   */
  async bulkStore(
    variants: string[],
    actionId: string,
    speak: string,
    confidence: number,
  ): Promise<void> {
    await this.readyPromise

    let changed = false
    for (const variant of variants) {
      if (variant.trim().length < 4) continue
      if (this.insert(variant, { actionId, params: {}, speak, confidence })) changed = true
    }
    if (changed) await this.persist()
  }

  // ── Private ─────────────────────────────────────────────────────────────────

  /** Adds one entry to the in-memory cache; returns true when the cache changed. */
  private insert(originalTranscript: string, result: VoiceResult): boolean {
    const normalised = LearningStore.normalise(originalTranscript)
    if (normalised.length === 0) return false

    const maxDist = Math.floor(normalised.length * DEDUPE_THRESHOLD)
    const isDuplicate = this.cache.some(
      (cmd) => LearningStore.levenshtein(normalised, cmd.transcript) <= maxDist,
    )
    if (isDuplicate) return false

    this.cache.push({
      transcript: normalised,
      original: originalTranscript,
      actionId: result.actionId,
      params: result.params,
      speak: result.speak,
      hitCount: 1,
      lastUsed: Date.now(),
      confidence: result.confidence,
    })
    this.prune()
    return true
  }

  /** Loads the persisted cache; anything unreadable or malformed is discarded. */
  private async hydrate(): Promise<void> {
    try {
      const raw = await AsyncStorage.getItem(this.key)
      if (!raw) return

      const parsed: unknown = JSON.parse(raw)
      // Valid JSON that is not an array would break iteration in findMatch().
      if (!Array.isArray(parsed)) return
      this.cache = parsed.filter(LearningStore.isUsableEntry)
    } catch {
      // Corrupt storage — start fresh
      this.cache = []
    }
  }

  /**
   * Checks only the fields this class itself reads (transcript, hitCount,
   * lastUsed); the remaining StoredCommand fields are taken on trust.
   */
  private static isUsableEntry(value: unknown): value is StoredCommand {
    if (typeof value !== 'object' || value === null) return false
    const entry = value as Record<string, unknown>
    return (
      typeof entry.transcript === 'string' &&
      typeof entry.hitCount === 'number' &&
      typeof entry.lastUsed === 'number'
    )
  }

  /** Writes the whole cache to AsyncStorage. Never rejects. */
  private async persist(): Promise<void> {
    try {
      await AsyncStorage.setItem(this.key, JSON.stringify(this.cache))
    } catch { /* storage full — non-fatal */ }
  }

  /** Drops entries unused for DECAY_DAYS, then keeps the MAX_ENTRIES best-scoring ones. */
  private prune(): void {
    const cutoff = Date.now() - DECAY_DAYS * 86_400_000
    // Hit count dominates; recency contributes a small tie-breaking term.
    const score = (c: StoredCommand): number => c.hitCount * 10 + c.lastUsed / 1e10
    this.cache = this.cache
      .filter((c) => c.lastUsed > cutoff)
      .sort((a, b) => score(b) - score(a))
      .slice(0, MAX_ENTRIES)
  }

  // ── Text normalisation ──────────────────────────────────────────────────────

  /**
   * Lower-cases the text, replaces punctuation with spaces (Latin word
   * characters and the Devanagari block are kept), removes a fixed list of
   * romanised filler words and collapses whitespace.
   *
   * The filler list is matched with `\b`, which only recognises ASCII word
   * characters, so it applies to romanised words only.
   */
  static normalise(text: string): string {
    return text
      .toLowerCase()
      // Strip punctuation but keep Devanagari and Latin word chars
      .replace(/[^\w\sऀ-ॿ]/g, ' ')
      // Remove filler words
      .replace(/\b(hai|hain|toh|ji|ok|okay|please|kya|aap|mujhe|na|kar|do|de|meri|mera)\b/g, ' ')
      .replace(/\s+/g, ' ')
      .trim()
  }

  // ── Iterative Levenshtein — O(m×n) time, O(min(m,n)) space ────────────────

  /**
   * Edit distance (insert / delete / substitute, each cost 1) between two
   * strings, compared per UTF-16 code unit. Keeps a single row sized to the
   * shorter string.
   */
  static levenshtein(a: string, b: string): number {
    if (a === b) return 0
    if (a.length === 0) return b.length
    if (b.length === 0) return a.length

    const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a]

    // row[i] = distance between shorter[0..i) and the prefix of `longer`
    // processed so far.
    const row = Array.from({ length: shorter.length + 1 }, (_, i) => i)
    for (let j = 1; j <= longer.length; j++) {
      let diagonal = row[0]
      row[0] = j
      for (let i = 1; i <= shorter.length; i++) {
        const above = row[i]
        row[i] = longer[j - 1] === shorter[i - 1]
          ? diagonal
          : 1 + Math.min(diagonal, above, row[i - 1])
        diagonal = above
      }
    }
    return row[shorter.length]
  }
}
@@ -0,0 +1,184 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — MicButton
3
+ //
4
+ // Animated floating mic button. Pulses while listening, spins while processing.
5
+ // ─────────────────────────────────────────────────────────────────────────────
6
+
7
+ import React, { useEffect, useRef } from 'react'
8
+ import {
9
+ Animated,
10
+ Pressable,
11
+ StyleSheet,
12
+ Text,
13
+ View,
14
+ ActivityIndicator,
15
+ } from 'react-native'
16
+ import type { VoiceState } from './types'
17
+
18
/** Screen edge the floating button is anchored to. */
type MicButtonPosition = 'bottom-right' | 'bottom-left' | 'bottom-center'

/** Props accepted by MicButton. Optional props fall back to defaults set in the component. */
interface MicButtonProps {
  /** Current voice state; selects the icon, the label and the animation. */
  state: VoiceState
  /** Invoked when the button is pressed. */
  onPress: () => void
  /** Button background and pulse-ring colour. */
  color?: string
  /** Where the button is anchored. */
  position?: MicButtonPosition
  /** Text shown in the overlay while listening. */
  transcript?: string
  /** Text shown in the overlay while speaking or processing. */
  message?: string
}
26
+
27
/**
 * Glyph drawn inside the button for each voice state.
 * The `processing` glyph is a placeholder: MicButton renders an
 * ActivityIndicator instead of text in that state.
 */
const ICON: Record<VoiceState, string> = {
  idle: '🎙',
  listening: '⏹', // tap to stop
  processing: '…',
  speaking: '🔊',
  error: '⚠️',
}
34
+
35
/**
 * Floating microphone button.
 *
 * While `state` is 'listening' a ring behind the button and the icon pulse in
 * a loop; in every other state the scale springs back to 1. A text overlay
 * above the button shows the transcript (listening) or the message
 * (speaking / processing). Presses are ignored while processing or speaking.
 */
export const MicButton: React.FC<MicButtonProps> = ({
  state,
  onPress,
  color = '#6C63FF',
  position = 'bottom-right',
  transcript = '',
  message = '',
}) => {
  const pulse = useRef(new Animated.Value(1)).current

  // Drive the pulse from `state`. Each branch returns a cleanup so the running
  // animation is stopped both on state changes and on unmount; without it the
  // loop would keep running after the component is gone.
  useEffect(() => {
    if (state !== 'listening') {
      const settle = Animated.spring(pulse, { toValue: 1, useNativeDriver: true })
      settle.start()
      return () => settle.stop()
    }

    const loop = Animated.loop(
      Animated.sequence([
        Animated.timing(pulse, { toValue: 1.25, duration: 600, useNativeDriver: true }),
        Animated.timing(pulse, { toValue: 1.0, duration: 600, useNativeDriver: true }),
      ]),
    )
    loop.start()
    return () => loop.stop()
  }, [state, pulse])

  const isListening = state === 'listening'
  const isBusy = state === 'processing' || state === 'speaking'

  const containerStyle = [
    styles.container,
    position === 'bottom-right' && styles.bottomRight,
    position === 'bottom-left' && styles.bottomLeft,
    position === 'bottom-center' && styles.bottomCenter,
  ]

  // Empty string means "no overlay" (idle and error states).
  let label = ''
  if (isListening) {
    label = transcript || 'Listening…'
  } else if (isBusy) {
    label = message || 'Processing…'
  }

  return (
    <View style={containerStyle}>
      {/* Transcript / response overlay */}
      {label ? (
        <View style={styles.label}>
          <Text style={styles.labelText} numberOfLines={3}>{label}</Text>
        </View>
      ) : null}

      {/* Pulse ring behind button while listening */}
      {isListening && (
        <Animated.View
          style={[
            styles.ring,
            { borderColor: color, transform: [{ scale: pulse }] },
          ]}
        />
      )}

      <Pressable
        onPress={onPress}
        disabled={isBusy}
        style={({ pressed }) => [
          styles.button,
          { backgroundColor: state === 'error' ? '#EF4444' : color },
          pressed && styles.buttonPressed,
        ]}
        accessibilityLabel="VoiceLayer mic button"
        accessibilityHint="Tap to start or stop voice command"
        accessibilityRole="button"
      >
        <Animated.View style={{ transform: [{ scale: isListening ? pulse : 1 }] }}>
          {state === 'processing' ? (
            <ActivityIndicator color="#fff" size="small" />
          ) : (
            <Text style={styles.icon}>{ICON[state]}</Text>
          )}
        </Animated.View>
      </Pressable>
    </View>
  )
}
118
+
119
/** Diameter of the round button. */
const BTN = 56
/** How much wider the pulse ring is than the button. */
const RING_EXTRA = 16
/** Diameter of the pulse ring. */
const RING = BTN + RING_EXTRA

/** Static styles for MicButton; colours that depend on props are applied inline. */
const styles = StyleSheet.create({
  // Absolute wrapper; horizontal anchoring comes from one of the three
  // position styles below.
  container: {
    position: 'absolute',
    bottom: 24,
    alignItems: 'center',
    zIndex: 999,
  },
  bottomRight: {
    right: 20,
    alignItems: 'flex-end',
  },
  bottomLeft: {
    left: 20,
    alignItems: 'flex-start',
  },
  bottomCenter: {
    alignSelf: 'center',
    left: 0,
    right: 0,
    alignItems: 'center',
  },

  // The round button itself, with Android elevation and iOS shadow.
  button: {
    width: BTN,
    height: BTN,
    borderRadius: BTN / 2,
    alignItems: 'center',
    justifyContent: 'center',
    elevation: 6,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 3 },
    shadowOpacity: 0.25,
    shadowRadius: 4,
  },
  buttonPressed: {
    opacity: 0.85,
    transform: [{ scale: 0.95 }],
  },
  icon: {
    fontSize: 22,
  },

  // Ring drawn behind the button; the negative bottom offset lets it overhang
  // the button by half of RING_EXTRA.
  ring: {
    position: 'absolute',
    width: RING,
    height: RING,
    borderRadius: RING / 2,
    borderWidth: 2,
    bottom: -(RING_EXTRA / 2),
    opacity: 0.4,
  },

  // Overlay bubble above the button.
  label: {
    backgroundColor: 'rgba(0,0,0,0.75)',
    borderRadius: 10,
    paddingHorizontal: 12,
    paddingVertical: 8,
    maxWidth: 220,
    marginBottom: 10,
  },
  labelText: {
    color: '#fff',
    fontSize: 13,
    lineHeight: 18,
    textAlign: 'center',
  },
})
@@ -0,0 +1,38 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — TTSPlayer
3
+ //
4
+ // Uses expo-speech (on-device, free) by default.
5
+ // Falls back to ElevenLabs or OpenAI TTS via the proxy for better Hindi voice.
6
+ // ─────────────────────────────────────────────────────────────────────────────
7
+
8
+ import * as Speech from 'expo-speech'
9
+ import type { TranscriberLanguage } from './Transcriber'
10
+
11
/** Speaks text with the on-device engine exposed by expo-speech. */
export class TTSPlayer {
  /**
   * @param language Language setting used to pick the speech locale.
   */
  constructor(private readonly language: TranscriberLanguage) {}

  /**
   * Speaks `text`, interrupting anything currently being spoken.
   *
   * The returned promise settles when the utterance finishes, fails, or is
   * interrupted by stop() / a newer speak(). It never rejects.
   */
  async speak(text: string): Promise<void> {
    // Stop any current speech first
    await Speech.stop()

    // Nothing to say: do not wait on callbacks for an empty utterance.
    if (text.trim().length === 0) return

    await new Promise<void>((resolve) => {
      const settle = (): void => resolve()
      Speech.speak(text, {
        language: this.resolveLocale(),
        pitch: 1.0,
        rate: 0.9, // slightly slower for clarity
        onDone: settle,
        // Without this, an utterance cut short by stop() leaves the caller
        // awaiting forever.
        onStopped: settle,
        onError: settle, // non-fatal
      })
    })
  }

  /** Stops any speech in progress. */
  async stop(): Promise<void> {
    await Speech.stop()
  }

  /** Maps the configured language to a locale tag; anything but 'en' speaks Hindi. */
  private resolveLocale(): string {
    switch (this.language) {
      case 'en':
        return 'en-US'
      case 'hi':
      default:
        return 'hi-IN' // default: Hindi (Repeatly's primary language)
    }
  }
}
@@ -0,0 +1,117 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — Transcriber
3
+ //
4
+ // Two-tier STT:
5
+ // Primary: whisper.rn — on-device, ~100ms, no server call, works offline
6
+ // Fallback: /api/voice — server-side Whisper when model isn't loaded yet
7
+ //
8
+ // The model is a one-time 39MB download (ggml-tiny.bin). After that every
9
+ // transcription is local and instant.
10
+ // ─────────────────────────────────────────────────────────────────────────────
11
+
12
+ import * as FileSystem from 'expo-file-system'
13
+ import { initWhisper, type WhisperContext } from 'whisper.rn'
14
+
15
/**
 * Language setting for transcription. With 'auto' no language is passed to
 * the recogniser.
 */
export type TranscriberLanguage = 'hi' | 'en' | 'auto'

/** File name used for the model inside the app's document directory. */
const MODEL_FILENAME = 'ggml-tiny.bin'

/** Download location of the Whisper tiny model. */
const WHISPER_TINY_URL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin'
21
+
22
/**
 * On-device speech-to-text backed by whisper.rn. When no model is available
 * transcribe() returns null so the caller can fall back to the server.
 */
export class Transcriber {
  private ctx: WhisperContext | null = null
  private modelReady = false
  private initPromise: Promise<void> | null = null

  constructor(
    private readonly language: TranscriberLanguage,
    /** Pass require('../assets/ggml-tiny.bin') if bundled, or omit to auto-download. */
    private readonly modelPath?: number | string,
    private readonly debug = false,
  ) {}

  /**
   * Initialise the on-device Whisper model.
   * Call this early (e.g. on component mount) to warm up before first use.
   *
   * Concurrent calls share one attempt and a successful initialisation is
   * never repeated. A failed attempt (e.g. the download did not complete) is
   * forgotten, so a later call tries again. Never rejects.
   */
  async init(): Promise<void> {
    if (this.modelReady) return

    if (this.initPromise === null) {
      this.initPromise = this._init().finally(() => {
        // Keeping the settled promise after a failure would make every later
        // init() a no-op and pin the SDK to server STT for the whole session.
        if (!this.modelReady) this.initPromise = null
      })
    }
    return this.initPromise
  }

  /** Returns true if the on-device model is ready. */
  get isReady(): boolean {
    return this.modelReady
  }

  /**
   * Transcribe an audio file URI.
   *
   * @returns The trimmed transcript, or null when the on-device model is not
   *   loaded or transcription throws (caller should fall back to server).
   */
  async transcribe(audioUri: string): Promise<string | null> {
    if (!this.ctx) return null

    try {
      const { promise } = this.ctx.transcribe(audioUri, {
        language: this.language === 'auto' ? undefined : this.language,
        // NOTE(review): whisper.rn documents maxLen as a segment-length limit;
        // confirm that 1 is intended.
        maxLen: 1,
        // Suppress blank audio tokens
        suppressBlank: true,
        temperature: 0,
      })
      const { result } = await promise
      return result?.trim() ?? null
    } catch (err) {
      if (this.debug) console.warn('[VoiceLayer/Transcriber] whisper.rn error:', err)
      return null
    }
  }

  // ── Private ─────────────────────────────────────────────────────────────────

  /** Resolves the model file and creates the Whisper context; failures are logged only. */
  private async _init(): Promise<void> {
    try {
      const filePath = await this.resolveModelPath()
      if (!filePath) {
        if (this.debug) console.log('[VoiceLayer/Transcriber] No model path — will use server STT')
        return
      }

      this.ctx = await initWhisper({ filePath })
      this.modelReady = true
      if (this.debug) console.log('[VoiceLayer/Transcriber] on-device Whisper ready')
    } catch (err) {
      if (this.debug) console.warn('[VoiceLayer/Transcriber] init failed, falling back to server:', err)
    }
  }

  /**
   * Returns the model location: the developer-supplied path, a previously
   * downloaded file, or a fresh download. Returns null when none is available.
   */
  private async resolveModelPath(): Promise<string | number | null> {
    // Developer passed require('../assets/ggml-tiny.bin') or a file URI
    if (this.modelPath !== undefined) return this.modelPath

    // Auto-download to DocumentDirectory
    const directory = FileSystem.documentDirectory
    if (!directory) {
      if (this.debug) console.warn('[VoiceLayer/Transcriber] No document directory available')
      return null
    }

    const dest = `${directory}${MODEL_FILENAME}`
    const info = await FileSystem.getInfoAsync(dest)
    if (info.exists) return dest

    if (this.debug) console.log('[VoiceLayer/Transcriber] Downloading Whisper tiny model (~39MB)…')

    // Download next to the final location and move into place only on success,
    // so an interrupted or non-200 download is never mistaken for a complete
    // model on the next launch.
    const partial = `${dest}.part`
    try {
      const { status } = await FileSystem.downloadAsync(WHISPER_TINY_URL, partial)
      if (status === 200) {
        await FileSystem.moveAsync({ from: partial, to: dest })
        if (this.debug) console.log('[VoiceLayer/Transcriber] Model downloaded to', dest)
        return dest
      }
      if (this.debug) console.warn('[VoiceLayer/Transcriber] Model download returned HTTP status', status)
    } catch (err) {
      if (this.debug) console.warn('[VoiceLayer/Transcriber] Model download failed:', err)
    }

    try {
      await FileSystem.deleteAsync(partial, { idempotent: true })
    } catch { /* best-effort cleanup of the partial file */ }

    return null
  }
}
@@ -0,0 +1,51 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // VoiceLayer RN — <VoiceLayer /> component
3
+ //
4
+ // Drop this into any screen. It renders an absolute-positioned floating mic
5
+ // button. On first voice command it hits the server; after that, everything
6
+ // runs on-device from the LearningStore cache.
7
+ //
8
+ // Usage:
9
+ // import { VoiceLayer } from 'voicelayer-rn'
10
+ //
11
+ // <VoiceLayer
12
+ // proxyUrl="https://your-server.com"
13
+ // appId="repeatly"
14
+ // screenName="CustomerList"
15
+ // language="hi"
16
+ // actions={[
17
+ // {
18
+ // id: 'show-inactive',
19
+ // description: 'Show inactive or expired subscription customers',
20
+ // onTrigger: () => setFilter('inactive'),
21
+ // },
22
+ // {
23
+ // id: 'register-customer',
24
+ // description: 'Register a new customer',
25
+ // params: ['name', 'phone', 'plan'],
26
+ // onTrigger: (params) =>
27
+ // navigation.navigate('CustomerForm', { prefill: params }),
28
+ // },
29
+ // ]}
30
+ // />
31
+ // ─────────────────────────────────────────────────────────────────────────────
32
+
33
+ import React from 'react'
34
+ import { MicButton } from './MicButton'
35
+ import { useVoiceLayer } from './useVoiceLayer'
36
+ import type { VoiceLayerProps } from './types'
37
+
38
/**
 * Drop-in voice control: runs the useVoiceLayer hook with the given props and
 * renders its state through a floating MicButton.
 */
export const VoiceLayer: React.FC<VoiceLayerProps> = (props) => {
  const voice = useVoiceLayer(props)
  const anchor = props.position ?? 'bottom-right'

  return (
    <MicButton
      state={voice.state}
      onPress={voice.toggle}
      color={props.color}
      position={anchor}
      transcript={voice.transcript}
      message={voice.message}
    />
  )
}