@untemps/react-vocal 2.0.0-beta.3 → 2.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +18 -17
- package/dev/src/index.jsx +39 -26
- package/dist/index.es.js +314 -293
- package/dist/index.es.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/index.umd.js +2 -2
- package/dist/index.umd.js.map +1 -1
- package/package.json +1 -1
- package/src/components/Vocal.jsx +25 -9
- package/src/components/__tests__/Vocal.test.jsx +144 -9
- package/src/hooks/__tests__/useCommands.test.js +25 -0
- package/src/hooks/__tests__/useVocal.test.js +6 -1
- package/src/hooks/useCommands.js +34 -6
- package/src/hooks/useVocal.js +3 -3
- package/vitest.setup.js +3 -0
package/package.json
CHANGED
package/src/components/Vocal.jsx
CHANGED
|
@@ -8,12 +8,22 @@ import useCommands from '../hooks/useCommands'
|
|
|
8
8
|
|
|
9
9
|
import Icon from './Icon'
|
|
10
10
|
|
|
11
|
+
// Offers every recognition alternative to `trigger`, segment by segment and in
// the order given, and stops at the first one for which `trigger` returns
// anything other than `null`. Nothing is returned to the caller.
//
// segmentData: iterable of objects exposing an iterable `alternatives` property.
// trigger: called once per alternative; a `null` result means "keep looking".
const tryMatchCommand = (segmentData, trigger) => {
	// True as soon as one candidate of the segment produces a non-null result;
	// later candidates of that segment are then never passed to `trigger`.
	const segmentMatches = (candidates) => {
		for (const candidate of candidates) {
			if (trigger(candidate) !== null) return true
		}
		return false
	}

	for (const segment of segmentData) {
		// First hit wins: remaining segments are skipped entirely.
		if (segmentMatches(segment.alternatives)) break
	}
}
|
|
18
|
+
|
|
11
19
|
const Vocal = ({
|
|
12
20
|
children,
|
|
13
21
|
commands = null,
|
|
14
22
|
lang = 'en-US',
|
|
15
23
|
grammars = null,
|
|
16
24
|
timeout = 3000,
|
|
25
|
+
precision = 0.4, // Fuse.js score threshold for phrase commands only; single-word commands always use exact lookup
|
|
26
|
+
maxAlternatives = 1,
|
|
17
27
|
ariaLabel = 'start recognition',
|
|
18
28
|
style = null,
|
|
19
29
|
className = null,
|
|
@@ -30,8 +40,8 @@ const Vocal = ({
|
|
|
30
40
|
const buttonRef = useRef(null)
|
|
31
41
|
const [isListening, setIsListening] = useState(false)
|
|
32
42
|
|
|
33
|
-
const [, { start, stop, subscribe, unsubscribe }] = useVocal(lang, grammars, __rsInstance)
|
|
34
|
-
const triggerCommand = useCommands(commands)
|
|
43
|
+
const [, { start, stop, subscribe, unsubscribe }] = useVocal(lang, grammars, maxAlternatives, __rsInstance)
|
|
44
|
+
const triggerCommand = useCommands(commands, precision)
|
|
35
45
|
|
|
36
46
|
const propsRef = useRef({})
|
|
37
47
|
propsRef.current = { onStart, onEnd, onSpeechStart, onSpeechEnd, onResult, onError, onNoMatch }
|
|
@@ -52,7 +62,6 @@ const Vocal = ({
|
|
|
52
62
|
stop()
|
|
53
63
|
} catch (error) {
|
|
54
64
|
propsRef.current.onError?.(error)
|
|
55
|
-
} finally {
|
|
56
65
|
unsubscribeAllRef.current?.()
|
|
57
66
|
}
|
|
58
67
|
}, [stop])
|
|
@@ -83,20 +92,23 @@ const Vocal = ({
|
|
|
83
92
|
|
|
84
93
|
const _onResult = useCallback(
|
|
85
94
|
(event) => {
|
|
86
|
-
const
|
|
95
|
+
const segmentData = Array.from(event?.results ?? [], (segment) => {
|
|
87
96
|
let best = { confidence: -Infinity, transcript: '' }
|
|
97
|
+
const alternatives = []
|
|
88
98
|
for (let j = 0; j < segment.length; j++) {
|
|
89
99
|
const alt = segment[j]
|
|
100
|
+
alternatives.push(alt.transcript ?? '')
|
|
90
101
|
if (alt.confidence === undefined || alt.confidence > best.confidence) {
|
|
91
102
|
best = alt
|
|
92
103
|
}
|
|
93
104
|
}
|
|
94
|
-
return best.transcript ?? ''
|
|
95
|
-
})
|
|
105
|
+
return { best: best.transcript ?? '', alternatives }
|
|
106
|
+
})
|
|
107
|
+
const transcript = segmentData.map((s) => s.best).join('')
|
|
96
108
|
|
|
97
109
|
stopTimer()
|
|
98
110
|
stopRecognition()
|
|
99
|
-
triggerCommandRef.current
|
|
111
|
+
tryMatchCommand(segmentData, triggerCommandRef.current)
|
|
100
112
|
propsRef.current.onResult?.(transcript, event)
|
|
101
113
|
},
|
|
102
114
|
[stopTimer, stopRecognition]
|
|
@@ -122,8 +134,12 @@ const Vocal = ({
|
|
|
122
134
|
// Handler for the recognition "end" event: stops the timer, then stops
// recognition and drops the subscriptions. The consumer's onEnd callback is
// notified from `finally`, so it still fires when that cleanup throws.
const _onEnd = useCallback(
	(endEvent) => {
		// Read the callback at call time so the latest onEnd prop is used.
		const notifyEnd = () => propsRef.current.onEnd?.(endEvent)

		stopTimer()
		try {
			stopRecognition()
			unsubscribeAllRef.current?.()
		} finally {
			notifyEnd()
		}
	},
	[stopTimer, stopRecognition]
)
|
|
@@ -428,7 +428,92 @@ describe('Vocal', () => {
|
|
|
428
428
|
expect(onErrorV1).not.toHaveBeenCalled()
|
|
429
429
|
})
|
|
430
430
|
|
|
431
|
-
it('
|
|
431
|
+
it('triggers command matched on first segment in multi-segment result', async () => {
|
|
432
|
+
const callback = vi.fn()
|
|
433
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
434
|
+
const commands = { hello: callback }
|
|
435
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
|
|
436
|
+
|
|
437
|
+
await act(async () => {
|
|
438
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
439
|
+
recognition.instance.say([
|
|
440
|
+
[{ transcript: 'hello', confidence: 0.9 }],
|
|
441
|
+
[{ transcript: 'world', confidence: 0.8 }],
|
|
442
|
+
])
|
|
443
|
+
await waitFor(() => expect(callback).toHaveBeenCalledWith('hello'))
|
|
444
|
+
})
|
|
445
|
+
})
|
|
446
|
+
|
|
447
|
+
it('triggers command matched on second segment in multi-segment result', async () => {
|
|
448
|
+
const callback = vi.fn()
|
|
449
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
450
|
+
const commands = { world: callback }
|
|
451
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
|
|
452
|
+
|
|
453
|
+
await act(async () => {
|
|
454
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
455
|
+
recognition.instance.say([
|
|
456
|
+
[{ transcript: 'hello', confidence: 0.9 }],
|
|
457
|
+
[{ transcript: 'world', confidence: 0.8 }],
|
|
458
|
+
])
|
|
459
|
+
await waitFor(() => expect(callback).toHaveBeenCalledWith('world'))
|
|
460
|
+
})
|
|
461
|
+
})
|
|
462
|
+
|
|
463
|
+
it('does not trigger command when no segment matches', async () => {
|
|
464
|
+
const callback = vi.fn()
|
|
465
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
466
|
+
const commands = { foo: callback }
|
|
467
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
|
|
468
|
+
|
|
469
|
+
await act(async () => {
|
|
470
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
471
|
+
recognition.instance.say([
|
|
472
|
+
[{ transcript: 'hello', confidence: 0.9 }],
|
|
473
|
+
[{ transcript: 'world', confidence: 0.8 }],
|
|
474
|
+
])
|
|
475
|
+
await new Promise((r) => setTimeout(r, 100))
|
|
476
|
+
})
|
|
477
|
+
|
|
478
|
+
expect(callback).not.toHaveBeenCalled()
|
|
479
|
+
})
|
|
480
|
+
|
|
481
|
+
it('fires only the first matching command when multiple segments each match a different command', async () => {
|
|
482
|
+
const callbackHello = vi.fn()
|
|
483
|
+
const callbackWorld = vi.fn()
|
|
484
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
485
|
+
const commands = { hello: callbackHello, world: callbackWorld }
|
|
486
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
|
|
487
|
+
|
|
488
|
+
await act(async () => {
|
|
489
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
490
|
+
recognition.instance.say([
|
|
491
|
+
[{ transcript: 'hello', confidence: 0.9 }],
|
|
492
|
+
[{ transcript: 'world', confidence: 0.8 }],
|
|
493
|
+
])
|
|
494
|
+
await waitFor(() => expect(callbackHello).toHaveBeenCalledWith('hello'))
|
|
495
|
+
})
|
|
496
|
+
|
|
497
|
+
expect(callbackWorld).not.toHaveBeenCalled()
|
|
498
|
+
})
|
|
499
|
+
|
|
500
|
+
it('passes full joined transcript to onResult regardless of command segment matching', async () => {
|
|
501
|
+
const onResult = vi.fn()
|
|
502
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
503
|
+
const commands = { hello: vi.fn() }
|
|
504
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, onResult }))
|
|
505
|
+
|
|
506
|
+
await act(async () => {
|
|
507
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
508
|
+
recognition.instance.say([
|
|
509
|
+
[{ transcript: 'hello ', confidence: 0.9 }],
|
|
510
|
+
[{ transcript: 'world', confidence: 0.8 }],
|
|
511
|
+
])
|
|
512
|
+
await waitFor(() => expect(onResult).toHaveBeenCalledWith('hello world', expect.anything()))
|
|
513
|
+
})
|
|
514
|
+
})
|
|
515
|
+
|
|
516
|
+
it('returns the most confident alternative as the onResult transcript', async () => {
|
|
432
517
|
const onResult = vi.fn()
|
|
433
518
|
const recognition = new SpeechRecognitionWrapper()
|
|
434
519
|
const { getByTestId } = render(getInstance({ __rsInstance: recognition, onResult }))
|
|
@@ -444,7 +529,7 @@ describe('Vocal', () => {
|
|
|
444
529
|
})
|
|
445
530
|
})
|
|
446
531
|
|
|
447
|
-
it('joins all segments
|
|
532
|
+
it('joins all segments into the onResult transcript', async () => {
|
|
448
533
|
const onResult = vi.fn()
|
|
449
534
|
const recognition = new SpeechRecognitionWrapper()
|
|
450
535
|
const { getByTestId } = render(getInstance({ __rsInstance: recognition, onResult }))
|
|
@@ -459,18 +544,68 @@ describe('Vocal', () => {
|
|
|
459
544
|
})
|
|
460
545
|
})
|
|
461
546
|
|
|
462
|
-
it('
|
|
547
|
+
it('triggers command matched on a word within a multi-word segment', async () => {
|
|
548
|
+
const callback = vi.fn()
|
|
549
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
550
|
+
const commands = { rouge: callback }
|
|
551
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
|
|
552
|
+
|
|
553
|
+
await act(async () => {
|
|
554
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
555
|
+
recognition.instance.say([[{ transcript: 'je veux du rouge', confidence: 0.9 }]])
|
|
556
|
+
await waitFor(() => expect(callback).toHaveBeenCalledWith('rouge'))
|
|
557
|
+
})
|
|
558
|
+
})
|
|
559
|
+
|
|
560
|
+
it('triggers command matched on a secondary alternative (homophone)', async () => {
|
|
561
|
+
const callback = vi.fn()
|
|
562
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
563
|
+
const commands = { vert: callback }
|
|
564
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, maxAlternatives: 3 }))
|
|
565
|
+
|
|
566
|
+
await act(async () => {
|
|
567
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
568
|
+
// Primary alternative is the homophone; secondary is the correct word
|
|
569
|
+
recognition.instance.say([[
|
|
570
|
+
{ transcript: 'verre', confidence: 0.9 },
|
|
571
|
+
{ transcript: 'vert', confidence: 0.7 },
|
|
572
|
+
]])
|
|
573
|
+
await waitFor(() => expect(callback).toHaveBeenCalledWith('vert'))
|
|
574
|
+
})
|
|
575
|
+
})
|
|
576
|
+
|
|
577
|
+
it('passes the most confident transcript to onResult even when command matches a secondary alternative', async () => {
|
|
463
578
|
const onResult = vi.fn()
|
|
464
579
|
const recognition = new SpeechRecognitionWrapper()
|
|
465
|
-
const
|
|
580
|
+
const commands = { vert: vi.fn() }
|
|
581
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, onResult, maxAlternatives: 3 }))
|
|
466
582
|
|
|
467
583
|
await act(async () => {
|
|
468
584
|
fireEvent.click(getByTestId('__vocal-root__'))
|
|
469
|
-
recognition.instance.say([
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
])
|
|
473
|
-
await waitFor(() => expect(onResult).toHaveBeenCalledWith('
|
|
585
|
+
recognition.instance.say([[
|
|
586
|
+
{ transcript: 'verre', confidence: 0.9 },
|
|
587
|
+
{ transcript: 'vert', confidence: 0.7 },
|
|
588
|
+
]])
|
|
589
|
+
await waitFor(() => expect(onResult).toHaveBeenCalledWith('verre', expect.anything()))
|
|
590
|
+
})
|
|
591
|
+
})
|
|
592
|
+
|
|
593
|
+
it('calls onEnd via the end event when stop is asynchronous', async () => {
|
|
594
|
+
const onEnd = vi.fn()
|
|
595
|
+
const recognition = new SpeechRecognitionWrapper()
|
|
596
|
+
const { getByTestId } = render(getInstance({ __rsInstance: recognition, onEnd }))
|
|
597
|
+
|
|
598
|
+
// Simulate async stop: override stop() so the end event does not fire immediately
|
|
599
|
+
recognition.instance.stop = vi.fn()
|
|
600
|
+
|
|
601
|
+
await act(async () => {
|
|
602
|
+
fireEvent.click(getByTestId('__vocal-root__'))
|
|
603
|
+
recognition.instance.say('Foo')
|
|
604
|
+
// stopRecognition was called but end has not fired yet — onEnd must not be called
|
|
605
|
+
expect(onEnd).not.toHaveBeenCalled()
|
|
606
|
+
// Browser fires end asynchronously after recognition stops
|
|
607
|
+
recognition.instance.end()
|
|
608
|
+
await waitFor(() => expect(onEnd).toHaveBeenCalled())
|
|
474
609
|
})
|
|
475
610
|
})
|
|
476
611
|
})
|
|
@@ -61,4 +61,29 @@ describe('useCommands', () => {
|
|
|
61
61
|
} = renderHook(() => useCommands(commands))
|
|
62
62
|
expect(triggerCommand('gag')).toBeNull()
|
|
63
63
|
})
|
|
64
|
+
|
|
65
|
+
it('triggers all registered commands when multiple commands are defined', () => {
|
|
66
|
+
const commands = {
|
|
67
|
+
rouge: () => 'red',
|
|
68
|
+
bleu: () => 'blue',
|
|
69
|
+
jaune: () => 'yellow',
|
|
70
|
+
}
|
|
71
|
+
const {
|
|
72
|
+
result: { current: triggerCommand },
|
|
73
|
+
} = renderHook(() => useCommands(commands))
|
|
74
|
+
expect(triggerCommand('rouge')).toBe('red')
|
|
75
|
+
expect(triggerCommand('bleu')).toBe('blue')
|
|
76
|
+
expect(triggerCommand('jaune')).toBe('yellow')
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
it('does not match near-homophones with strict precision — rely on maxAlternatives instead', () => {
|
|
80
|
+
const commands = { vert: () => 'green' }
|
|
81
|
+
const {
|
|
82
|
+
result: { current: triggerCommand },
|
|
83
|
+
} = renderHook(() => useCommands(commands))
|
|
84
|
+
// 'verre' is not an exact (case-insensitive) match for the single-word key 'vert', so no command fires
|
|
85
|
+
expect(triggerCommand('verre')).toBeNull()
|
|
86
|
+
// When the engine surfaces 'vert' as a secondary alternative, the exact lookup matches it
|
|
87
|
+
expect(triggerCommand('vert')).toBe('green')
|
|
88
|
+
})
|
|
64
89
|
})
|
|
@@ -124,13 +124,18 @@ describe('useVocal', () => {
|
|
|
124
124
|
expect(ref.current).toBeDefined()
|
|
125
125
|
})
|
|
126
126
|
|
|
127
|
+
it('passes maxAlternatives to SpeechRecognitionWrapper constructor', () => {
|
|
128
|
+
renderHook(() => useVocal('en-US', null, 5))
|
|
129
|
+
expect(SpeechRecognitionWrapper).toHaveBeenCalledWith({ lang: 'en-US', grammars: null, maxAlternatives: 5 })
|
|
130
|
+
})
|
|
131
|
+
|
|
127
132
|
it('uses custom SpeechRecognition instance', () => {
|
|
128
133
|
const foo = new SpeechRecognitionWrapper()
|
|
129
134
|
const {
|
|
130
135
|
result: {
|
|
131
136
|
current: [ref],
|
|
132
137
|
},
|
|
133
|
-
} = renderHook(() => useVocal(null, null, foo))
|
|
138
|
+
} = renderHook(() => useVocal(null, null, 1, foo))
|
|
134
139
|
expect(ref.current).toBe(foo)
|
|
135
140
|
})
|
|
136
141
|
|
package/src/hooks/useCommands.js
CHANGED
|
@@ -1,16 +1,44 @@
|
|
|
1
|
+
import { useMemo } from 'react'
|
|
1
2
|
import Fuse from 'fuse.js'
|
|
2
3
|
|
|
3
4
|
const useCommands = (commands, precision = 0.4) => {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
const normalized = useMemo(
|
|
6
|
+
() =>
|
|
7
|
+
!!commands
|
|
8
|
+
? Object.entries(commands).reduce((acc, [key, value]) => ({ ...acc, [key.toLowerCase()]: value }), {})
|
|
9
|
+
: {},
|
|
10
|
+
[commands]
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
const keys = useMemo(() => Object.keys(normalized), [normalized])
|
|
14
|
+
|
|
15
|
+
// Fuzzy matching is only needed for phrase command keys.
|
|
16
|
+
// Single-word keys use exact case-insensitive lookup — simpler and no false positives.
|
|
17
|
+
const hasPhraseKeys = useMemo(() => keys.some((k) => k.includes(' ')), [keys])
|
|
18
|
+
|
|
19
|
+
// precision is only consulted when at least one phrase key exists; Fuse then indexes every key, single-word ones included
|
|
20
|
+
const fuse = useMemo(
|
|
21
|
+
() => (hasPhraseKeys ? new Fuse(keys, { includeScore: true, ignoreLocation: true }) : null),
|
|
22
|
+
[hasPhraseKeys, keys]
|
|
23
|
+
)
|
|
7
24
|
|
|
8
25
|
// Resolves a transcript to a registered command and invokes its handler.
//
// - No registered keys, or a non-string input: returns null.
// - No phrase keys registered: each whitespace-separated word of the input is
//   lower-cased and looked up exactly; the first hit's handler is called with
//   the word as spoken (original casing) and its result is returned.
// - Otherwise: the whole input is searched through Fuse; the first result whose
//   score is strictly below `precision` is used and its handler receives the
//   full input.
//
// Returns the handler's return value, or null when nothing matches. A handler
// that itself returns null is therefore indistinguishable from "no match".
const triggerCommand = (input) => {
	if (typeof input !== 'string' || !keys.length) return null

	if (!hasPhraseKeys) {
		for (const word of input.trim().split(/\s+/)) {
			const key = word.toLowerCase()
			// Own-property check: `key in normalized` would also accept inherited
			// Object.prototype members, so saying "constructor" would call Object().
			if (Object.prototype.hasOwnProperty.call(normalized, key)) return normalized[key]?.(word)
		}
		return null
	}

	// Keep only results under the threshold; the first remaining one is used.
	const [best] = fuse.search(input).filter((r) => r.score < precision)
	if (!best) return null
	return normalized[best.item.toLowerCase()]?.(input)
}
|
package/src/hooks/useVocal.js
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
import { useCallback, useEffect, useRef } from 'react'
|
|
2
2
|
import { Vocal as SpeechRecognitionWrapper } from '@untemps/vocal'
|
|
3
3
|
|
|
4
|
-
const useVocal = (lang = 'en-US', grammars = null, __rsInstance = null) => {
|
|
4
|
+
const useVocal = (lang = 'en-US', grammars = null, maxAlternatives = 1, __rsInstance = null) => {
|
|
5
5
|
const ref = useRef(null)
|
|
6
6
|
|
|
7
7
|
useEffect(() => {
|
|
8
8
|
if (SpeechRecognitionWrapper.isSupported) {
|
|
9
|
-
ref.current = __rsInstance || new SpeechRecognitionWrapper({ lang, grammars })
|
|
9
|
+
ref.current = __rsInstance || new SpeechRecognitionWrapper({ lang, grammars, maxAlternatives })
|
|
10
10
|
return () => {
|
|
11
11
|
ref.current.abort()
|
|
12
12
|
ref.current.cleanup()
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
|
-
}, [lang, grammars, __rsInstance])
|
|
15
|
+
}, [lang, grammars, maxAlternatives, __rsInstance])
|
|
16
16
|
|
|
17
17
|
const start = useCallback(() => {
|
|
18
18
|
if (ref.current) {
|