@untemps/react-vocal 2.0.0-beta.2 → 2.0.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@untemps/react-vocal",
3
- "version": "2.0.0-beta.2",
3
+ "version": "2.0.0-beta.4",
4
4
  "author": "Vincent Le Badezet <v.lebadezet@untemps.net>",
5
5
  "repository": "git@github.com:untemps/react-vocal.git",
6
6
  "license": "MIT",
@@ -8,12 +8,22 @@ import useCommands from '../hooks/useCommands'
8
8
 
9
9
  import Icon from './Icon'
10
10
 
11
+ const tryMatchCommand = (segmentData, trigger) => {
12
+ for (const { alternatives } of segmentData) {
13
+ for (const a of alternatives) {
14
+ if (trigger(a) !== null) return
15
+ }
16
+ }
17
+ }
18
+
11
19
  const Vocal = ({
12
20
  children,
13
21
  commands = null,
14
22
  lang = 'en-US',
15
23
  grammars = null,
16
24
  timeout = 3000,
25
+ precision = 0.4, // Fuse.js score threshold for phrase commands only; single-word commands always use exact lookup
26
+ maxAlternatives = 1,
17
27
  ariaLabel = 'start recognition',
18
28
  style = null,
19
29
  className = null,
@@ -30,8 +40,8 @@ const Vocal = ({
30
40
  const buttonRef = useRef(null)
31
41
  const [isListening, setIsListening] = useState(false)
32
42
 
33
- const [, { start, stop, subscribe, unsubscribe }] = useVocal(lang, grammars, __rsInstance)
34
- const triggerCommand = useCommands(commands)
43
+ const [, { start, stop, subscribe, unsubscribe }] = useVocal(lang, grammars, maxAlternatives, __rsInstance)
44
+ const triggerCommand = useCommands(commands, precision)
35
45
 
36
46
  const propsRef = useRef({})
37
47
  propsRef.current = { onStart, onEnd, onSpeechStart, onSpeechEnd, onResult, onError, onNoMatch }
@@ -52,7 +62,6 @@ const Vocal = ({
52
62
  stop()
53
63
  } catch (error) {
54
64
  propsRef.current.onError?.(error)
55
- } finally {
56
65
  unsubscribeAllRef.current?.()
57
66
  }
58
67
  }, [stop])
@@ -82,11 +91,25 @@ const Vocal = ({
82
91
  )
83
92
 
84
93
  const _onResult = useCallback(
85
- (event, result) => {
94
+ (event) => {
95
+ const segmentData = Array.from(event?.results ?? [], (segment) => {
96
+ let best = { confidence: -Infinity, transcript: '' }
97
+ const alternatives = []
98
+ for (let j = 0; j < segment.length; j++) {
99
+ const alt = segment[j]
100
+ alternatives.push(alt.transcript ?? '')
101
+ if (alt.confidence === undefined || alt.confidence > best.confidence) {
102
+ best = alt
103
+ }
104
+ }
105
+ return { best: best.transcript ?? '', alternatives }
106
+ })
107
+ const transcript = segmentData.map((s) => s.best).join('')
108
+
86
109
  stopTimer()
87
110
  stopRecognition()
88
- triggerCommandRef.current(result)
89
- propsRef.current.onResult?.(result, event)
111
+ tryMatchCommand(segmentData, triggerCommandRef.current)
112
+ propsRef.current.onResult?.(transcript, event)
90
113
  },
91
114
  [stopTimer, stopRecognition]
92
115
  )
@@ -111,8 +134,12 @@ const Vocal = ({
111
134
  const _onEnd = useCallback(
112
135
  (e) => {
113
136
  stopTimer()
114
- stopRecognition()
115
- propsRef.current.onEnd?.(e)
137
+ try {
138
+ stopRecognition()
139
+ unsubscribeAllRef.current?.()
140
+ } finally {
141
+ propsRef.current.onEnd?.(e)
142
+ }
116
143
  },
117
144
  [stopTimer, stopRecognition]
118
145
  )
@@ -427,4 +427,185 @@ describe('Vocal', () => {
427
427
 
428
428
  expect(onErrorV1).not.toHaveBeenCalled()
429
429
  })
430
+
431
+ it('triggers command matched on first segment in multi-segment result', async () => {
432
+ const callback = vi.fn()
433
+ const recognition = new SpeechRecognitionWrapper()
434
+ const commands = { hello: callback }
435
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
436
+
437
+ await act(async () => {
438
+ fireEvent.click(getByTestId('__vocal-root__'))
439
+ recognition.instance.say([
440
+ [{ transcript: 'hello', confidence: 0.9 }],
441
+ [{ transcript: 'world', confidence: 0.8 }],
442
+ ])
443
+ await waitFor(() => expect(callback).toHaveBeenCalledWith('hello'))
444
+ })
445
+ })
446
+
447
+ it('triggers command matched on second segment in multi-segment result', async () => {
448
+ const callback = vi.fn()
449
+ const recognition = new SpeechRecognitionWrapper()
450
+ const commands = { world: callback }
451
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
452
+
453
+ await act(async () => {
454
+ fireEvent.click(getByTestId('__vocal-root__'))
455
+ recognition.instance.say([
456
+ [{ transcript: 'hello', confidence: 0.9 }],
457
+ [{ transcript: 'world', confidence: 0.8 }],
458
+ ])
459
+ await waitFor(() => expect(callback).toHaveBeenCalledWith('world'))
460
+ })
461
+ })
462
+
463
+ it('does not trigger command when no segment matches', async () => {
464
+ const callback = vi.fn()
465
+ const recognition = new SpeechRecognitionWrapper()
466
+ const commands = { foo: callback }
467
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
468
+
469
+ await act(async () => {
470
+ fireEvent.click(getByTestId('__vocal-root__'))
471
+ recognition.instance.say([
472
+ [{ transcript: 'hello', confidence: 0.9 }],
473
+ [{ transcript: 'world', confidence: 0.8 }],
474
+ ])
475
+ await new Promise((r) => setTimeout(r, 100))
476
+ })
477
+
478
+ expect(callback).not.toHaveBeenCalled()
479
+ })
480
+
481
+ it('fires only the first matching command when multiple segments each match a different command', async () => {
482
+ const callbackHello = vi.fn()
483
+ const callbackWorld = vi.fn()
484
+ const recognition = new SpeechRecognitionWrapper()
485
+ const commands = { hello: callbackHello, world: callbackWorld }
486
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
487
+
488
+ await act(async () => {
489
+ fireEvent.click(getByTestId('__vocal-root__'))
490
+ recognition.instance.say([
491
+ [{ transcript: 'hello', confidence: 0.9 }],
492
+ [{ transcript: 'world', confidence: 0.8 }],
493
+ ])
494
+ await waitFor(() => expect(callbackHello).toHaveBeenCalledWith('hello'))
495
+ })
496
+
497
+ expect(callbackWorld).not.toHaveBeenCalled()
498
+ })
499
+
500
+ it('passes full joined transcript to onResult regardless of command segment matching', async () => {
501
+ const onResult = vi.fn()
502
+ const recognition = new SpeechRecognitionWrapper()
503
+ const commands = { hello: vi.fn() }
504
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, onResult }))
505
+
506
+ await act(async () => {
507
+ fireEvent.click(getByTestId('__vocal-root__'))
508
+ recognition.instance.say([
509
+ [{ transcript: 'hello ', confidence: 0.9 }],
510
+ [{ transcript: 'world', confidence: 0.8 }],
511
+ ])
512
+ await waitFor(() => expect(onResult).toHaveBeenCalledWith('hello world', expect.anything()))
513
+ })
514
+ })
515
+
516
+ it('returns the most confident alternative as the onResult transcript', async () => {
517
+ const onResult = vi.fn()
518
+ const recognition = new SpeechRecognitionWrapper()
519
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, onResult }))
520
+
521
+ await act(async () => {
522
+ fireEvent.click(getByTestId('__vocal-root__'))
523
+ recognition.instance.say([[
524
+ { transcript: 'bar', confidence: 0.4 },
525
+ { transcript: 'foo', confidence: 0.9 },
526
+ { transcript: 'baz', confidence: 0.1 },
527
+ ]])
528
+ await waitFor(() => expect(onResult).toHaveBeenCalledWith('foo', expect.anything()))
529
+ })
530
+ })
531
+
532
+ it('joins all segments into the onResult transcript', async () => {
533
+ const onResult = vi.fn()
534
+ const recognition = new SpeechRecognitionWrapper()
535
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, onResult }))
536
+
537
+ await act(async () => {
538
+ fireEvent.click(getByTestId('__vocal-root__'))
539
+ recognition.instance.say([
540
+ [{ transcript: 'hello ', confidence: 0.9 }],
541
+ [{ transcript: 'world', confidence: 0.8 }],
542
+ ])
543
+ await waitFor(() => expect(onResult).toHaveBeenCalledWith('hello world', expect.anything()))
544
+ })
545
+ })
546
+
547
+ it('triggers command matched on a word within a multi-word segment', async () => {
548
+ const callback = vi.fn()
549
+ const recognition = new SpeechRecognitionWrapper()
550
+ const commands = { rouge: callback }
551
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands }))
552
+
553
+ await act(async () => {
554
+ fireEvent.click(getByTestId('__vocal-root__'))
555
+ recognition.instance.say([[{ transcript: 'je veux du rouge', confidence: 0.9 }]])
556
+ await waitFor(() => expect(callback).toHaveBeenCalledWith('rouge'))
557
+ })
558
+ })
559
+
560
+ it('triggers command matched on a secondary alternative (homophone)', async () => {
561
+ const callback = vi.fn()
562
+ const recognition = new SpeechRecognitionWrapper()
563
+ const commands = { vert: callback }
564
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, maxAlternatives: 3 }))
565
+
566
+ await act(async () => {
567
+ fireEvent.click(getByTestId('__vocal-root__'))
568
+ // Primary alternative is the homophone; secondary is the correct word
569
+ recognition.instance.say([[
570
+ { transcript: 'verre', confidence: 0.9 },
571
+ { transcript: 'vert', confidence: 0.7 },
572
+ ]])
573
+ await waitFor(() => expect(callback).toHaveBeenCalledWith('vert'))
574
+ })
575
+ })
576
+
577
+ it('passes the most confident transcript to onResult even when command matches a secondary alternative', async () => {
578
+ const onResult = vi.fn()
579
+ const recognition = new SpeechRecognitionWrapper()
580
+ const commands = { vert: vi.fn() }
581
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, commands, onResult, maxAlternatives: 3 }))
582
+
583
+ await act(async () => {
584
+ fireEvent.click(getByTestId('__vocal-root__'))
585
+ recognition.instance.say([[
586
+ { transcript: 'verre', confidence: 0.9 },
587
+ { transcript: 'vert', confidence: 0.7 },
588
+ ]])
589
+ await waitFor(() => expect(onResult).toHaveBeenCalledWith('verre', expect.anything()))
590
+ })
591
+ })
592
+
593
+ it('calls onEnd via the end event when stop is asynchronous', async () => {
594
+ const onEnd = vi.fn()
595
+ const recognition = new SpeechRecognitionWrapper()
596
+ const { getByTestId } = render(getInstance({ __rsInstance: recognition, onEnd }))
597
+
598
+ // Simulate async stop: override stop() so the end event does not fire immediately
599
+ recognition.instance.stop = vi.fn()
600
+
601
+ await act(async () => {
602
+ fireEvent.click(getByTestId('__vocal-root__'))
603
+ recognition.instance.say('Foo')
604
+ // stopRecognition was called but end has not fired yet — onEnd must not be called
605
+ expect(onEnd).not.toHaveBeenCalled()
606
+ // Browser fires end asynchronously after recognition stops
607
+ recognition.instance.end()
608
+ await waitFor(() => expect(onEnd).toHaveBeenCalled())
609
+ })
610
+ })
430
611
  })
@@ -61,4 +61,29 @@ describe('useCommands', () => {
61
61
  } = renderHook(() => useCommands(commands))
62
62
  expect(triggerCommand('gag')).toBeNull()
63
63
  })
64
+
65
+ it('triggers all registered commands when multiple commands are defined', () => {
66
+ const commands = {
67
+ rouge: () => 'red',
68
+ bleu: () => 'blue',
69
+ jaune: () => 'yellow',
70
+ }
71
+ const {
72
+ result: { current: triggerCommand },
73
+ } = renderHook(() => useCommands(commands))
74
+ expect(triggerCommand('rouge')).toBe('red')
75
+ expect(triggerCommand('bleu')).toBe('blue')
76
+ expect(triggerCommand('jaune')).toBe('yellow')
77
+ })
78
+
79
+ it('does not match near-homophones with strict precision — rely on maxAlternatives instead', () => {
80
+ const commands = { vert: () => 'green' }
81
+ const {
82
+ result: { current: triggerCommand },
83
+ } = renderHook(() => useCommands(commands))
84
+ // No phrase keys are registered, so lookup is an exact word match: 'verre' is not a key and returns null
85
+ expect(triggerCommand('verre')).toBeNull()
86
+ // The engine surfaces 'vert' as a secondary alternative (score 0) — exact match
87
+ expect(triggerCommand('vert')).toBe('green')
88
+ })
64
89
  })
@@ -124,13 +124,18 @@ describe('useVocal', () => {
124
124
  expect(ref.current).toBeDefined()
125
125
  })
126
126
 
127
+ it('passes maxAlternatives to SpeechRecognitionWrapper constructor', () => {
128
+ renderHook(() => useVocal('en-US', null, 5))
129
+ expect(SpeechRecognitionWrapper).toHaveBeenCalledWith({ lang: 'en-US', grammars: null, maxAlternatives: 5 })
130
+ })
131
+
127
132
  it('uses custom SpeechRecognition instance', () => {
128
133
  const foo = new SpeechRecognitionWrapper()
129
134
  const {
130
135
  result: {
131
136
  current: [ref],
132
137
  },
133
- } = renderHook(() => useVocal(null, null, foo))
138
+ } = renderHook(() => useVocal(null, null, 1, foo))
134
139
  expect(ref.current).toBe(foo)
135
140
  })
136
141
 
@@ -1,16 +1,44 @@
1
+ import { useMemo } from 'react'
1
2
  import Fuse from 'fuse.js'
2
3
 
3
4
  const useCommands = (commands, precision = 0.4) => {
4
- commands = !!commands
5
- ? Object.entries(commands)?.reduce((acc, [key, value]) => ({ [key.toLowerCase()]: value }), {})
6
- : {}
5
+ const normalized = useMemo(
6
+ () =>
7
+ !!commands
8
+ ? Object.entries(commands).reduce((acc, [key, value]) => ({ ...acc, [key.toLowerCase()]: value }), {})
9
+ : {},
10
+ [commands]
11
+ )
12
+
13
+ const keys = useMemo(() => Object.keys(normalized), [normalized])
14
+
15
+ // Fuzzy matching is only needed for phrase command keys.
16
+ // Single-word keys use exact case-insensitive lookup — simpler and no false positives.
17
+ const hasPhraseKeys = useMemo(() => keys.some((k) => k.includes(' ')), [keys])
18
+
19
+ // precision is only consulted when at least one key is a phrase (contains a space); Fuse then scores every key, single-word ones included — with no phrase keys, lookup is exact
20
+ const fuse = useMemo(
21
+ () => (hasPhraseKeys ? new Fuse(keys, { includeScore: true, ignoreLocation: true }) : null),
22
+ [hasPhraseKeys, keys]
23
+ )
7
24
 
8
25
  const triggerCommand = (input) => {
9
- const fuse = new Fuse(Object.keys(commands), { includeScore: true, ignoreLocation: true })
26
+ if (!keys.length) return null
27
+
28
+ if (!hasPhraseKeys) {
29
+ const words = input.trim().split(/\s+/)
30
+ const targets = words.length > 1 ? words : [input.trim()]
31
+ for (const w of targets) {
32
+ const key = w.toLowerCase()
33
+ if (key in normalized) return normalized[key]?.(w)
34
+ }
35
+ return null
36
+ }
37
+
10
38
  const result = fuse.search(input).filter((r) => r.score < precision)
11
- if (!!result?.length) {
39
+ if (result?.length) {
12
40
  const key = result[0].item.toLowerCase()
13
- return commands[key]?.(input)
41
+ return normalized[key]?.(input)
14
42
  }
15
43
  return null
16
44
  }
@@ -1,18 +1,18 @@
1
1
  import { useCallback, useEffect, useRef } from 'react'
2
2
  import { Vocal as SpeechRecognitionWrapper } from '@untemps/vocal'
3
3
 
4
- const useVocal = (lang = 'en-US', grammars = null, __rsInstance = null) => {
4
+ const useVocal = (lang = 'en-US', grammars = null, maxAlternatives = 1, __rsInstance = null) => {
5
5
  const ref = useRef(null)
6
6
 
7
7
  useEffect(() => {
8
8
  if (SpeechRecognitionWrapper.isSupported) {
9
- ref.current = __rsInstance || new SpeechRecognitionWrapper({ lang, grammars })
9
+ ref.current = __rsInstance || new SpeechRecognitionWrapper({ lang, grammars, maxAlternatives })
10
10
  return () => {
11
11
  ref.current.abort()
12
12
  ref.current.cleanup()
13
13
  }
14
14
  }
15
- }, [lang, grammars, __rsInstance])
15
+ }, [lang, grammars, maxAlternatives, __rsInstance])
16
16
 
17
17
  const start = useCallback(() => {
18
18
  if (ref.current) {
package/vitest.setup.js CHANGED
@@ -54,19 +54,22 @@ global.SpeechRecognition = vi.fn(function () {
54
54
  abort: vi.fn(function () {
55
55
  handlers.end?.()
56
56
  }),
57
- say: vi.fn(function (sentence) {
57
+ say: vi.fn(function (input) {
58
58
  handlers.speechstart?.()
59
59
 
60
60
  const resultEvent = new Event('result')
61
61
  resultEvent.resultIndex = 0
62
- resultEvent.results = [[{ transcript: sentence }]]
62
+ resultEvent.results = Array.isArray(input) ? input : input ? [[{ transcript: input }]] : []
63
63
  handlers.speechend?.()
64
- if (sentence) {
64
+ if (input) {
65
65
  handlers.result?.(resultEvent)
66
66
  } else {
67
67
  handlers.nomatch?.()
68
68
  }
69
69
  }),
70
+ end: vi.fn(function () {
71
+ handlers.end?.()
72
+ }),
70
73
  error: vi.fn(function (err) {
71
74
  handlers.error?.(err)
72
75
  }),