@djangocfg/ui-tools 2.1.381 → 2.1.382

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +132 -899
  2. package/dist/ChatRoot-6IZFM5HM.mjs +5 -0
  3. package/dist/{ChatRoot-EJC5Y2YM.cjs.map → ChatRoot-6IZFM5HM.mjs.map} +1 -1
  4. package/dist/ChatRoot-LW4XNIKP.cjs +14 -0
  5. package/dist/{ChatRoot-QOSKJPM6.mjs.map → ChatRoot-LW4XNIKP.cjs.map} +1 -1
  6. package/dist/DictationField-2ZLQWLYV.mjs +4 -0
  7. package/dist/DictationField-2ZLQWLYV.mjs.map +1 -0
  8. package/dist/DictationField-IPPJ54CU.cjs +13 -0
  9. package/dist/DictationField-IPPJ54CU.cjs.map +1 -0
  10. package/dist/{DocsLayout-2YKPXZYO.mjs → DocsLayout-2P3ONDWJ.mjs} +3 -3
  11. package/dist/{DocsLayout-2YKPXZYO.mjs.map → DocsLayout-2P3ONDWJ.mjs.map} +1 -1
  12. package/dist/{DocsLayout-Q4KS3QWW.cjs → DocsLayout-2YZNS5VK.cjs} +8 -8
  13. package/dist/{DocsLayout-Q4KS3QWW.cjs.map → DocsLayout-2YZNS5VK.cjs.map} +1 -1
  14. package/dist/chunk-4LXG3NBV.mjs +833 -0
  15. package/dist/chunk-4LXG3NBV.mjs.map +1 -0
  16. package/dist/{chunk-XACCHZH2.cjs → chunk-FIRK5CEH.cjs} +42 -4
  17. package/dist/chunk-FIRK5CEH.cjs.map +1 -0
  18. package/dist/{chunk-NWUT327A.mjs → chunk-HIK6BPL7.mjs} +38 -5
  19. package/dist/chunk-HIK6BPL7.mjs.map +1 -0
  20. package/dist/chunk-KMSBGNVC.cjs +835 -0
  21. package/dist/chunk-KMSBGNVC.cjs.map +1 -0
  22. package/dist/chunk-OZAU3QWD.cjs +2493 -0
  23. package/dist/chunk-OZAU3QWD.cjs.map +1 -0
  24. package/dist/chunk-UWVP6LCW.mjs +2447 -0
  25. package/dist/chunk-UWVP6LCW.mjs.map +1 -0
  26. package/dist/index.cjs +1532 -100
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +1148 -107
  29. package/dist/index.d.ts +1148 -107
  30. package/dist/index.mjs +1421 -51
  31. package/dist/index.mjs.map +1 -1
  32. package/package.json +16 -8
  33. package/src/audio-assets.d.ts +8 -0
  34. package/src/components/markdown/MarkdownMessage/CollapseToggle.tsx +3 -1
  35. package/src/components/markdown/MarkdownMessage/components.tsx +2 -5
  36. package/src/stories/index.ts +32 -2
  37. package/src/tools/Chat/README.md +347 -530
  38. package/src/tools/Chat/components/Attachments.tsx +6 -1
  39. package/src/tools/Chat/components/ChatRoot.tsx +30 -2
  40. package/src/tools/Chat/components/Composer.tsx +20 -3
  41. package/src/tools/Chat/components/ErrorBanner.tsx +7 -3
  42. package/src/tools/Chat/components/MessageActions.tsx +3 -1
  43. package/src/tools/Chat/components/MessageBubble.tsx +6 -5
  44. package/src/tools/Chat/components/MessageList.tsx +87 -1
  45. package/src/tools/Chat/components/ToolCalls.tsx +21 -3
  46. package/src/tools/Chat/context/ChatProvider.tsx +21 -3
  47. package/src/tools/Chat/core/audio/audioBus.ts +10 -163
  48. package/src/tools/Chat/core/audio/defaults.ts +43 -0
  49. package/src/tools/Chat/core/audio/index.ts +1 -0
  50. package/src/tools/Chat/core/audio/preferences.ts +5 -59
  51. package/src/tools/Chat/core/audio/sounds/error.mp3 +0 -0
  52. package/src/tools/Chat/core/audio/sounds/mention.mp3 +0 -0
  53. package/src/tools/Chat/core/audio/sounds/notification.mp3 +0 -0
  54. package/src/tools/Chat/core/audio/sounds/received.mp3 +0 -0
  55. package/src/tools/Chat/core/audio/sounds/sent.mp3 +0 -0
  56. package/src/tools/Chat/core/audio/sounds/start.mp3 +0 -0
  57. package/src/tools/Chat/core/audio/types.ts +28 -0
  58. package/src/tools/Chat/core/reducer.ts +33 -0
  59. package/src/tools/Chat/core/transport/index.ts +13 -0
  60. package/src/tools/Chat/core/transport/mappers/index.ts +6 -0
  61. package/src/tools/Chat/core/transport/mappers/pydantic-ai.ts +142 -0
  62. package/src/tools/Chat/core/transport/pydantic-ai-transport.ts +208 -0
  63. package/src/tools/Chat/core/transport/sse.ts +18 -5
  64. package/src/tools/Chat/hooks/index.ts +25 -0
  65. package/src/tools/Chat/hooks/useAutoFocusOnStreamEnd.ts +5 -3
  66. package/src/tools/Chat/hooks/useChat.ts +28 -0
  67. package/src/tools/Chat/hooks/useChatAudio.ts +59 -180
  68. package/src/tools/Chat/hooks/useChatDockPrefs.ts +74 -0
  69. package/src/tools/Chat/hooks/useChatReset.ts +70 -0
  70. package/src/tools/Chat/hooks/useChatUnread.ts +87 -0
  71. package/src/tools/Chat/hooks/useFocusOnEmptyClick.ts +111 -0
  72. package/src/tools/Chat/hooks/useVisitorFingerprint.ts +48 -0
  73. package/src/tools/Chat/index.ts +69 -1
  74. package/src/tools/Chat/launcher/ChatDock.tsx +263 -0
  75. package/src/tools/Chat/launcher/ChatFAB.tsx +349 -0
  76. package/src/tools/Chat/launcher/ChatGreeting.tsx +200 -0
  77. package/src/tools/Chat/launcher/ChatHeader.tsx +76 -0
  78. package/src/tools/Chat/launcher/ChatHeaderActionButton.tsx +87 -0
  79. package/src/tools/Chat/launcher/ChatHeaderAudioToggle.tsx +47 -0
  80. package/src/tools/Chat/launcher/ChatHeaderLanguageButton.tsx +179 -0
  81. package/src/tools/Chat/launcher/ChatHeaderModeToggle.tsx +57 -0
  82. package/src/tools/Chat/launcher/ChatHeaderResetButton.tsx +93 -0
  83. package/src/tools/Chat/launcher/ChatLauncher.tsx +321 -0
  84. package/src/tools/Chat/launcher/ChatUnreadPreview.tsx +197 -0
  85. package/src/tools/Chat/launcher/index.ts +46 -0
  86. package/src/tools/Chat/launcher/useChatPresence.ts +44 -0
  87. package/src/tools/Chat/stories/01-basic.story.tsx +64 -0
  88. package/src/tools/Chat/stories/02-bubbles.story.tsx +21 -0
  89. package/src/tools/Chat/stories/03-tool-calls.story.tsx +59 -0
  90. package/src/tools/Chat/stories/04-personas.story.tsx +78 -0
  91. package/src/tools/Chat/stories/05-launcher.story.tsx +321 -0
  92. package/src/tools/Chat/stories/06-header.story.tsx +147 -0
  93. package/src/tools/Chat/stories/07-audio-actions.story.tsx +112 -0
  94. package/src/tools/Chat/stories/shared/Frame.tsx +21 -0
  95. package/src/tools/Chat/stories/shared/index.ts +5 -0
  96. package/src/tools/Chat/stories/shared/messages.ts +39 -0
  97. package/src/tools/Chat/stories/shared/personas.ts +13 -0
  98. package/src/tools/Chat/stories/shared/seeds.ts +92 -0
  99. package/src/tools/Chat/stories/shared/transports.ts +36 -0
  100. package/src/tools/Chat/styles/bubbleTokens.ts +71 -0
  101. package/src/tools/Chat/styles/index.ts +16 -0
  102. package/src/tools/Chat/styles/useChatStyles.ts +101 -0
  103. package/src/tools/Chat/types/attachment.ts +25 -0
  104. package/src/tools/Chat/types/config.ts +48 -0
  105. package/src/tools/Chat/types/events.ts +35 -0
  106. package/src/tools/Chat/types/index.ts +34 -0
  107. package/src/tools/Chat/types/labels.ts +38 -0
  108. package/src/tools/Chat/types/message.ts +32 -0
  109. package/src/tools/Chat/types/persona.ts +31 -0
  110. package/src/tools/Chat/types/session.ts +43 -0
  111. package/src/tools/Chat/types/tool-call.ts +17 -0
  112. package/src/tools/Chat/types/transport.ts +28 -0
  113. package/src/tools/Chat/types.ts +5 -240
  114. package/src/tools/MarkdownEditor/MarkdownEditor.tsx +50 -14
  115. package/src/tools/MarkdownEditor/index.ts +1 -1
  116. package/src/tools/SpeechRecognition/README.md +336 -0
  117. package/src/tools/SpeechRecognition/__tests__/ids.test.ts +15 -0
  118. package/src/tools/SpeechRecognition/__tests__/language.test.ts +59 -0
  119. package/src/tools/SpeechRecognition/__tests__/reducer.test.ts +71 -0
  120. package/src/tools/SpeechRecognition/__tests__/transcript.test.ts +52 -0
  121. package/src/tools/SpeechRecognition/components/DevicePicker.tsx +49 -0
  122. package/src/tools/SpeechRecognition/components/DictationButton.tsx +93 -0
  123. package/src/tools/SpeechRecognition/components/EngineBadge.tsx +30 -0
  124. package/src/tools/SpeechRecognition/components/ErrorBanner.tsx +52 -0
  125. package/src/tools/SpeechRecognition/components/LanguagePicker.tsx +63 -0
  126. package/src/tools/SpeechRecognition/components/MicMeter.tsx +63 -0
  127. package/src/tools/SpeechRecognition/components/PushToTalkHint.tsx +51 -0
  128. package/src/tools/SpeechRecognition/components/TranscriptView.tsx +55 -0
  129. package/src/tools/SpeechRecognition/components/index.ts +16 -0
  130. package/src/tools/SpeechRecognition/context/SpeechRecognitionProvider.tsx +47 -0
  131. package/src/tools/SpeechRecognition/context/index.ts +6 -0
  132. package/src/tools/SpeechRecognition/core/audio/defaults.ts +24 -0
  133. package/src/tools/SpeechRecognition/core/engine/external.ts +222 -0
  134. package/src/tools/SpeechRecognition/core/engine/http.ts +147 -0
  135. package/src/tools/SpeechRecognition/core/engine/index.ts +52 -0
  136. package/src/tools/SpeechRecognition/core/engine/mediarecorder.ts +105 -0
  137. package/src/tools/SpeechRecognition/core/engine/websocket.ts +211 -0
  138. package/src/tools/SpeechRecognition/core/engine/webspeech.ts +188 -0
  139. package/src/tools/SpeechRecognition/core/ids.ts +11 -0
  140. package/src/tools/SpeechRecognition/core/index.ts +14 -0
  141. package/src/tools/SpeechRecognition/core/language.ts +78 -0
  142. package/src/tools/SpeechRecognition/core/languages-catalog.ts +229 -0
  143. package/src/tools/SpeechRecognition/core/logger.ts +3 -0
  144. package/src/tools/SpeechRecognition/core/reducer.ts +105 -0
  145. package/src/tools/SpeechRecognition/core/transcript.ts +36 -0
  146. package/src/tools/SpeechRecognition/hooks/index.ts +14 -0
  147. package/src/tools/SpeechRecognition/hooks/useDictation.ts +59 -0
  148. package/src/tools/SpeechRecognition/hooks/useEnginePrefs.ts +15 -0
  149. package/src/tools/SpeechRecognition/hooks/useMicDevices.ts +57 -0
  150. package/src/tools/SpeechRecognition/hooks/useMicLevel.ts +52 -0
  151. package/src/tools/SpeechRecognition/hooks/usePushToTalk.ts +85 -0
  152. package/src/tools/SpeechRecognition/hooks/useResolvedLanguage.ts +28 -0
  153. package/src/tools/SpeechRecognition/hooks/useSpeechLanguageInfo.ts +108 -0
  154. package/src/tools/SpeechRecognition/hooks/useSpeechRecognition.ts +188 -0
  155. package/src/tools/SpeechRecognition/hooks/useVoiceSupport.ts +78 -0
  156. package/src/tools/SpeechRecognition/index.ts +82 -0
  157. package/src/tools/SpeechRecognition/lazy.tsx +19 -0
  158. package/src/tools/SpeechRecognition/store/index.ts +2 -0
  159. package/src/tools/SpeechRecognition/store/prefsStore.ts +54 -0
  160. package/src/tools/SpeechRecognition/stories/01-basic.story.tsx +32 -0
  161. package/src/tools/SpeechRecognition/stories/02-dictation-field.story.tsx +32 -0
  162. package/src/tools/SpeechRecognition/stories/03-push-to-talk.story.tsx +27 -0
  163. package/src/tools/SpeechRecognition/stories/04-mic-meter.story.tsx +35 -0
  164. package/src/tools/SpeechRecognition/stories/05-custom-engine-http.story.tsx +40 -0
  165. package/src/tools/SpeechRecognition/stories/06-custom-engine-ws.story.tsx +48 -0
  166. package/src/tools/SpeechRecognition/stories/07-language-device.story.tsx +57 -0
  167. package/src/tools/SpeechRecognition/stories/08-errors-permissions.story.tsx +25 -0
  168. package/src/tools/SpeechRecognition/stories/09-chat-voice.story.tsx +90 -0
  169. package/src/tools/SpeechRecognition/stories/shared.tsx +123 -0
  170. package/src/tools/SpeechRecognition/types.ts +133 -0
  171. package/src/tools/SpeechRecognition/widgets/DictationField.tsx +105 -0
  172. package/src/tools/SpeechRecognition/widgets/VoiceComposerSlot.tsx +305 -0
  173. package/src/tools/SpeechRecognition/widgets/VoiceMessageRecorder.tsx +88 -0
  174. package/src/tools/SpeechRecognition/widgets/index.ts +6 -0
  175. package/dist/ChatRoot-EJC5Y2YM.cjs +0 -14
  176. package/dist/ChatRoot-QOSKJPM6.mjs +0 -5
  177. package/dist/chunk-NWUT327A.mjs.map +0 -1
  178. package/dist/chunk-QLMKCSR6.mjs +0 -2420
  179. package/dist/chunk-QLMKCSR6.mjs.map +0 -1
  180. package/dist/chunk-SI5RD2GD.cjs +0 -2460
  181. package/dist/chunk-SI5RD2GD.cjs.map +0 -1
  182. package/dist/chunk-XACCHZH2.cjs.map +0 -1
  183. package/src/tools/Chat/Chat.story.tsx +0 -1457
@@ -0,0 +1,336 @@
1
+ # SpeechRecognition
2
+
3
+ Decomposed Speech-to-Text for the React app. **Headless core + composable UI parts + lazy bundle**, just like [`Chat`](../Chat) and [`AudioPlayer`](../AudioPlayer).
4
+
5
+ The default backend is the browser's native Web Speech API (zero deps, zero network). For anything else — Deepgram, AssemblyAI, OpenAI Whisper, your own Django/FastAPI gateway — plug a custom engine into the same hook. No SDK lock-in.
6
+
7
+ ```bash
8
+ pnpm add @djangocfg/ui-tools
9
+ ```
10
+
11
+ Subpath import (recommended — keeps the rest of `ui-tools` out of your bundle):
12
+
13
+ ```ts
14
+ import {
15
+ useSpeechRecognition,
16
+ DictationField,
17
+ createWebSpeechEngine,
18
+ createHttpEngine,
19
+ createWebSocketEngine,
20
+ } from '@djangocfg/ui-tools/speech-recognition';
21
+ ```
22
+
23
+ ---
24
+
25
+ ## Quick start
26
+
27
+ ```tsx
28
+ import {
29
+ DictationButton,
30
+ TranscriptView,
31
+ useSpeechRecognition,
32
+ } from '@djangocfg/ui-tools/speech-recognition';
33
+
34
+ function Dictate() {
35
+ const rec = useSpeechRecognition(); // Web Speech engine, browser language
36
+ return (
37
+ <div className="flex items-start gap-3">
38
+ <DictationButton status={rec.status} onClick={() => rec.toggle()} />
39
+ <TranscriptView transcript={rec.transcript} />
40
+ </div>
41
+ );
42
+ }
43
+ ```
44
+
45
+ That's the whole "make me type with my voice" flow. With no config, the hook uses `createWebSpeechEngine()` and the language stored in `useSpeechPrefs` (defaults to `navigator.language`).
46
+
47
+ ---
48
+
49
+ ## DictationField — the opinionated widget
50
+
51
+ A textarea + mic button + interim ghost + push-to-talk hint, all wired up. Final segments are appended to the controlled `value`.
52
+
53
+ ```tsx
54
+ import { DictationField } from '@djangocfg/ui-tools/speech-recognition';
55
+
56
+ const [text, setText] = useState('');
57
+
58
+ <DictationField
59
+ value={text}
60
+ onChange={setText}
61
+ language="ru-RU"
62
+ pushToTalk={{ key: 'alt' }}
63
+ placeholder="Type or hold ⌥ to talk…"
64
+ />
65
+ ```
66
+
67
+ For voice-memo flows there's `VoiceMessageRecorder`: press the mic, dictate freely, and either silence detection or the 60-second cap triggers `onSubmit(text, segments)`.
68
+
69
+ ---
70
+
71
+ ## Custom engines — the whole point
72
+
73
+ `useSpeechRecognition` doesn't care **how** audio becomes text. The `RecognitionEngine` interface is small enough to implement against any backend.
74
+
75
+ ### HTTP (Whisper, custom REST)
76
+
77
+ ```ts
78
+ import { createHttpEngine } from '@djangocfg/ui-tools/speech-recognition';
79
+
80
+ const engine = createHttpEngine({
81
+ url: '/api/stt/transcribe',
82
+ headers: async () => ({ Authorization: `Bearer ${token}` }),
83
+ chunkMs: 750,
84
+ parse: async (resp) => {
85
+ const { text, final } = await resp.json();
86
+ return { text, isFinal: final };
87
+ },
88
+ });
89
+
90
+ const rec = useSpeechRecognition({ engine });
91
+ ```
92
+
93
+ Captures audio with `MediaRecorder` (Opus/WebM by default), POSTs each chunk as the request body, and runs your `parse` callback on the response.
94
+
95
+ ### External (Wails / Tauri / native sidecar)
96
+
97
+ When the host owns the entire pipeline — capture happens outside the browser, transcription runs on the backend, the frontend just commands "start" / "stop" — use `createExternalEngine`. Perfect for cmdop's Wails whisper.cpp integration.
98
+
99
+ ```ts
100
+ import { createExternalEngine } from '@djangocfg/ui-tools/speech-recognition';
101
+ import { EventsOn } from '@runtime';
102
+ import * as VoiceService from '@bindings/desktop/services/voice/service';
103
+
104
+ const wailsEngine = createExternalEngine({
105
+ id: 'wails-whisper',
106
+ onStart: () => VoiceService.StartRecordingForChat(),
107
+ onStop: () => VoiceService.StopRecordingForChat(),
108
+ subscribe: (handle) => {
109
+ const offText = EventsOn('voice:chat-text', (p) => {
110
+ if (p?.error) handle.emitError({ code: 'engine', message: p.error });
111
+ else if (p?.text) handle.emitFinal(p.text);
112
+ else handle.emitError({ code: 'no-speech', message: '' });
113
+ });
114
+ const offState = EventsOn('voice:state', (s) => {
115
+ if (s.state === 'recording' || s.state === 'streaming') handle.markListening();
116
+ if (s.partial) handle.emitPartial(s.partial);
117
+ });
118
+ return () => { offText(); offState(); };
119
+ },
120
+ });
121
+
122
+ <VoiceComposerSlot engine={wailsEngine} value={composer.value} onChange={composer.setValue} />
123
+ ```
124
+
125
+ No `MediaRecorder` / `getUserMedia` — the engine is purely a translator between the chat UI and your event bus. `emitFinal` automatically closes the session, so the composer reset / autosend logic fires the moment the backend posts a result.
126
+
127
+ ### WebSocket (Deepgram / AssemblyAI / custom realtime)
128
+
129
+ ```ts
130
+ import { createWebSocketEngine } from '@djangocfg/ui-tools/speech-recognition';
131
+
132
+ const engine = createWebSocketEngine({
133
+ url: async () => {
134
+ const { token } = await fetch('/api/stt/ticket').then((r) => r.json());
135
+ return `wss://stt.example.com/listen?token=${token}`;
136
+ },
137
+ chunkMs: 250,
138
+ parseMessage: (data) => {
139
+ if (typeof data !== 'string') return { kind: 'ignore' };
140
+ const msg = JSON.parse(data);
141
+ if (msg.type === 'Results') {
142
+ return msg.is_final
143
+ ? { kind: 'final', text: msg.channel.alternatives[0].transcript }
144
+ : { kind: 'partial', text: msg.channel.alternatives[0].transcript };
145
+ }
146
+ return { kind: 'ignore' };
147
+ },
148
+ });
149
+ ```
150
+
151
+ Reconnect with exponential backoff (250 ms → 5 s) is built in. Tokens go through a `url()` callback so they can be minted server-side and rotated per session.
152
+
153
+ ### Anything else
154
+
155
+ Implement `RecognitionEngine` directly — on-device Whisper WASM, Picovoice, native bridges from Tauri / Electron, mocked engines for tests. The interface:
156
+
157
+ ```ts
158
+ interface RecognitionEngine {
159
+ id: string;
160
+ isSupported: boolean;
161
+ start(opts: EngineStartOptions): Promise<void>;
162
+ stop(): Promise<void>;
163
+ abort(): void;
164
+ on(event, cb): Unsub; // 'partial' | 'final' | 'error' | 'state'
165
+ getStream?(): MediaStream | null; // optional — for VU meters
166
+ }
167
+ ```
168
+
169
+ `createEngineBus()` gives you the listener bookkeeping in three lines.
170
+
171
+ ---
172
+
173
+ ## Voice inside the Chat composer
174
+
175
+ Two drop-ins, designed to live together:
176
+
177
+ ```tsx
178
+ import { ChatRoot } from '@djangocfg/ui-tools/chat';
179
+ import {
180
+ ChatHeaderLanguageButton,
181
+ VoiceComposerSlot,
182
+ } from '@djangocfg/ui-tools/speech-recognition';
183
+
184
+ <ChatRoot
185
+ transport={transport}
186
+ composerToolbarEnd={<VoiceComposerSlot />}
187
+ />
188
+
189
+ // Header flag-picker is added via ChatLauncher dock slot:
190
+ <ChatLauncher dock={{ headerActions: <ChatHeaderLanguageButton /> }}>
191
+ ```
192
+
193
+ That's it. No props, no refs. The slot reads / writes the composer through the `ComposerHandle` registered in `ChatProvider` (`focus / moveCursorToEnd / getValue / setValue`), so the built-in `<Composer>` and a TipTap-backed `MarkdownEditor` work the same way — host implements `useRegisterComposer({...})` once and voice flows in.
194
+
195
+ What you get without writing it yourself:
196
+
197
+ - **Anchored merge.** The text typed before pressing the mic is preserved; dictation is appended to that anchor.
198
+ - **Live focus + cursor pinning.** On start, the composer is focused and the caret jumps to end; every partial / final repins the caret so the live transcript visibly grows where the user expects.
199
+ - **Auto-hide.** `useVoiceSupport()` checks `engine.isSupported` + `getUserMedia` + browser type (Firefox / Instagram / TikTok WebViews → renders `null`).
200
+ - **Countdown chip + tooltip.** A `useCountdownFromSeconds()` ticker (max 90 s default) sits next to the mic button.
201
+ - **Silence stop.** Auto-stop after 2.5 s of quiet (configurable via `silenceMs`).
202
+ - **Esc / Enter hotkeys while listening.** Esc cancels (and `stopPropagation` so the chat doesn't close), Enter finishes recording (and **does not** submit the chat — avoids accidental sends mid-sentence).
203
+ - **Earcons.** Bundled start (low chime) + stop (short tick) reused from chat sounds, both at deliberately quiet volumes. Override via `sounds={{ start, stop }}` or disable with `sounds={false}`.
204
+
205
+ The explicit `value` / `onChange` form is still supported for standalone usage outside a `<ChatProvider>`:
206
+
207
+ ```tsx
208
+ <VoiceComposerSlot value={value} onChange={setValue} />
209
+ ```
210
+
211
+ ### Language picker — flag button in the chat header
212
+
213
+ ```tsx
214
+ <ChatHeader actions={<ChatHeaderLanguageButton />} />
215
+ ```
216
+
217
+ Compact 28×28 flag button. Shows the currently-resolved language's country flag (🇷🇺 for `ru-RU`, 🇺🇸 for `en-US`). Clicking opens a searchable `<Combobox>` with **66 BCP-47 tags from the official Chrome Web Speech demo** (`WEB_SPEECH_LANGUAGES` catalogue) — language name + region + tag, every row with a country flag, search across all three fields. Choice persists in `useSpeechPrefs`.
218
+
219
+ ### Shared state across the tree
220
+
221
+ Need to react to listening state elsewhere (dim textarea, header indicator)? Wrap the chat in `<SpeechRecognitionProvider>` and read `useSpeechRecognitionContext()` from any descendant.
222
+
223
+ ### Reading the active language from elsewhere
224
+
225
+ Speech language is **persisted independently** of the app's i18n locale (`djangocfg-stt:prefs` in localStorage). Read it from any component:
226
+
227
+ ```tsx
228
+ import {
229
+ useSpeechPrefs, // raw user choice — `string | null`
230
+ useResolvedLanguage, // resolved BCP-47 with full fallback chain
231
+ useSpeechLanguageInfo, // combo: { tag, iso, country, name, englishName, region, hasUserChoice }
232
+ } from '@djangocfg/ui-tools/speech-recognition';
233
+
234
+ function HeaderBadge() {
235
+ const { tag, name, country, hasUserChoice } = useSpeechLanguageInfo();
236
+ return (
237
+ <Badge>
238
+ <Flag countryCode={country} />
239
+ {name ?? tag}
240
+ {hasUserChoice && <span className="ml-1">★</span>}
241
+ </Badge>
242
+ );
243
+ }
244
+ ```
245
+
246
+ Push to the backend on every change:
247
+
248
+ ```tsx
249
+ const { tag, hasUserChoice } = useSpeechLanguageInfo();
250
+ useEffect(() => {
251
+ if (!hasUserChoice) return;
252
+ void api.user.update({ speechLanguage: tag });
253
+ }, [tag, hasUserChoice]);
254
+ ```
255
+
256
+ Outside React (event handlers, util functions, non-component code):
257
+
258
+ ```ts
259
+ import { useSpeechPrefs } from '@djangocfg/ui-tools/speech-recognition';
260
+ const current = useSpeechPrefs.getState().language; // 'ru-RU' | null
261
+ const unsubscribe = useSpeechPrefs.subscribe((state) => {
262
+ console.log('language changed', state.language);
263
+ });
264
+ ```
265
+
266
+ ---
267
+
268
+ ## What you get for free
269
+
270
+ - **Zero-setup default** — `useSpeechRecognition()` works with no engine, no config.
271
+ - **Permission-aware UX** — `permission-denied` / `no-microphone` / `no-speech` surface as typed errors; `<ErrorBanner>` translates them.
272
+ - **Persisted prefs** — language, mic device, engine choice live in zustand+localStorage (`djangocfg-stt:prefs`).
273
+ - **Auto-stop** — `autoStop: { silenceMs, maxMs, silenceThreshold }` based on RMS analyser; opt-in.
274
+ - **Push-to-talk** — `usePushToTalk({ key: 'mod+alt' })` with smart input-field bypass.
275
+ - **VU meter** — `useMicLevel(stream)` + `<MicMeter />` for level visualisation.
276
+ - **Mic enumeration** — `useMicDevices()` returns `audioinput` list, refreshes on `devicechange`.
277
+ - **Interim+final UI** — `<TranscriptView>` dims the trailing interim chunk so users see the model "thinking".
278
+
279
+ ---
280
+
281
+ ## Public surface
282
+
283
+ ### Hooks
284
+ `useSpeechRecognition`, `useDictation`, `usePushToTalk`, `useMicDevices`, `useMicLevel`, `useEnginePrefs`, `useSpeechPrefs`, `useVoiceSupport`, `useResolvedLanguage`, `useSpeechLanguageInfo`.
285
+
286
+ ### Context
287
+ `SpeechRecognitionProvider`, `useSpeechRecognitionContext`, `useSpeechRecognitionContextOptional` — lift a single engine instance so any descendant (composer slot, header badge, transcript overlay) sees the same `status` / `transcript` / `level`.
288
+
289
+ ### Components
290
+ `DictationButton`, `MicMeter`, `TranscriptView`, `LanguagePicker`, `DevicePicker`, `EngineBadge`, `ErrorBanner`, `PushToTalkHint`. Chat header: `ChatHeaderLanguageButton` (re-exported from chat launcher).
291
+
292
+ ### Widgets
293
+ `DictationField`, `VoiceMessageRecorder`, `VoiceComposerSlot`, `LazyDictationField`.
294
+
295
+ ### Engines
296
+ `createWebSpeechEngine`, `createHttpEngine`, `createWebSocketEngine`, `createExternalEngine`, `createEngineBus`, `startMicCapture`, `pickMime`.
297
+
298
+ ### Language utilities
299
+ `WEB_SPEECH_LANGUAGES` (catalogue of 66 supported BCP-47 tags from the Chrome demo), `WEB_SPEECH_TAGS` (flat array), `findSpeechLanguage(tag)`, `countryFromTag(tag)`, `toBCP47(iso)`, `resolveSpeechLanguage({ explicit, prefs, i18n })`, `DEFAULT_ISO_TO_BCP47`, `DEFAULT_VOICE_SOUNDS`.
300
+
301
+ ### Types
302
+ `RecognitionEngine`, `RecognitionStatus`, `RecognitionError`, `RecognitionErrorCode`, `Segment`, `Transcript`, `EngineState`, `EngineStartOptions`, `EngineEventMap`, `Unsub`, `AutoStopOptions`, `VoiceSupport`, `VoiceUnsupportedReason`.
303
+
304
+ ---
305
+
306
+ ## Tests
307
+
308
+ ```bash
309
+ pnpm test # one-shot
310
+ pnpm test:watch # vitest watch mode
311
+ ```
312
+
313
+ Covered (21 cases, all pure-function): reducer state machine (`__tests__/reducer.test.ts`), transcript merge + `normaliseFinal` (`__tests__/transcript.test.ts`), `newSegmentId` (`__tests__/ids.test.ts`), `toBCP47` + `resolveSpeechLanguage` (`__tests__/language.test.ts`). Engine adapters and UI parts rely on stories — `MediaRecorder` / `getUserMedia` / `WebSocket` are mock-engine-driven in the playground.
314
+
315
+ ---
316
+
317
+ ## Stories
318
+
319
+ `Tools/SpeechRecognition/{Basic, DictationField, PushToTalk, MicMeter, CustomEngine: HTTP, CustomEngine: WebSocket, Language & Device, Errors}` plus `Tools/Chat/Voice composer` for the chat-slot integration — all driven by a deterministic mock engine so the playground never asks for microphone permission.
320
+
321
+ ```bash
322
+ pnpm playground
323
+ ```
324
+
325
+ ---
326
+
327
+ ## Browser support
328
+
329
+ | Browser | Default engine | Notes |
330
+ |---|---|---|
331
+ | Chrome / Edge desktop | ✅ Web Speech | Best — continuous + interim results. |
332
+ | Safari 16+ desktop | ✅ Web Speech | Continuous works; some locales partial only. |
333
+ | Firefox desktop | ❌ Web Speech | `isSupported === false`. Pass a custom engine (HTTP/WS). |
334
+ | Mobile WebViews | ⚠️ varies | Always pair with a fallback engine in production. |
335
+
336
+ For Firefox / WebView consumers: pass `engine: createHttpEngine(...)` and you're streaming again.
@@ -0,0 +1,15 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { newSegmentId } from '../core/ids';
4
+
5
+ describe('newSegmentId', () => { // Suite for the segment-id generator imported from ../core/ids.
6
+ it('produces unique values across calls', () => {
7
+ const ids = new Set<string>(); // A Set drops duplicates, so its size counts distinct ids.
8
+ for (let i = 0; i < 200; i += 1) ids.add(newSegmentId());
9
+ expect(ids.size).toBe(200); // 200 calls must yield 200 distinct ids.
10
+ });
11
+
12
+ it('matches the seg_<time>_<n> shape', () => {
13
+ expect(newSegmentId()).toMatch(/^seg_[a-z0-9]+_[a-z0-9]+$/); // "seg_" followed by two lowercase-alphanumeric groups joined by "_".
14
+ });
15
+ });
@@ -0,0 +1,59 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { resolveSpeechLanguage, toBCP47 } from '../core/language';
4
+
5
+ describe('toBCP47', () => { // Suite pinning how toBCP47 turns short language codes into region-qualified tags.
6
+ it('maps known ISO-2 codes to canonical BCP-47', () => {
7
+ expect(toBCP47('en')).toBe('en-US');
8
+ expect(toBCP47('ru')).toBe('ru-RU');
9
+ expect(toBCP47('ko')).toBe('ko-KR');
10
+ expect(toBCP47('pt')).toBe('pt-BR'); // Region is not derived from the code itself here.
11
+ expect(toBCP47('no')).toBe('nb-NO'); // The language subtag changes too ("no" becomes "nb").
12
+ });
13
+
14
+ it('falls back to <code>-<UPPER(code)> for unmapped ISO codes', () => {
15
+ expect(toBCP47('uk')).toBe('uk-UK'); // NOTE(review): Ukrainian is normally tagged uk-UA; this pins the mechanical fallback — confirm it is intended.
16
+ expect(toBCP47('cs')).toBe('cs-CS'); // NOTE(review): Czech is normally tagged cs-CZ; same caveat as above.
17
+ });
18
+
19
+ it('passes BCP-47 input through unchanged', () => {
20
+ expect(toBCP47('en-GB')).toBe('en-GB');
21
+ expect(toBCP47('zh-TW')).toBe('zh-TW');
22
+ });
23
+
24
+ it('returns undefined for empty / null', () => {
25
+ expect(toBCP47(null)).toBeUndefined();
26
+ expect(toBCP47(undefined)).toBeUndefined();
27
+ expect(toBCP47('')).toBeUndefined();
28
+ expect(toBCP47(' ')).toBeUndefined(); // Whitespace-only input counts as empty.
29
+ });
30
+ });
31
+
32
+ describe('resolveSpeechLanguage', () => { // Suite pinning the precedence of the explicit / prefs / i18n inputs.
33
+ it('priority: explicit beats everything', () => {
34
+ expect(
35
+ resolveSpeechLanguage({
36
+ explicit: 'ko-KR',
37
+ prefs: 'ru-RU',
38
+ i18n: 'en',
39
+ }),
40
+ ).toBe('ko-KR');
41
+ });
42
+
43
+ it('priority: prefs beats i18n', () => {
44
+ expect(resolveSpeechLanguage({ prefs: 'ru-RU', i18n: 'en' })).toBe('ru-RU');
45
+ });
46
+
47
+ it('priority: i18n beats navigator', () => {
48
+ expect(resolveSpeechLanguage({ i18n: 'ru' })).toBe('ru-RU'); // The ISO-2 i18n value comes back normalised to ru-RU.
49
+ });
50
+
51
+ it('falls back to a well-formed xx-XX tag when nothing is supplied', () => { // Title now states what is asserted: the test never removes navigator, so the result is not guaranteed to be en-US.
52
+ expect(resolveSpeechLanguage({})).toMatch(/^[a-z]{2}-[A-Z]{2}$/); // Shape check only: two lowercase letters, "-", two uppercase letters.
53
+ });
54
+
55
+ it('normalises ISO-2 in any slot', () => {
56
+ expect(resolveSpeechLanguage({ explicit: 'ru' })).toBe('ru-RU');
57
+ expect(resolveSpeechLanguage({ prefs: 'ko' })).toBe('ko-KR');
58
+ });
59
+ });
@@ -0,0 +1,71 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { INITIAL_STATE, reducer } from '../core/reducer';
4
+
5
+ describe('SpeechRecognition reducer', () => { // Suite driving the reducer from ../core/reducer through its action types.
6
+ it('starts and finishes a session', () => {
7
+ const s1 = reducer(INITIAL_STATE, { type: 'START' });
8
+ expect(s1.status).toBe('starting');
9
+ expect(s1.startedAt).toBeTypeOf('number'); // START must stamp a numeric start time.
10
+ expect(s1.error).toBeNull();
11
+
12
+ const s2 = reducer(s1, { type: 'STARTED' });
13
+ expect(s2.status).toBe('listening');
14
+
15
+ const s3 = reducer(s2, { type: 'STOP' });
16
+ expect(s3.status).toBe('stopping');
17
+
18
+ const s4 = reducer(s3, { type: 'STOPPED' });
19
+ expect(s4.status).toBe('idle'); // Full cycle: starting -> listening -> stopping -> idle.
20
+ });
21
+
22
+ it('merges PARTIAL into an interim segment, then promotes to FINAL', () => {
23
+ let s = reducer(INITIAL_STATE, { type: 'START' });
24
+ s = reducer(s, { type: 'STARTED' });
25
+ s = reducer(s, { type: 'PARTIAL', text: 'hel', segmentId: 'seg-1' });
26
+ s = reducer(s, { type: 'PARTIAL', text: 'hello', segmentId: 'seg-1' }); // Same segmentId: the text is replaced, no new segment is added.
27
+ expect(s.segments).toHaveLength(1);
28
+ expect(s.segments[0]).toMatchObject({
29
+ id: 'seg-1',
30
+ text: 'hello',
31
+ isFinal: false,
32
+ });
33
+
34
+ s = reducer(s, { type: 'FINAL', text: 'hello world', segmentId: 'seg-1', confidence: 0.91 }); // FINAL with the same id upgrades the existing segment in place.
35
+ expect(s.segments).toHaveLength(1);
36
+ expect(s.segments[0]).toMatchObject({
37
+ id: 'seg-1',
38
+ text: 'hello world',
39
+ isFinal: true,
40
+ confidence: 0.91,
41
+ });
42
+ });
43
+
44
+ it('accumulates separate segments', () => {
45
+ let s = reducer(INITIAL_STATE, { type: 'START' });
46
+ s = reducer(s, { type: 'FINAL', text: 'one', segmentId: 'a' });
47
+ s = reducer(s, { type: 'FINAL', text: 'two', segmentId: 'b' });
48
+ s = reducer(s, { type: 'PARTIAL', text: 'thr', segmentId: 'c' });
49
+ expect(s.segments.map((seg) => seg.text)).toEqual(['one', 'two', 'thr']); // Distinct ids append in arrival order.
50
+ expect(s.segments.map((seg) => seg.isFinal)).toEqual([true, true, false]);
51
+ });
52
+
53
+ it('records errors and resets cleanly', () => {
54
+ let s = reducer(INITIAL_STATE, { type: 'START' });
55
+ s = reducer(s, {
56
+ type: 'ERROR',
57
+ error: { code: 'no-speech', message: 'no speech' },
58
+ });
59
+ expect(s.status).toBe('error');
60
+ expect(s.error?.code).toBe('no-speech');
61
+
62
+ const reset = reducer(s, { type: 'RESET' });
63
+ expect(reset).toEqual(INITIAL_STATE); // RESET yields a state deep-equal to the initial one.
64
+ });
65
+
66
+ it('ignores unknown actions', () => {
67
+ // @ts-expect-error - intentionally invalid for the default branch
68
+ const next = reducer(INITIAL_STATE, { type: 'NOPE' });
69
+ expect(next).toBe(INITIAL_STATE); // toBe checks identity: the very same state object must come back.
70
+ });
71
+ });
@@ -0,0 +1,52 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import {
4
+ EMPTY_TRANSCRIPT,
5
+ buildTranscript,
6
+ joinFinal,
7
+ normaliseFinal,
8
+ } from '../core/transcript';
9
+ import type { Segment } from '../types';
10
+
11
/**
 * Test fixture: builds a `Segment` with `startedAt` fixed at 0.
 *
 * @param text - Segment text.
 * @param isFinal - Whether the segment is final (`true`) or interim (`false`).
 * @param id - Segment id; defaults to `text` when omitted.
 */
function seg(text: string, isFinal: boolean, id = text): Segment {
  const fixture: Segment = { id, text, isFinal, startedAt: 0 };
  return fixture;
}
14
+
15
// Unit tests for the transcript helpers: joinFinal, buildTranscript,
// EMPTY_TRANSCRIPT and normaliseFinal.
describe('transcript helpers', () => {
  it('joinFinal skips interim and trims whitespace', () => {
    const segments = [
      seg('Hello.', true, 'a'),
      seg(' world ', true, 'b'),
      seg('partial', false, 'c'),
    ];

    // Only the two final segments contribute; the interim one is dropped.
    expect(joinFinal(segments)).toBe('Hello. world');
  });

  it('buildTranscript exposes trailing interim text', () => {
    const transcript = buildTranscript([seg('Hi.', true, 'a'), seg('there', false, 'b')]);

    expect(transcript.final).toBe('Hi.');
    expect(transcript.interim).toBe('there');
    expect(transcript.segments).toHaveLength(2);
  });

  it('buildTranscript with only finals leaves interim empty', () => {
    const transcript = buildTranscript([seg('Done.', true)]);

    expect(transcript.interim).toBe('');
    expect(transcript.final).toBe('Done.');
  });

  it('EMPTY_TRANSCRIPT is the zero value', () => {
    expect(EMPTY_TRANSCRIPT.interim).toBe('');
    expect(EMPTY_TRANSCRIPT.final).toBe('');
    expect(EMPTY_TRANSCRIPT.segments).toEqual([]);
  });

  it('normaliseFinal collapses whitespace and fixes punctuation spacing', () => {
    const padded = normaliseFinal(' hello world ');
    const punctuated = normaliseFinal('Hi , there !');
    const multiline = normaliseFinal('one\ntwo\tthree');

    expect(padded).toBe('hello world');
    expect(punctuated).toBe('Hi, there!');
    expect(multiline).toBe('one two three');
  });
});
@@ -0,0 +1,49 @@
1
+ 'use client';
2
+
3
+ import type * as React from 'react';
4
+
5
+ import { cn } from '@djangocfg/ui-core/lib';
6
+
7
+ import type { MicDevice } from '../hooks/useMicDevices';
8
+
9
/** Props accepted by {@link DevicePicker}. */
export interface DevicePickerProps {
  /** Devices rendered as `<option>` entries, keyed and valued by `deviceId`. */
  devices: MicDevice[];
  /** Currently selected device id; `null` selects the default (empty-value) option. */
  value: string | null;
  /** Called with the chosen device id, or `null` when the default option is picked. */
  onChange: (id: string | null) => void;
  /** Label of the default option. Defaults to `'System default'`. */
  defaultLabel?: string;
  /** Accessible name for the `<select>`. Defaults to `'Microphone'`. */
  ariaLabel?: string;
  /** Disables the `<select>` when true. */
  disabled?: boolean;
  /** Extra class names passed to `cn` after the built-in ones. */
  className?: string;
}
18
+
19
/**
 * Native `<select>` for choosing a microphone.
 *
 * The first option always has an empty value and represents "no explicit
 * device"; it maps to `null` in both directions (`value === null` selects it,
 * and picking it calls `onChange(null)`).
 */
export function DevicePicker(props: DevicePickerProps): React.ReactElement {
  const {
    devices,
    value,
    onChange,
    className,
    disabled,
    defaultLabel = 'System default',
    ariaLabel = 'Microphone',
  } = props;

  // The empty string is the sentinel for the default option; report it as null.
  const handleChange = (event: React.ChangeEvent<HTMLSelectElement>): void => {
    const picked = event.target.value;
    onChange(picked === '' ? null : picked);
  };

  const selectClassName = cn(
    'h-8 rounded-md border border-input bg-background px-2 text-xs text-foreground',
    'focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring',
    'disabled:cursor-not-allowed disabled:opacity-50',
    className,
  );

  return (
    <select
      value={value ?? ''}
      onChange={handleChange}
      disabled={disabled}
      aria-label={ariaLabel}
      className={selectClassName}
    >
      <option value="">{defaultLabel}</option>
      {devices.map((device) => (
        <option key={device.deviceId} value={device.deviceId}>
          {device.label}
        </option>
      ))}
    </select>
  );
}
@@ -0,0 +1,93 @@
1
+ 'use client';
2
+
3
+ import type * as React from 'react';
4
+
5
+ import { Loader2, Mic, MicOff } from 'lucide-react';
6
+ import type { CSSProperties, ReactNode } from 'react';
7
+
8
+ import { cn } from '@djangocfg/ui-core/lib';
9
+
10
+ import type { RecognitionStatus } from '../types';
11
+
12
/** Props accepted by {@link DictationButton}. */
export interface DictationButtonProps {
  /** Current recognition status; selects the icon, colours and pressed state. */
  status: RecognitionStatus;
  /** Click handler forwarded to the underlying `<button>`. */
  onClick: () => void;
  /** Defaults to `true`. When `false` the button is rendered disabled. */
  isSupported?: boolean;
  /** Button diameter preset. Defaults to `'md'`. */
  size?: 'sm' | 'md' | 'lg';
  /** Extra class names passed to `cn` after the built-in ones. */
  className?: string;
  /** Inline style forwarded to the underlying `<button>`. */
  style?: CSSProperties;
  /** Overrides the status-derived `aria-label`. */
  ariaLabel?: string;
  /** Override icon for the idle state. */
  idleIcon?: ReactNode;
  /** Override icon for the listening state. */
  listeningIcon?: ReactNode;
  /** Disable without unmounting. */
  disabled?: boolean;
}
27
+
28
/** The concrete (non-optional) size presets of `DictationButtonProps['size']`. */
type DictationButtonSize = NonNullable<DictationButtonProps['size']>;

/**
 * Class names per size preset: the button's height/width plus the
 * height/width applied to any nested `<svg>` icon.
 */
const SIZE_CLS: Record<DictationButtonSize, string> = {
  sm: 'h-8 w-8 [&_svg]:h-4 [&_svg]:w-4',
  md: 'h-10 w-10 [&_svg]:h-5 [&_svg]:w-5',
  lg: 'h-12 w-12 [&_svg]:h-6 [&_svg]:w-6',
};
33
+
34
/**
 * Round microphone button. The icon follows `status`, and a soft pulse ring
 * is shown while listening (the `'starting'` status counts as listening).
 *
 * Accessibility: `aria-pressed` reflects the listening state and the default
 * `aria-label` is one of "Dictation not supported", "Stop dictation" or
 * "Start dictation" unless `ariaLabel` overrides it.
 *
 * State precedence for the icon is: stopping, then unsupported, then
 * listening, then idle. The default label uses the same order for the last
 * three, so the label and the icon never disagree.
 */
export function DictationButton({
  status,
  onClick,
  isSupported = true,
  size = 'md',
  className,
  style,
  ariaLabel,
  idleIcon,
  listeningIcon,
  disabled,
}: DictationButtonProps): React.ReactElement {
  const listening = status === 'listening' || status === 'starting';
  const stopping = status === 'stopping';
  const off = !isSupported;

  // "Unsupported" wins over "listening" so the label matches the MicOff icon.
  const defaultLabel = off
    ? 'Dictation not supported'
    : listening
      ? 'Stop dictation'
      : 'Start dictation';

  let icon: ReactNode;
  if (stopping) {
    icon = <Loader2 className="animate-spin" />;
  } else if (off) {
    // Always the built-in icon: `listeningIcon` is an override for the
    // listening state only and must not leak into the unsupported state.
    icon = <MicOff />;
  } else if (listening) {
    icon = listeningIcon ?? <Mic />;
  } else {
    icon = idleIcon ?? <Mic />;
  }

  return (
    <button
      type="button"
      onClick={onClick}
      disabled={disabled || off}
      aria-pressed={listening}
      aria-label={ariaLabel ?? defaultLabel}
      className={cn(
        'relative inline-flex items-center justify-center rounded-full transition-colors',
        'focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2',
        'disabled:cursor-not-allowed disabled:opacity-50',
        SIZE_CLS[size],
        listening
          ? 'bg-destructive text-destructive-foreground hover:bg-destructive/90'
          : 'bg-primary text-primary-foreground hover:bg-primary/90',
        className,
      )}
      style={style}
    >
      {listening && (
        <span
          aria-hidden
          className="absolute inset-0 rounded-full bg-destructive/40 animate-ping"
        />
      )}
      {icon}
    </button>
  );
}