@djangocfg/ui-tools 2.1.381 → 2.1.383

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +132 -899
  2. package/dist/ChatRoot-6IZFM5HM.mjs +5 -0
  3. package/dist/{ChatRoot-EJC5Y2YM.cjs.map → ChatRoot-6IZFM5HM.mjs.map} +1 -1
  4. package/dist/ChatRoot-LW4XNIKP.cjs +14 -0
  5. package/dist/{ChatRoot-QOSKJPM6.mjs.map → ChatRoot-LW4XNIKP.cjs.map} +1 -1
  6. package/dist/DictationField-U25MEYAL.mjs +4 -0
  7. package/dist/DictationField-U25MEYAL.mjs.map +1 -0
  8. package/dist/DictationField-XWR5VOID.cjs +13 -0
  9. package/dist/DictationField-XWR5VOID.cjs.map +1 -0
  10. package/dist/{DocsLayout-2YKPXZYO.mjs → DocsLayout-2P3ONDWJ.mjs} +3 -3
  11. package/dist/{DocsLayout-2YKPXZYO.mjs.map → DocsLayout-2P3ONDWJ.mjs.map} +1 -1
  12. package/dist/{DocsLayout-Q4KS3QWW.cjs → DocsLayout-2YZNS5VK.cjs} +8 -8
  13. package/dist/{DocsLayout-Q4KS3QWW.cjs.map → DocsLayout-2YZNS5VK.cjs.map} +1 -1
  14. package/dist/chunk-4PFW7MIJ.cjs +837 -0
  15. package/dist/chunk-4PFW7MIJ.cjs.map +1 -0
  16. package/dist/chunk-C2YN6WEO.mjs +833 -0
  17. package/dist/chunk-C2YN6WEO.mjs.map +1 -0
  18. package/dist/{chunk-XACCHZH2.cjs → chunk-FIRK5CEH.cjs} +42 -4
  19. package/dist/chunk-FIRK5CEH.cjs.map +1 -0
  20. package/dist/{chunk-NWUT327A.mjs → chunk-HIK6BPL7.mjs} +38 -5
  21. package/dist/chunk-HIK6BPL7.mjs.map +1 -0
  22. package/dist/chunk-OZAU3QWD.cjs +2493 -0
  23. package/dist/chunk-OZAU3QWD.cjs.map +1 -0
  24. package/dist/chunk-UWVP6LCW.mjs +2447 -0
  25. package/dist/chunk-UWVP6LCW.mjs.map +1 -0
  26. package/dist/index.cjs +1668 -99
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +1215 -107
  29. package/dist/index.d.ts +1215 -107
  30. package/dist/index.mjs +1555 -50
  31. package/dist/index.mjs.map +1 -1
  32. package/package.json +16 -15
  33. package/src/audio-assets.d.ts +8 -0
  34. package/src/components/markdown/MarkdownMessage/CollapseToggle.tsx +3 -1
  35. package/src/components/markdown/MarkdownMessage/components.tsx +2 -5
  36. package/src/tools/Chat/README.md +347 -530
  37. package/src/tools/Chat/components/Attachments.tsx +6 -1
  38. package/src/tools/Chat/components/ChatRoot.tsx +30 -2
  39. package/src/tools/Chat/components/Composer.tsx +20 -3
  40. package/src/tools/Chat/components/ErrorBanner.tsx +7 -3
  41. package/src/tools/Chat/components/MessageActions.tsx +3 -1
  42. package/src/tools/Chat/components/MessageBubble.tsx +6 -5
  43. package/src/tools/Chat/components/MessageList.tsx +87 -1
  44. package/src/tools/Chat/components/ToolCalls.tsx +21 -3
  45. package/src/tools/Chat/context/ChatProvider.tsx +21 -3
  46. package/src/tools/Chat/core/audio/audioBus.ts +10 -163
  47. package/src/tools/Chat/core/audio/defaults.ts +43 -0
  48. package/src/tools/Chat/core/audio/index.ts +1 -0
  49. package/src/tools/Chat/core/audio/preferences.ts +5 -59
  50. package/src/tools/Chat/core/audio/sounds/error.mp3 +0 -0
  51. package/src/tools/Chat/core/audio/sounds/mention.mp3 +0 -0
  52. package/src/tools/Chat/core/audio/sounds/notification.mp3 +0 -0
  53. package/src/tools/Chat/core/audio/sounds/received.mp3 +0 -0
  54. package/src/tools/Chat/core/audio/sounds/sent.mp3 +0 -0
  55. package/src/tools/Chat/core/audio/sounds/start.mp3 +0 -0
  56. package/src/tools/Chat/core/audio/types.ts +28 -0
  57. package/src/tools/Chat/core/reducer.ts +33 -0
  58. package/src/tools/Chat/core/transport/index.ts +13 -0
  59. package/src/tools/Chat/core/transport/mappers/index.ts +6 -0
  60. package/src/tools/Chat/core/transport/mappers/pydantic-ai.ts +142 -0
  61. package/src/tools/Chat/core/transport/pydantic-ai-transport.ts +208 -0
  62. package/src/tools/Chat/core/transport/sse.ts +18 -5
  63. package/src/tools/Chat/hooks/index.ts +25 -0
  64. package/src/tools/Chat/hooks/useAutoFocusOnStreamEnd.ts +5 -3
  65. package/src/tools/Chat/hooks/useChat.ts +28 -0
  66. package/src/tools/Chat/hooks/useChatAudio.ts +59 -180
  67. package/src/tools/Chat/hooks/useChatDockPrefs.ts +74 -0
  68. package/src/tools/Chat/hooks/useChatReset.ts +70 -0
  69. package/src/tools/Chat/hooks/useChatUnread.ts +87 -0
  70. package/src/tools/Chat/hooks/useFocusOnEmptyClick.ts +111 -0
  71. package/src/tools/Chat/hooks/useVisitorFingerprint.ts +48 -0
  72. package/src/tools/Chat/index.ts +84 -1
  73. package/src/tools/Chat/launcher/ChatDock.tsx +263 -0
  74. package/src/tools/Chat/launcher/ChatFAB.tsx +349 -0
  75. package/src/tools/Chat/launcher/ChatGreeting.tsx +200 -0
  76. package/src/tools/Chat/launcher/ChatHeader.tsx +76 -0
  77. package/src/tools/Chat/launcher/ChatHeaderActionButton.tsx +87 -0
  78. package/src/tools/Chat/launcher/ChatHeaderAudioToggle.tsx +47 -0
  79. package/src/tools/Chat/launcher/ChatHeaderLanguageButton.tsx +179 -0
  80. package/src/tools/Chat/launcher/ChatHeaderModeToggle.tsx +57 -0
  81. package/src/tools/Chat/launcher/ChatHeaderResetButton.tsx +93 -0
  82. package/src/tools/Chat/launcher/ChatLauncher.tsx +321 -0
  83. package/src/tools/Chat/launcher/ChatUnreadPreview.tsx +197 -0
  84. package/src/tools/Chat/launcher/index.ts +46 -0
  85. package/src/tools/Chat/launcher/useChatPresence.ts +44 -0
  86. package/src/tools/Chat/styles/bubbleTokens.ts +71 -0
  87. package/src/tools/Chat/styles/index.ts +16 -0
  88. package/src/tools/Chat/styles/useChatStyles.ts +101 -0
  89. package/src/tools/Chat/types/attachment.ts +25 -0
  90. package/src/tools/Chat/types/config.ts +48 -0
  91. package/src/tools/Chat/types/events.ts +35 -0
  92. package/src/tools/Chat/types/index.ts +34 -0
  93. package/src/tools/Chat/types/labels.ts +38 -0
  94. package/src/tools/Chat/types/message.ts +32 -0
  95. package/src/tools/Chat/types/persona.ts +31 -0
  96. package/src/tools/Chat/types/session.ts +43 -0
  97. package/src/tools/Chat/types/tool-call.ts +17 -0
  98. package/src/tools/Chat/types/transport.ts +28 -0
  99. package/src/tools/Chat/types.ts +5 -240
  100. package/src/tools/MarkdownEditor/MarkdownEditor.tsx +50 -14
  101. package/src/tools/MarkdownEditor/index.ts +1 -1
  102. package/src/tools/SpeechRecognition/README.md +336 -0
  103. package/src/tools/SpeechRecognition/__tests__/ids.test.ts +15 -0
  104. package/src/tools/SpeechRecognition/__tests__/language.test.ts +59 -0
  105. package/src/tools/SpeechRecognition/__tests__/reducer.test.ts +71 -0
  106. package/src/tools/SpeechRecognition/__tests__/transcript.test.ts +52 -0
  107. package/src/tools/SpeechRecognition/components/DevicePicker.tsx +49 -0
  108. package/src/tools/SpeechRecognition/components/DictationButton.tsx +93 -0
  109. package/src/tools/SpeechRecognition/components/EngineBadge.tsx +30 -0
  110. package/src/tools/SpeechRecognition/components/ErrorBanner.tsx +52 -0
  111. package/src/tools/SpeechRecognition/components/LanguagePicker.tsx +63 -0
  112. package/src/tools/SpeechRecognition/components/MicMeter.tsx +63 -0
  113. package/src/tools/SpeechRecognition/components/PushToTalkHint.tsx +51 -0
  114. package/src/tools/SpeechRecognition/components/TranscriptView.tsx +55 -0
  115. package/src/tools/SpeechRecognition/components/index.ts +16 -0
  116. package/src/tools/SpeechRecognition/context/SpeechRecognitionProvider.tsx +47 -0
  117. package/src/tools/SpeechRecognition/context/index.ts +6 -0
  118. package/src/tools/SpeechRecognition/core/audio/defaults.ts +24 -0
  119. package/src/tools/SpeechRecognition/core/engine/external.ts +222 -0
  120. package/src/tools/SpeechRecognition/core/engine/http.ts +147 -0
  121. package/src/tools/SpeechRecognition/core/engine/index.ts +52 -0
  122. package/src/tools/SpeechRecognition/core/engine/mediarecorder.ts +105 -0
  123. package/src/tools/SpeechRecognition/core/engine/websocket.ts +211 -0
  124. package/src/tools/SpeechRecognition/core/engine/webspeech.ts +188 -0
  125. package/src/tools/SpeechRecognition/core/ids.ts +11 -0
  126. package/src/tools/SpeechRecognition/core/index.ts +14 -0
  127. package/src/tools/SpeechRecognition/core/language.ts +78 -0
  128. package/src/tools/SpeechRecognition/core/languages-catalog.ts +229 -0
  129. package/src/tools/SpeechRecognition/core/logger.ts +3 -0
  130. package/src/tools/SpeechRecognition/core/reducer.ts +105 -0
  131. package/src/tools/SpeechRecognition/core/transcript.ts +36 -0
  132. package/src/tools/SpeechRecognition/hooks/index.ts +14 -0
  133. package/src/tools/SpeechRecognition/hooks/useDictation.ts +59 -0
  134. package/src/tools/SpeechRecognition/hooks/useEnginePrefs.ts +15 -0
  135. package/src/tools/SpeechRecognition/hooks/useMicDevices.ts +57 -0
  136. package/src/tools/SpeechRecognition/hooks/useMicLevel.ts +52 -0
  137. package/src/tools/SpeechRecognition/hooks/usePushToTalk.ts +85 -0
  138. package/src/tools/SpeechRecognition/hooks/useResolvedLanguage.ts +28 -0
  139. package/src/tools/SpeechRecognition/hooks/useSpeechLanguageInfo.ts +108 -0
  140. package/src/tools/SpeechRecognition/hooks/useSpeechRecognition.ts +188 -0
  141. package/src/tools/SpeechRecognition/hooks/useVoiceSupport.ts +78 -0
  142. package/src/tools/SpeechRecognition/index.ts +82 -0
  143. package/src/tools/SpeechRecognition/lazy.tsx +19 -0
  144. package/src/tools/SpeechRecognition/store/index.ts +2 -0
  145. package/src/tools/SpeechRecognition/store/prefsStore.ts +54 -0
  146. package/src/tools/SpeechRecognition/types.ts +133 -0
  147. package/src/tools/SpeechRecognition/widgets/DictationField.tsx +105 -0
  148. package/src/tools/SpeechRecognition/widgets/VoiceComposerSlot.tsx +305 -0
  149. package/src/tools/SpeechRecognition/widgets/VoiceMessageRecorder.tsx +88 -0
  150. package/src/tools/SpeechRecognition/widgets/index.ts +6 -0
  151. package/dist/ChatRoot-EJC5Y2YM.cjs +0 -14
  152. package/dist/ChatRoot-QOSKJPM6.mjs +0 -5
  153. package/dist/chunk-NWUT327A.mjs.map +0 -1
  154. package/dist/chunk-QLMKCSR6.mjs +0 -2420
  155. package/dist/chunk-QLMKCSR6.mjs.map +0 -1
  156. package/dist/chunk-SI5RD2GD.cjs +0 -2460
  157. package/dist/chunk-SI5RD2GD.cjs.map +0 -1
  158. package/dist/chunk-XACCHZH2.cjs.map +0 -1
  159. package/src/components/markdown/MarkdownMessage/MarkdownMessage.story.tsx +0 -771
  160. package/src/stories/index.ts +0 -33
  161. package/src/tools/AudioPlayer/AudioPlayer.story.tsx +0 -481
  162. package/src/tools/Chat/Chat.story.tsx +0 -1457
  163. package/src/tools/CodeEditor/CodeEditor.story.tsx +0 -202
  164. package/src/tools/CronScheduler/CronScheduler.story.tsx +0 -300
  165. package/src/tools/Gallery/Gallery.story.tsx +0 -237
  166. package/src/tools/ImageViewer/ImageViewer.story.tsx +0 -85
  167. package/src/tools/JsonForm/JsonForm.story.tsx +0 -350
  168. package/src/tools/JsonTree/JsonTree.story.tsx +0 -141
  169. package/src/tools/LottiePlayer/LottiePlayer.story.tsx +0 -95
  170. package/src/tools/Map/Map.story.tsx +0 -458
  171. package/src/tools/MarkdownEditor/MarkdownEditor.story.tsx +0 -225
  172. package/src/tools/Mermaid/Mermaid.story.tsx +0 -251
  173. package/src/tools/OpenapiViewer/OpenapiViewer.story.tsx +0 -230
  174. package/src/tools/PrettyCode/PrettyCode.story.tsx +0 -304
  175. package/src/tools/Tour/Tour.story.tsx +0 -279
  176. package/src/tools/Tree/Tree.story.tsx +0 -620
  177. package/src/tools/Uploader/Uploader.story.tsx +0 -415
  178. package/src/tools/VideoPlayer/VideoPlayer.story.tsx +0 -87
@@ -6,7 +6,7 @@ import Placeholder from '@tiptap/extension-placeholder';
6
6
  import Mention from '@tiptap/extension-mention';
7
7
  import { Markdown } from '@tiptap/markdown';
8
8
  import type { AnyExtension } from '@tiptap/core';
9
- import { useEffect, useRef, useMemo } from 'react';
9
+ import { forwardRef, useEffect, useImperativeHandle, useMemo, useRef } from 'react';
10
10
  import {
11
11
  Bold, Italic, Strikethrough, Heading1, Heading2, Heading3,
12
12
  List, ListOrdered, Quote, Minus, Code, type LucideIcon,
@@ -90,20 +90,39 @@ export interface MarkdownEditorProps {
90
90
  onSubmit?: () => boolean | void;
91
91
  }
92
92
 
93
+ /**
94
+ * Imperative handle exposed via `ref`. Matches `ComposerHandle` from
95
+ * `@djangocfg/ui-tools/chat` so consumers can forward it straight into
96
+ * `useRegisterComposer({ focus, moveCursorToEnd })` — that's what makes
97
+ * voice dictation (`VoiceComposerSlot`) push live text into a TipTap
98
+ * composer.
99
+ */
100
+ export interface MarkdownEditorHandle {
101
+ /** Move keyboard focus into the editor. */
102
+ focus: () => void;
103
+ /** Place the caret at the end of the document (and focus). */
104
+ moveCursorToEnd: () => void;
105
+ /** Escape hatch — the underlying TipTap `Editor` instance. */
106
+ getEditor: () => Editor | null;
107
+ }
108
+
93
109
  // ── Component ──
94
110
 
95
- export function MarkdownEditor({
96
- value,
97
- onChange,
98
- placeholder = 'Write markdown...',
99
- minHeight = 120,
100
- className = '',
101
- disabled = false,
102
- showToolbar = true,
103
- mentions,
104
- onMentionIdsChange,
105
- onSubmit,
106
- }: MarkdownEditorProps) {
111
+ export const MarkdownEditor = forwardRef<MarkdownEditorHandle, MarkdownEditorProps>(function MarkdownEditor(
112
+ {
113
+ value,
114
+ onChange,
115
+ placeholder = 'Write markdown...',
116
+ minHeight = 120,
117
+ className = '',
118
+ disabled = false,
119
+ showToolbar = true,
120
+ mentions,
121
+ onMentionIdsChange,
122
+ onSubmit,
123
+ },
124
+ ref,
125
+ ) {
107
126
  // Keep the latest onSubmit in a ref so the Tiptap extension's
108
127
  // keymap closure always calls the freshest handler — Tiptap's
109
128
  // useEditor initialises extensions ONCE on first render. Without
@@ -244,6 +263,23 @@ export function MarkdownEditor({
244
263
  }
245
264
  }, [value, editor]);
246
265
 
266
+ // Imperative API for hosts that drive the editor without owning a
267
+ // TipTap ref directly — chat composer registration, voice slot,
268
+ // focus-on-stream-end.
269
+ useImperativeHandle(
270
+ ref,
271
+ (): MarkdownEditorHandle => ({
272
+ focus: () => {
273
+ editor?.commands.focus();
274
+ },
275
+ moveCursorToEnd: () => {
276
+ editor?.commands.focus('end');
277
+ },
278
+ getEditor: () => editor ?? null,
279
+ }),
280
+ [editor],
281
+ );
282
+
247
283
  const wrapperClass = `markdown-editor rounded-md border border-input bg-background ${disabled ? 'opacity-60' : ''} ${className}`.trim();
248
284
 
249
285
  return (
@@ -254,7 +290,7 @@ export function MarkdownEditor({
254
290
  </div>
255
291
  </div>
256
292
  );
257
- }
293
+ });
258
294
 
259
295
  // ── Toolbar ──
260
296
 
@@ -1,5 +1,5 @@
1
1
  export { MarkdownEditor } from './MarkdownEditor';
2
- export type { MarkdownEditorProps } from './MarkdownEditor';
2
+ export type { MarkdownEditorProps, MarkdownEditorHandle } from './MarkdownEditor';
3
3
  export type {
4
4
  MentionItem,
5
5
  MentionConfig,
@@ -0,0 +1,336 @@
1
+ # SpeechRecognition
2
+
3
+ Decomposed Speech-to-Text for the React app. **Headless core + composable UI parts + lazy bundle**, just like [`Chat`](../Chat) and [`AudioPlayer`](../AudioPlayer).
4
+
5
+ The default backend is the browser's native Web Speech API (zero deps, zero network). For anything else — Deepgram, AssemblyAI, OpenAI Whisper, your own Django/FastAPI gateway — plug a custom engine into the same hook. No SDK lock-in.
6
+
7
+ ```bash
8
+ pnpm add @djangocfg/ui-tools
9
+ ```
10
+
11
+ Subpath import (recommended — keeps the rest of `ui-tools` out of your bundle):
12
+
13
+ ```ts
14
+ import {
15
+ useSpeechRecognition,
16
+ DictationField,
17
+ createWebSpeechEngine,
18
+ createHttpEngine,
19
+ createWebSocketEngine,
20
+ } from '@djangocfg/ui-tools/speech-recognition';
21
+ ```
22
+
23
+ ---
24
+
25
+ ## Quick start
26
+
27
+ ```tsx
28
+ import {
29
+ DictationButton,
30
+ TranscriptView,
31
+ useSpeechRecognition,
32
+ } from '@djangocfg/ui-tools/speech-recognition';
33
+
34
+ function Dictate() {
35
+ const rec = useSpeechRecognition(); // Web Speech engine, browser language
36
+ return (
37
+ <div className="flex items-start gap-3">
38
+ <DictationButton status={rec.status} onClick={() => rec.toggle()} />
39
+ <TranscriptView transcript={rec.transcript} />
40
+ </div>
41
+ );
42
+ }
43
+ ```
44
+
45
+ That's the whole "make me type with my voice" flow. With no config, the hook uses `createWebSpeechEngine()` and the language stored in `useSpeechPrefs` (defaults to `navigator.language`).
46
+
47
+ ---
48
+
49
+ ## DictationField — the opinionated widget
50
+
51
+ A textarea + mic button + interim ghost + push-to-talk hint, all wired up. Final segments are appended to the controlled `value`.
52
+
53
+ ```tsx
54
+ import { DictationField } from '@djangocfg/ui-tools/speech-recognition';
55
+
56
+ const [text, setText] = useState('');
57
+
58
+ <DictationField
59
+ value={text}
60
+ onChange={setText}
61
+ language="ru-RU"
62
+ pushToTalk={{ key: 'alt' }}
63
+ placeholder="Type or hold ⌥ to talk…"
64
+ />
65
+ ```
66
+
67
+ For voice-memo flows there's `VoiceMessageRecorder`: press the mic, dictate freely, and either silence detection or the 60-second cap triggers `onSubmit(text, segments)`.
68
+
69
+ ---
70
+
71
+ ## Custom engines — the whole point
72
+
73
+ `useSpeechRecognition` doesn't care **how** audio becomes text. The `RecognitionEngine` interface is small enough to implement against any backend.
74
+
75
+ ### HTTP (Whisper, custom REST)
76
+
77
+ ```ts
78
+ import { createHttpEngine } from '@djangocfg/ui-tools/speech-recognition';
79
+
80
+ const engine = createHttpEngine({
81
+ url: '/api/stt/transcribe',
82
+ headers: async () => ({ Authorization: `Bearer ${token}` }),
83
+ chunkMs: 750,
84
+ parse: async (resp) => {
85
+ const { text, final } = await resp.json();
86
+ return { text, isFinal: final };
87
+ },
88
+ });
89
+
90
+ const rec = useSpeechRecognition({ engine });
91
+ ```
92
+
93
+ Captures audio with `MediaRecorder` (Opus/WebM by default), POSTs each chunk as the request body, runs your `parse` callback on the response.
94
+
95
+ ### External (Wails / Tauri / native sidecar)
96
+
97
+ When the host owns the entire pipeline — capture happens outside the browser, transcription runs on the backend, the frontend just commands "start" / "stop" — use `createExternalEngine`. Perfect for cmdop's Wails whisper.cpp integration.
98
+
99
+ ```ts
100
+ import { createExternalEngine } from '@djangocfg/ui-tools/speech-recognition';
101
+ import { EventsOn } from '@runtime';
102
+ import * as VoiceService from '@bindings/desktop/services/voice/service';
103
+
104
+ const wailsEngine = createExternalEngine({
105
+ id: 'wails-whisper',
106
+ onStart: () => VoiceService.StartRecordingForChat(),
107
+ onStop: () => VoiceService.StopRecordingForChat(),
108
+ subscribe: (handle) => {
109
+ const offText = EventsOn('voice:chat-text', (p) => {
110
+ if (p?.error) handle.emitError({ code: 'engine', message: p.error });
111
+ else if (p?.text) handle.emitFinal(p.text);
112
+ else handle.emitError({ code: 'no-speech', message: '' });
113
+ });
114
+ const offState = EventsOn('voice:state', (s) => {
115
+ if (s.state === 'recording' || s.state === 'streaming') handle.markListening();
116
+ if (s.partial) handle.emitPartial(s.partial);
117
+ });
118
+ return () => { offText(); offState(); };
119
+ },
120
+ });
121
+
122
+ <VoiceComposerSlot engine={wailsEngine} value={composer.value} onChange={composer.setValue} />
123
+ ```
124
+
125
+ No `MediaRecorder` / `getUserMedia` — the engine is purely a translator between the chat UI and your event bus. `emitFinal` automatically closes the session, so the composer reset / autosend logic fires the moment the backend posts a result.
126
+
127
+ ### WebSocket (Deepgram / AssemblyAI / custom realtime)
128
+
129
+ ```ts
130
+ import { createWebSocketEngine } from '@djangocfg/ui-tools/speech-recognition';
131
+
132
+ const engine = createWebSocketEngine({
133
+ url: async () => {
134
+ const { token } = await fetch('/api/stt/ticket').then((r) => r.json());
135
+ return `wss://stt.example.com/listen?token=${token}`;
136
+ },
137
+ chunkMs: 250,
138
+ parseMessage: (data) => {
139
+ if (typeof data !== 'string') return { kind: 'ignore' };
140
+ const msg = JSON.parse(data);
141
+ if (msg.type === 'Results') {
142
+ return msg.is_final
143
+ ? { kind: 'final', text: msg.channel.alternatives[0].transcript }
144
+ : { kind: 'partial', text: msg.channel.alternatives[0].transcript };
145
+ }
146
+ return { kind: 'ignore' };
147
+ },
148
+ });
149
+ ```
150
+
151
+ Reconnect with exponential backoff (250 ms → 5 s) is built in. Tokens go through a `url()` callback so they can be minted server-side and rotated per session.
152
+
153
+ ### Anything else
154
+
155
+ Implement `RecognitionEngine` directly — on-device Whisper WASM, Picovoice, native bridges from Tauri / Electron, mocked engines for tests. The interface:
156
+
157
+ ```ts
158
+ interface RecognitionEngine {
159
+ id: string;
160
+ isSupported: boolean;
161
+ start(opts: EngineStartOptions): Promise<void>;
162
+ stop(): Promise<void>;
163
+ abort(): void;
164
+ on(event, cb): Unsub; // 'partial' | 'final' | 'error' | 'state'
165
+ getStream?(): MediaStream | null; // optional — for VU meters
166
+ }
167
+ ```
168
+
169
+ `createEngineBus()` gives you the listener bookkeeping in three lines.
170
+
171
+ ---
172
+
173
+ ## Voice inside the Chat composer
174
+
175
+ Two drop-ins, designed to live together:
176
+
177
+ ```tsx
178
+ import { ChatRoot } from '@djangocfg/ui-tools/chat';
179
+ import {
180
+ ChatHeaderLanguageButton,
181
+ VoiceComposerSlot,
182
+ } from '@djangocfg/ui-tools/speech-recognition';
183
+
184
+ <ChatRoot
185
+ transport={transport}
186
+ composerToolbarEnd={<VoiceComposerSlot />}
187
+ />
188
+
189
+ // Header flag-picker is added via ChatLauncher dock slot:
190
+ <ChatLauncher dock={{ headerActions: <ChatHeaderLanguageButton /> }} />
191
+ ```
192
+
193
+ That's it. No props, no refs. The slot reads / writes the composer through the `ComposerHandle` registered in `ChatProvider` (`focus / moveCursorToEnd / getValue / setValue`), so the built-in `<Composer>` and a TipTap-backed `MarkdownEditor` work the same way — host implements `useRegisterComposer({...})` once and voice flows in.
194
+
195
+ What you get without writing it yourself:
196
+
197
+ - **Anchored merge.** The text typed before pressing the mic is preserved; dictation is appended to that anchor.
198
+ - **Live focus + cursor pinning.** On start, the composer is focused and the caret jumps to end; every partial / final repins the caret so the live transcript visibly grows where the user expects.
199
+ - **Auto-hide.** `useVoiceSupport()` checks `engine.isSupported` + `getUserMedia` + browser type (Firefox / Instagram / TikTok WebViews → renders `null`).
200
+ - **Countdown chip + tooltip.** A `useCountdownFromSeconds()` ticker (max 90 s default) sits next to the mic button.
201
+ - **Silence stop.** Auto-stop after 2.5 s of quiet (configurable via `silenceMs`).
202
+ - **Esc / Enter hotkeys while listening.** Esc cancels (and calls `stopPropagation` so the chat doesn't close); Enter finishes recording (and **does not** submit the chat — this avoids accidental sends mid-sentence).
203
+ - **Earcons.** Bundled start (low chime) + stop (short tick) reused from chat sounds, both at deliberately quiet volumes. Override via `sounds={{ start, stop }}` or disable with `sounds={false}`.
204
+
205
+ The explicit `value` / `onChange` form is still supported for standalone usage outside a `<ChatProvider>`:
206
+
207
+ ```tsx
208
+ <VoiceComposerSlot value={value} onChange={setValue} />
209
+ ```
210
+
211
+ ### Language picker — flag button in the chat header
212
+
213
+ ```tsx
214
+ <ChatHeader actions={<ChatHeaderLanguageButton />} />
215
+ ```
216
+
217
+ Compact 28×28 flag button. Shows the currently-resolved language's country flag (🇷🇺 for `ru-RU`, 🇺🇸 for `en-US`). Clicking opens a searchable `<Combobox>` with **66 BCP-47 tags from the official Chrome Web Speech demo** (`WEB_SPEECH_LANGUAGES` catalogue) — language name + region + tag, every row with a country flag, search across all three fields. Choice persists in `useSpeechPrefs`.
218
+
219
+ ### Shared state across the tree
220
+
221
+ Need to react to listening state elsewhere (dim textarea, header indicator)? Wrap the chat in `<SpeechRecognitionProvider>` and read `useSpeechRecognitionContext()` from any descendant.
222
+
223
+ ### Reading the active language from elsewhere
224
+
225
+ Speech language is **persisted independently** of the app's i18n locale (`djangocfg-stt:prefs` in localStorage). Read it from any component:
226
+
227
+ ```tsx
228
+ import {
229
+ useSpeechPrefs, // raw user choice — `string | null`
230
+ useResolvedLanguage, // resolved BCP-47 with full fallback chain
231
+ useSpeechLanguageInfo, // combo: { tag, iso, country, name, englishName, region, hasUserChoice }
232
+ } from '@djangocfg/ui-tools/speech-recognition';
233
+
234
+ function HeaderBadge() {
235
+ const { tag, name, country, hasUserChoice } = useSpeechLanguageInfo();
236
+ return (
237
+ <Badge>
238
+ <Flag countryCode={country} />
239
+ {name ?? tag}
240
+ {hasUserChoice && <span className="ml-1">★</span>}
241
+ </Badge>
242
+ );
243
+ }
244
+ ```
245
+
246
+ Push to backend on every change:
247
+
248
+ ```tsx
249
+ const { tag, hasUserChoice } = useSpeechLanguageInfo();
250
+ useEffect(() => {
251
+ if (!hasUserChoice) return;
252
+ void api.user.update({ speechLanguage: tag });
253
+ }, [tag, hasUserChoice]);
254
+ ```
255
+
256
+ Outside React (event handlers, util functions, non-component code):
257
+
258
+ ```ts
259
+ import { useSpeechPrefs } from '@djangocfg/ui-tools/speech-recognition';
260
+ const current = useSpeechPrefs.getState().language; // 'ru-RU' | null
261
+ const unsubscribe = useSpeechPrefs.subscribe((state) => {
262
+ console.log('language changed', state.language);
263
+ });
264
+ ```
265
+
266
+ ---
267
+
268
+ ## What you get for free
269
+
270
+ - **Zero-setup default** — `useSpeechRecognition()` works with no engine, no config.
271
+ - **Permission-aware UX** — `permission-denied` / `no-microphone` / `no-speech` surface as typed errors; `<ErrorBanner>` translates them.
272
+ - **Persisted prefs** — language, mic device, engine choice live in zustand+localStorage (`djangocfg-stt:prefs`).
273
+ - **Auto-stop** — `autoStop: { silenceMs, maxMs, silenceThreshold }` based on RMS analyser; opt-in.
274
+ - **Push-to-talk** — `usePushToTalk({ key: 'mod+alt' })` with smart input-field bypass.
275
+ - **VU meter** — `useMicLevel(stream)` + `<MicMeter />` for level visualisation.
276
+ - **Mic enumeration** — `useMicDevices()` returns `audioinput` list, refreshes on `devicechange`.
277
+ - **Interim+final UI** — `<TranscriptView>` dims the trailing interim chunk so users see the model "thinking".
278
+
279
+ ---
280
+
281
+ ## Public surface
282
+
283
+ ### Hooks
284
+ `useSpeechRecognition`, `useDictation`, `usePushToTalk`, `useMicDevices`, `useMicLevel`, `useEnginePrefs`, `useSpeechPrefs`, `useVoiceSupport`, `useResolvedLanguage`, `useSpeechLanguageInfo`.
285
+
286
+ ### Context
287
+ `SpeechRecognitionProvider`, `useSpeechRecognitionContext`, `useSpeechRecognitionContextOptional` — lift a single engine instance so any descendant (composer slot, header badge, transcript overlay) sees the same `status` / `transcript` / `level`.
288
+
289
+ ### Components
290
+ `DictationButton`, `MicMeter`, `TranscriptView`, `LanguagePicker`, `DevicePicker`, `EngineBadge`, `ErrorBanner`, `PushToTalkHint`. Chat header: `ChatHeaderLanguageButton` (re-exported from chat launcher).
291
+
292
+ ### Widgets
293
+ `DictationField`, `VoiceMessageRecorder`, `VoiceComposerSlot`, `LazyDictationField`.
294
+
295
+ ### Engines
296
+ `createWebSpeechEngine`, `createHttpEngine`, `createWebSocketEngine`, `createExternalEngine`, `createEngineBus`, `startMicCapture`, `pickMime`.
297
+
298
+ ### Language utilities
299
+ `WEB_SPEECH_LANGUAGES` (catalogue of 66 supported BCP-47 tags from the Chrome demo), `WEB_SPEECH_TAGS` (flat array), `findSpeechLanguage(tag)`, `countryFromTag(tag)`, `toBCP47(iso)`, `resolveSpeechLanguage({ explicit, prefs, i18n })`, `DEFAULT_ISO_TO_BCP47`, `DEFAULT_VOICE_SOUNDS`.
300
+
301
+ ### Types
302
+ `RecognitionEngine`, `RecognitionStatus`, `RecognitionError`, `RecognitionErrorCode`, `Segment`, `Transcript`, `EngineState`, `EngineStartOptions`, `EngineEventMap`, `Unsub`, `AutoStopOptions`, `VoiceSupport`, `VoiceUnsupportedReason`.
303
+
304
+ ---
305
+
306
+ ## Tests
307
+
308
+ ```bash
309
+ pnpm test # one-shot
310
+ pnpm test:watch # vitest watch mode
311
+ ```
312
+
313
+ Covered (all pure-function): reducer state machine (`__tests__/reducer.test.ts`), transcript merge + `normaliseFinal` (`__tests__/transcript.test.ts`), `newSegmentId` (`__tests__/ids.test.ts`), `toBCP47` + `resolveSpeechLanguage` (`__tests__/language.test.ts`). Engine adapters and UI parts rely on stories — `MediaRecorder` / `getUserMedia` / `WebSocket` are mock-engine-driven in the playground.
314
+
315
+ ---
316
+
317
+ ## Stories
318
+
319
+ `Tools/SpeechRecognition/{Basic, DictationField, PushToTalk, MicMeter, CustomEngine: HTTP, CustomEngine: WebSocket, Language & Device, Errors}` plus `Tools/Chat/Voice composer` for the chat-slot integration — all driven by a deterministic mock engine so the playground never asks for microphone permission.
320
+
321
+ ```bash
322
+ pnpm playground
323
+ ```
324
+
325
+ ---
326
+
327
+ ## Browser support
328
+
329
+ | Browser | Default engine | Notes |
330
+ |---|---|---|
331
+ | Chrome / Edge desktop | ✅ Web Speech | Best — continuous + interim results. |
332
+ | Safari 16+ desktop | ✅ Web Speech | Continuous works; some locales partial only. |
333
+ | Firefox desktop | ❌ Web Speech | `isSupported === false`. Pass a custom engine (HTTP/WS). |
334
+ | Mobile WebViews | ⚠️ varies | Always pair with a fallback engine in production. |
335
+
336
+ For Firefox / WebView consumers: pass `engine: createHttpEngine(...)` and you're streaming again.
@@ -0,0 +1,15 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { newSegmentId } from '../core/ids';
4
+
5
+ describe('newSegmentId', () => {
6
+ it('produces unique values across calls', () => {
7
+ const ids = new Set<string>();
8
+ for (let i = 0; i < 200; i += 1) ids.add(newSegmentId());
9
+ expect(ids.size).toBe(200);
10
+ });
11
+
12
+ it('matches the seg_<time>_<n> shape', () => {
13
+ expect(newSegmentId()).toMatch(/^seg_[a-z0-9]+_[a-z0-9]+$/);
14
+ });
15
+ });
@@ -0,0 +1,59 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { resolveSpeechLanguage, toBCP47 } from '../core/language';
4
+
5
+ describe('toBCP47', () => {
6
+ it('maps known ISO-2 codes to canonical BCP-47', () => {
7
+ expect(toBCP47('en')).toBe('en-US');
8
+ expect(toBCP47('ru')).toBe('ru-RU');
9
+ expect(toBCP47('ko')).toBe('ko-KR');
10
+ expect(toBCP47('pt')).toBe('pt-BR');
11
+ expect(toBCP47('no')).toBe('nb-NO');
12
+ });
13
+
14
+ it('falls back to <code>-<UPPER(code)> for unmapped ISO codes', () => {
15
+ expect(toBCP47('uk')).toBe('uk-UK');
16
+ expect(toBCP47('cs')).toBe('cs-CS');
17
+ });
18
+
19
+ it('passes BCP-47 input through unchanged', () => {
20
+ expect(toBCP47('en-GB')).toBe('en-GB');
21
+ expect(toBCP47('zh-TW')).toBe('zh-TW');
22
+ });
23
+
24
+ it('returns undefined for empty / null', () => {
25
+ expect(toBCP47(null)).toBeUndefined();
26
+ expect(toBCP47(undefined)).toBeUndefined();
27
+ expect(toBCP47('')).toBeUndefined();
28
+ expect(toBCP47(' ')).toBeUndefined();
29
+ });
30
+ });
31
+
32
+ describe('resolveSpeechLanguage', () => {
33
+ it('priority: explicit beats everything', () => {
34
+ expect(
35
+ resolveSpeechLanguage({
36
+ explicit: 'ko-KR',
37
+ prefs: 'ru-RU',
38
+ i18n: 'en',
39
+ }),
40
+ ).toBe('ko-KR');
41
+ });
42
+
43
+ it('priority: prefs beats i18n', () => {
44
+ expect(resolveSpeechLanguage({ prefs: 'ru-RU', i18n: 'en' })).toBe('ru-RU');
45
+ });
46
+
47
+ it('priority: i18n beats navigator', () => {
48
+ expect(resolveSpeechLanguage({ i18n: 'ru' })).toBe('ru-RU');
49
+ });
50
+
51
+ it('falls back to en-US when nothing supplied and no navigator', () => {
52
+ expect(resolveSpeechLanguage({})).toMatch(/^[a-z]{2}-[A-Z]{2}$/);
53
+ });
54
+
55
+ it('normalises ISO-2 in any slot', () => {
56
+ expect(resolveSpeechLanguage({ explicit: 'ru' })).toBe('ru-RU');
57
+ expect(resolveSpeechLanguage({ prefs: 'ko' })).toBe('ko-KR');
58
+ });
59
+ });
@@ -0,0 +1,71 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import { INITIAL_STATE, reducer } from '../core/reducer';
4
+
5
/**
 * State-transition tests for the speech-recognition reducer: session
 * lifecycle, interim/final segment handling, error + reset, and the
 * default branch for unknown actions.
 */
describe('SpeechRecognition reducer', () => {
  type State = Parameters<typeof reducer>[0];
  type Action = Parameters<typeof reducer>[1];

  /** Feeds `actions` through the reducer in order, starting from `from`. */
  const replay = (actions: readonly Action[], from: State = INITIAL_STATE): State =>
    actions.reduce<State>((state, action) => reducer(state, action), from);

  it('starts and finishes a session', () => {
    const starting = reducer(INITIAL_STATE, { type: 'START' });
    expect(starting.status).toBe('starting');
    expect(starting.startedAt).toBeTypeOf('number');
    expect(starting.error).toBeNull();

    const listening = reducer(starting, { type: 'STARTED' });
    expect(listening.status).toBe('listening');

    const stopping = reducer(listening, { type: 'STOP' });
    expect(stopping.status).toBe('stopping');

    const idle = reducer(stopping, { type: 'STOPPED' });
    expect(idle.status).toBe('idle');
  });

  it('merges PARTIAL into an interim segment, then promotes to FINAL', () => {
    // Two partials that share a segment id must collapse into one segment.
    const interim = replay([
      { type: 'START' },
      { type: 'STARTED' },
      { type: 'PARTIAL', text: 'hel', segmentId: 'seg-1' },
      { type: 'PARTIAL', text: 'hello', segmentId: 'seg-1' },
    ]);
    expect(interim.segments).toHaveLength(1);
    expect(interim.segments[0]).toMatchObject({
      id: 'seg-1',
      text: 'hello',
      isFinal: false,
    });

    // A FINAL for the same id replaces the interim entry rather than appending.
    const finalised = reducer(interim, {
      type: 'FINAL',
      text: 'hello world',
      segmentId: 'seg-1',
      confidence: 0.91,
    });
    expect(finalised.segments).toHaveLength(1);
    expect(finalised.segments[0]).toMatchObject({
      id: 'seg-1',
      text: 'hello world',
      isFinal: true,
      confidence: 0.91,
    });
  });

  it('accumulates separate segments', () => {
    const { segments } = replay([
      { type: 'START' },
      { type: 'FINAL', text: 'one', segmentId: 'a' },
      { type: 'FINAL', text: 'two', segmentId: 'b' },
      { type: 'PARTIAL', text: 'thr', segmentId: 'c' },
    ]);
    expect(segments.map((seg) => seg.text)).toEqual(['one', 'two', 'thr']);
    expect(segments.map((seg) => seg.isFinal)).toEqual([true, true, false]);
  });

  it('records errors and resets cleanly', () => {
    const failed = replay([
      { type: 'START' },
      {
        type: 'ERROR',
        error: { code: 'no-speech', message: 'no speech' },
      },
    ]);
    expect(failed.status).toBe('error');
    expect(failed.error?.code).toBe('no-speech');

    const reset = reducer(failed, { type: 'RESET' });
    expect(reset).toEqual(INITIAL_STATE);
  });

  it('ignores unknown actions', () => {
    // @ts-expect-error - intentionally invalid for the default branch
    const next = reducer(INITIAL_STATE, { type: 'NOPE' });
    // Identity (not just equality): the same state object must come back.
    expect(next).toBe(INITIAL_STATE);
  });
});
@@ -0,0 +1,52 @@
1
+ import { describe, expect, it } from 'vitest';
2
+
3
+ import {
4
+ EMPTY_TRANSCRIPT,
5
+ buildTranscript,
6
+ joinFinal,
7
+ normaliseFinal,
8
+ } from '../core/transcript';
9
+ import type { Segment } from '../types';
10
+
11
/**
 * Test fixture: builds a segment with a fixed `startedAt` of 0.
 *
 * @param text - Segment text.
 * @param isFinal - Whether the segment is final or still interim.
 * @param id - Segment id; defaults to `text`.
 */
function seg(text: string, isFinal: boolean, id = text): Segment {
  const segment: Segment = { id, text, isFinal, startedAt: 0 };
  return segment;
}
14
+
15
/**
 * Tests for the transcript helpers: `joinFinal`, `buildTranscript`,
 * `EMPTY_TRANSCRIPT` and `normaliseFinal`.
 */
describe('transcript helpers', () => {
  it('joinFinal skips interim and trims whitespace', () => {
    const segments = [
      seg('Hello.', true, 'a'),
      seg(' world ', true, 'b'),
      seg('partial', false, 'c'),
    ];
    const joined = joinFinal(segments);
    expect(joined).toBe('Hello. world');
  });

  it('buildTranscript exposes trailing interim text', () => {
    const finalised = seg('Hi.', true, 'a');
    const pending = seg('there', false, 'b');
    const transcript = buildTranscript([finalised, pending]);
    expect(transcript.final).toBe('Hi.');
    expect(transcript.interim).toBe('there');
    expect(transcript.segments).toHaveLength(2);
  });

  it('buildTranscript with only finals leaves interim empty', () => {
    const transcript = buildTranscript([seg('Done.', true)]);
    expect(transcript.interim).toBe('');
    expect(transcript.final).toBe('Done.');
  });

  it('EMPTY_TRANSCRIPT is the zero value', () => {
    const { interim, final, segments } = EMPTY_TRANSCRIPT;
    expect(interim).toBe('');
    expect(final).toBe('');
    expect(segments).toEqual([]);
  });

  it('normaliseFinal collapses whitespace and fixes punctuation spacing', () => {
    const cases: ReadonlyArray<readonly [raw: string, cleaned: string]> = [
      [' hello world ', 'hello world'],
      ['Hi , there !', 'Hi, there!'],
      ['one\ntwo\tthree', 'one two three'],
    ];
    for (const [raw, cleaned] of cases) {
      expect(normaliseFinal(raw)).toBe(cleaned);
    }
  });
});
@@ -0,0 +1,49 @@
1
+ 'use client';
2
+
3
+ import type * as React from 'react';
4
+
5
+ import { cn } from '@djangocfg/ui-core/lib';
6
+
7
+ import type { MicDevice } from '../hooks/useMicDevices';
8
+
9
/**
 * Props for {@link DevicePicker}, a controlled `<select>` of microphones.
 */
export interface DevicePickerProps {
  /**
   * Microphones to list, one `<option>` each. Typed `readonly` because the
   * picker only reads the array, so callers may pass readonly or frozen lists.
   */
  devices: readonly MicDevice[];
  /** Currently selected device id; `null` selects the default entry. */
  value: string | null;
  /** Called with the chosen device id, or `null` when the default entry is picked. */
  onChange: (id: string | null) => void;
  /** Extra class names merged after the built-in styles. */
  className?: string;
  /** Disables the underlying `<select>`. */
  disabled?: boolean;
  /** Label of the default (empty-value) entry. */
  defaultLabel?: string;
  /** Accessible name for the `<select>`. */
  ariaLabel?: string;
}
18
+
19
/**
 * Controlled `<select>` for choosing a microphone.
 *
 * The first entry always carries an empty value and stands for "no specific
 * device"; picking it reports `null` through `onChange`. A `null` `value`
 * selects that entry.
 *
 * @param props - See {@link DevicePickerProps}. `defaultLabel` defaults to
 *   `'System default'` and `ariaLabel` to `'Microphone'`.
 * @returns The rendered `<select>` element.
 */
export function DevicePicker({
  devices,
  value,
  onChange,
  className,
  disabled,
  defaultLabel = 'System default',
  ariaLabel = 'Microphone',
}: DevicePickerProps): React.ReactElement {
  // An empty option value is the sentinel for the default entry.
  const handleChange = (event: React.ChangeEvent<HTMLSelectElement>): void => {
    const selectedId = event.target.value;
    onChange(selectedId === '' ? null : selectedId);
  };

  const selectClassName = cn(
    'h-8 rounded-md border border-input bg-background px-2 text-xs text-foreground',
    'focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring',
    'disabled:cursor-not-allowed disabled:opacity-50',
    className,
  );

  const deviceOptions = devices.map((device) => (
    <option key={device.deviceId} value={device.deviceId}>
      {device.label}
    </option>
  ));

  return (
    <select
      value={value ?? ''}
      onChange={handleChange}
      disabled={disabled}
      aria-label={ariaLabel}
      className={selectClassName}
    >
      <option value="">{defaultLabel}</option>
      {deviceOptions}
    </select>
  );
}