@djangocfg/ui-tools 2.1.381 → 2.1.383
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -899
- package/dist/ChatRoot-6IZFM5HM.mjs +5 -0
- package/dist/{ChatRoot-EJC5Y2YM.cjs.map → ChatRoot-6IZFM5HM.mjs.map} +1 -1
- package/dist/ChatRoot-LW4XNIKP.cjs +14 -0
- package/dist/{ChatRoot-QOSKJPM6.mjs.map → ChatRoot-LW4XNIKP.cjs.map} +1 -1
- package/dist/DictationField-U25MEYAL.mjs +4 -0
- package/dist/DictationField-U25MEYAL.mjs.map +1 -0
- package/dist/DictationField-XWR5VOID.cjs +13 -0
- package/dist/DictationField-XWR5VOID.cjs.map +1 -0
- package/dist/{DocsLayout-2YKPXZYO.mjs → DocsLayout-2P3ONDWJ.mjs} +3 -3
- package/dist/{DocsLayout-2YKPXZYO.mjs.map → DocsLayout-2P3ONDWJ.mjs.map} +1 -1
- package/dist/{DocsLayout-Q4KS3QWW.cjs → DocsLayout-2YZNS5VK.cjs} +8 -8
- package/dist/{DocsLayout-Q4KS3QWW.cjs.map → DocsLayout-2YZNS5VK.cjs.map} +1 -1
- package/dist/chunk-4PFW7MIJ.cjs +837 -0
- package/dist/chunk-4PFW7MIJ.cjs.map +1 -0
- package/dist/chunk-C2YN6WEO.mjs +833 -0
- package/dist/chunk-C2YN6WEO.mjs.map +1 -0
- package/dist/{chunk-XACCHZH2.cjs → chunk-FIRK5CEH.cjs} +42 -4
- package/dist/chunk-FIRK5CEH.cjs.map +1 -0
- package/dist/{chunk-NWUT327A.mjs → chunk-HIK6BPL7.mjs} +38 -5
- package/dist/chunk-HIK6BPL7.mjs.map +1 -0
- package/dist/chunk-OZAU3QWD.cjs +2493 -0
- package/dist/chunk-OZAU3QWD.cjs.map +1 -0
- package/dist/chunk-UWVP6LCW.mjs +2447 -0
- package/dist/chunk-UWVP6LCW.mjs.map +1 -0
- package/dist/index.cjs +1668 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1215 -107
- package/dist/index.d.ts +1215 -107
- package/dist/index.mjs +1555 -50
- package/dist/index.mjs.map +1 -1
- package/package.json +16 -15
- package/src/audio-assets.d.ts +8 -0
- package/src/components/markdown/MarkdownMessage/CollapseToggle.tsx +3 -1
- package/src/components/markdown/MarkdownMessage/components.tsx +2 -5
- package/src/tools/Chat/README.md +347 -530
- package/src/tools/Chat/components/Attachments.tsx +6 -1
- package/src/tools/Chat/components/ChatRoot.tsx +30 -2
- package/src/tools/Chat/components/Composer.tsx +20 -3
- package/src/tools/Chat/components/ErrorBanner.tsx +7 -3
- package/src/tools/Chat/components/MessageActions.tsx +3 -1
- package/src/tools/Chat/components/MessageBubble.tsx +6 -5
- package/src/tools/Chat/components/MessageList.tsx +87 -1
- package/src/tools/Chat/components/ToolCalls.tsx +21 -3
- package/src/tools/Chat/context/ChatProvider.tsx +21 -3
- package/src/tools/Chat/core/audio/audioBus.ts +10 -163
- package/src/tools/Chat/core/audio/defaults.ts +43 -0
- package/src/tools/Chat/core/audio/index.ts +1 -0
- package/src/tools/Chat/core/audio/preferences.ts +5 -59
- package/src/tools/Chat/core/audio/sounds/error.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/mention.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/notification.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/received.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/sent.mp3 +0 -0
- package/src/tools/Chat/core/audio/sounds/start.mp3 +0 -0
- package/src/tools/Chat/core/audio/types.ts +28 -0
- package/src/tools/Chat/core/reducer.ts +33 -0
- package/src/tools/Chat/core/transport/index.ts +13 -0
- package/src/tools/Chat/core/transport/mappers/index.ts +6 -0
- package/src/tools/Chat/core/transport/mappers/pydantic-ai.ts +142 -0
- package/src/tools/Chat/core/transport/pydantic-ai-transport.ts +208 -0
- package/src/tools/Chat/core/transport/sse.ts +18 -5
- package/src/tools/Chat/hooks/index.ts +25 -0
- package/src/tools/Chat/hooks/useAutoFocusOnStreamEnd.ts +5 -3
- package/src/tools/Chat/hooks/useChat.ts +28 -0
- package/src/tools/Chat/hooks/useChatAudio.ts +59 -180
- package/src/tools/Chat/hooks/useChatDockPrefs.ts +74 -0
- package/src/tools/Chat/hooks/useChatReset.ts +70 -0
- package/src/tools/Chat/hooks/useChatUnread.ts +87 -0
- package/src/tools/Chat/hooks/useFocusOnEmptyClick.ts +111 -0
- package/src/tools/Chat/hooks/useVisitorFingerprint.ts +48 -0
- package/src/tools/Chat/index.ts +84 -1
- package/src/tools/Chat/launcher/ChatDock.tsx +263 -0
- package/src/tools/Chat/launcher/ChatFAB.tsx +349 -0
- package/src/tools/Chat/launcher/ChatGreeting.tsx +200 -0
- package/src/tools/Chat/launcher/ChatHeader.tsx +76 -0
- package/src/tools/Chat/launcher/ChatHeaderActionButton.tsx +87 -0
- package/src/tools/Chat/launcher/ChatHeaderAudioToggle.tsx +47 -0
- package/src/tools/Chat/launcher/ChatHeaderLanguageButton.tsx +179 -0
- package/src/tools/Chat/launcher/ChatHeaderModeToggle.tsx +57 -0
- package/src/tools/Chat/launcher/ChatHeaderResetButton.tsx +93 -0
- package/src/tools/Chat/launcher/ChatLauncher.tsx +321 -0
- package/src/tools/Chat/launcher/ChatUnreadPreview.tsx +197 -0
- package/src/tools/Chat/launcher/index.ts +46 -0
- package/src/tools/Chat/launcher/useChatPresence.ts +44 -0
- package/src/tools/Chat/styles/bubbleTokens.ts +71 -0
- package/src/tools/Chat/styles/index.ts +16 -0
- package/src/tools/Chat/styles/useChatStyles.ts +101 -0
- package/src/tools/Chat/types/attachment.ts +25 -0
- package/src/tools/Chat/types/config.ts +48 -0
- package/src/tools/Chat/types/events.ts +35 -0
- package/src/tools/Chat/types/index.ts +34 -0
- package/src/tools/Chat/types/labels.ts +38 -0
- package/src/tools/Chat/types/message.ts +32 -0
- package/src/tools/Chat/types/persona.ts +31 -0
- package/src/tools/Chat/types/session.ts +43 -0
- package/src/tools/Chat/types/tool-call.ts +17 -0
- package/src/tools/Chat/types/transport.ts +28 -0
- package/src/tools/Chat/types.ts +5 -240
- package/src/tools/MarkdownEditor/MarkdownEditor.tsx +50 -14
- package/src/tools/MarkdownEditor/index.ts +1 -1
- package/src/tools/SpeechRecognition/README.md +336 -0
- package/src/tools/SpeechRecognition/__tests__/ids.test.ts +15 -0
- package/src/tools/SpeechRecognition/__tests__/language.test.ts +59 -0
- package/src/tools/SpeechRecognition/__tests__/reducer.test.ts +71 -0
- package/src/tools/SpeechRecognition/__tests__/transcript.test.ts +52 -0
- package/src/tools/SpeechRecognition/components/DevicePicker.tsx +49 -0
- package/src/tools/SpeechRecognition/components/DictationButton.tsx +93 -0
- package/src/tools/SpeechRecognition/components/EngineBadge.tsx +30 -0
- package/src/tools/SpeechRecognition/components/ErrorBanner.tsx +52 -0
- package/src/tools/SpeechRecognition/components/LanguagePicker.tsx +63 -0
- package/src/tools/SpeechRecognition/components/MicMeter.tsx +63 -0
- package/src/tools/SpeechRecognition/components/PushToTalkHint.tsx +51 -0
- package/src/tools/SpeechRecognition/components/TranscriptView.tsx +55 -0
- package/src/tools/SpeechRecognition/components/index.ts +16 -0
- package/src/tools/SpeechRecognition/context/SpeechRecognitionProvider.tsx +47 -0
- package/src/tools/SpeechRecognition/context/index.ts +6 -0
- package/src/tools/SpeechRecognition/core/audio/defaults.ts +24 -0
- package/src/tools/SpeechRecognition/core/engine/external.ts +222 -0
- package/src/tools/SpeechRecognition/core/engine/http.ts +147 -0
- package/src/tools/SpeechRecognition/core/engine/index.ts +52 -0
- package/src/tools/SpeechRecognition/core/engine/mediarecorder.ts +105 -0
- package/src/tools/SpeechRecognition/core/engine/websocket.ts +211 -0
- package/src/tools/SpeechRecognition/core/engine/webspeech.ts +188 -0
- package/src/tools/SpeechRecognition/core/ids.ts +11 -0
- package/src/tools/SpeechRecognition/core/index.ts +14 -0
- package/src/tools/SpeechRecognition/core/language.ts +78 -0
- package/src/tools/SpeechRecognition/core/languages-catalog.ts +229 -0
- package/src/tools/SpeechRecognition/core/logger.ts +3 -0
- package/src/tools/SpeechRecognition/core/reducer.ts +105 -0
- package/src/tools/SpeechRecognition/core/transcript.ts +36 -0
- package/src/tools/SpeechRecognition/hooks/index.ts +14 -0
- package/src/tools/SpeechRecognition/hooks/useDictation.ts +59 -0
- package/src/tools/SpeechRecognition/hooks/useEnginePrefs.ts +15 -0
- package/src/tools/SpeechRecognition/hooks/useMicDevices.ts +57 -0
- package/src/tools/SpeechRecognition/hooks/useMicLevel.ts +52 -0
- package/src/tools/SpeechRecognition/hooks/usePushToTalk.ts +85 -0
- package/src/tools/SpeechRecognition/hooks/useResolvedLanguage.ts +28 -0
- package/src/tools/SpeechRecognition/hooks/useSpeechLanguageInfo.ts +108 -0
- package/src/tools/SpeechRecognition/hooks/useSpeechRecognition.ts +188 -0
- package/src/tools/SpeechRecognition/hooks/useVoiceSupport.ts +78 -0
- package/src/tools/SpeechRecognition/index.ts +82 -0
- package/src/tools/SpeechRecognition/lazy.tsx +19 -0
- package/src/tools/SpeechRecognition/store/index.ts +2 -0
- package/src/tools/SpeechRecognition/store/prefsStore.ts +54 -0
- package/src/tools/SpeechRecognition/types.ts +133 -0
- package/src/tools/SpeechRecognition/widgets/DictationField.tsx +105 -0
- package/src/tools/SpeechRecognition/widgets/VoiceComposerSlot.tsx +305 -0
- package/src/tools/SpeechRecognition/widgets/VoiceMessageRecorder.tsx +88 -0
- package/src/tools/SpeechRecognition/widgets/index.ts +6 -0
- package/dist/ChatRoot-EJC5Y2YM.cjs +0 -14
- package/dist/ChatRoot-QOSKJPM6.mjs +0 -5
- package/dist/chunk-NWUT327A.mjs.map +0 -1
- package/dist/chunk-QLMKCSR6.mjs +0 -2420
- package/dist/chunk-QLMKCSR6.mjs.map +0 -1
- package/dist/chunk-SI5RD2GD.cjs +0 -2460
- package/dist/chunk-SI5RD2GD.cjs.map +0 -1
- package/dist/chunk-XACCHZH2.cjs.map +0 -1
- package/src/components/markdown/MarkdownMessage/MarkdownMessage.story.tsx +0 -771
- package/src/stories/index.ts +0 -33
- package/src/tools/AudioPlayer/AudioPlayer.story.tsx +0 -481
- package/src/tools/Chat/Chat.story.tsx +0 -1457
- package/src/tools/CodeEditor/CodeEditor.story.tsx +0 -202
- package/src/tools/CronScheduler/CronScheduler.story.tsx +0 -300
- package/src/tools/Gallery/Gallery.story.tsx +0 -237
- package/src/tools/ImageViewer/ImageViewer.story.tsx +0 -85
- package/src/tools/JsonForm/JsonForm.story.tsx +0 -350
- package/src/tools/JsonTree/JsonTree.story.tsx +0 -141
- package/src/tools/LottiePlayer/LottiePlayer.story.tsx +0 -95
- package/src/tools/Map/Map.story.tsx +0 -458
- package/src/tools/MarkdownEditor/MarkdownEditor.story.tsx +0 -225
- package/src/tools/Mermaid/Mermaid.story.tsx +0 -251
- package/src/tools/OpenapiViewer/OpenapiViewer.story.tsx +0 -230
- package/src/tools/PrettyCode/PrettyCode.story.tsx +0 -304
- package/src/tools/Tour/Tour.story.tsx +0 -279
- package/src/tools/Tree/Tree.story.tsx +0 -620
- package/src/tools/Uploader/Uploader.story.tsx +0 -415
- package/src/tools/VideoPlayer/VideoPlayer.story.tsx +0 -87
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSocket engine — pushes recorded audio frames over a persistent socket
|
|
3
|
+
* and parses server responses through a host-supplied `parseMessage`
|
|
4
|
+
* callback. Works with Deepgram / AssemblyAI realtime endpoints or any
|
|
5
|
+
* custom gateway that speaks JSON or binary frames.
|
|
6
|
+
*
|
|
7
|
+
* Reconnect: simple exponential backoff capped at 5 s; the engine emits
|
|
8
|
+
* `state: 'connecting'` between attempts so UIs can show "reconnecting…".
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { newSegmentId } from '../ids';
|
|
12
|
+
import { sttLogger } from '../logger';
|
|
13
|
+
import { createEngineBus } from './index';
|
|
14
|
+
import { startMicCapture, type MicCaptureHandle } from './mediarecorder';
|
|
15
|
+
import type {
|
|
16
|
+
EngineStartOptions,
|
|
17
|
+
RecognitionEngine,
|
|
18
|
+
RecognitionError,
|
|
19
|
+
Unsub,
|
|
20
|
+
} from '../../types';
|
|
21
|
+
|
|
22
|
+
/**
 * Normalised result of parsing one server frame. `parseMessage` must map
 * every incoming frame to exactly one of these variants.
 */
export type WsParsedEvent =
  | {
      /** Interim hypothesis; re-emitted as a `partial` event (confidence is not forwarded). */
      kind: 'partial';
      text: string;
      /** Optional server-side id; the engine reuses the open segment or mints one when absent. */
      segmentId?: string;
      confidence?: number;
    }
  | {
      /** Committed text; re-emitted as a `final` event and closes the open segment. */
      kind: 'final';
      text: string;
      /** Optional server-side id; the engine reuses the open segment or mints one when absent. */
      segmentId?: string;
      confidence?: number;
    }
  | {
      /** Server-reported failure; forwarded unchanged as an `error` event. */
      kind: 'error';
      error: RecognitionError;
    }
  | {
      /** Frame carries nothing of interest (keep-alive, metadata, …) and is dropped. */
      kind: 'ignore';
    };
|
|
27
|
+
|
|
28
|
+
/** Configuration accepted by `createWebSocketEngine`. */
export interface WebSocketEngineOptions {
  /**
   * Endpoint to connect to. Either a fixed URL, or a resolver that receives
   * the session language and returns the URL (synchronously or as a promise).
   * The resolver is invoked again for every reconnect attempt.
   */
  url: string | ((language: string) => Promise<string> | string);

  /** Sub-protocols passed as the second argument of the `WebSocket` constructor. */
  protocols?: string[];

  /** Chunk emission interval, ms. Default 250 for realtime feel. */
  chunkMs?: number;

  /** MIME type forwarded to the microphone capture helper. */
  mime?: string;

  /** Parse one frame (string or binary) into our normalised event shape. */
  parseMessage: (data: string | ArrayBuffer) => WsParsedEvent;

  /** Stable engine id for telemetry / UI badge. Default 'websocket'. */
  id?: string;

  /** Max reconnect attempts before giving up. Default 5. */
  maxReconnect?: number;
}
|
|
41
|
+
|
|
42
|
+
/** First reconnect delay in milliseconds; doubled for each further consecutive attempt. */
const MIN_BACKOFF = 250;
|
|
43
|
+
/** Upper bound, in milliseconds, applied to the reconnect delay. */
const MAX_BACKOFF = 5000;
|
|
44
|
+
|
|
45
|
+
/**
 * Create a `RecognitionEngine` that streams microphone audio to a WebSocket
 * endpoint and republishes the server's replies as engine events.
 *
 * Lifecycle:
 *  - `start()` opens the microphone via `startMicCapture`, then connects.
 *    Each recorded chunk is sent as an `ArrayBuffer` while the socket is OPEN.
 *  - Incoming frames are passed to `opts.parseMessage`; `partial`, `final`
 *    and `error` results are re-emitted on the bus, `ignore` is dropped.
 *  - An unexpected close schedules a reconnect with exponential backoff
 *    (`MIN_BACKOFF` doubling up to `MAX_BACKOFF`), at most
 *    `opts.maxReconnect` (default 5) times. When the limit is exceeded, or the
 *    socket cannot be created at all, the microphone is released so the
 *    engine can be started again.
 *  - `stop()` / `abort()` cancel any pending reconnect, close the socket,
 *    release the microphone and emit `state: 'closed'` exactly once.
 *
 * Every session is tagged with a token; asynchronous continuations (URL
 * resolution, microphone permission, socket callbacks, reconnect timers) that
 * outlive their session are ignored instead of mutating the current one.
 *
 * @param opts Endpoint, parser and tuning options.
 * @returns The engine; `start()` rejects with the `RecognitionError` raised by
 *   the microphone capture when it cannot be opened.
 */
export function createWebSocketEngine(
  opts: WebSocketEngineOptions,
): RecognitionEngine {
  const bus = createEngineBus();
  let socket: WebSocket | null = null;
  let capture: MicCaptureHandle | null = null;
  let currentSegmentId: string | null = null;
  let attempts = 0;
  // Bumped whenever a session starts or ends; a continuation whose captured
  // token no longer matches has been superseded and must not touch state.
  let session = 0;
  let reconnectTimer: ReturnType<typeof setTimeout> | null = null;

  function emitParsed(parsed: WsParsedEvent): void {
    switch (parsed.kind) {
      case 'partial': {
        const id = parsed.segmentId ?? currentSegmentId ?? newSegmentId();
        // Remember the id so later partials and the final share one segment.
        currentSegmentId = id;
        bus.emit('partial', parsed.text, id);
        return;
      }
      case 'final': {
        const id = parsed.segmentId ?? currentSegmentId ?? newSegmentId();
        bus.emit('final', parsed.text, id, parsed.confidence);
        currentSegmentId = null;
        return;
      }
      case 'error':
        bus.emit('error', parsed.error);
        return;
      case 'ignore':
      default:
        return;
    }
  }

  /** Drop a scheduled reconnect, if any. */
  function cancelReconnect(): void {
    if (reconnectTimer !== null) {
      clearTimeout(reconnectTimer);
      reconnectTimer = null;
    }
  }

  /** Forget the current socket first, then close it (best effort). */
  function detachSocket(code: number, reason: string): void {
    const ws = socket;
    socket = null;
    if (!ws) return;
    try {
      ws.close(code, reason);
    } catch {
      // ignore
    }
  }

  /** Stop the microphone without waiting for it and forget the handle. */
  function releaseCapture(): void {
    const handle = capture;
    capture = null;
    void handle?.stop().catch(() => undefined);
  }

  async function openSocket(language: string): Promise<WebSocket> {
    const url =
      typeof opts.url === 'function' ? await opts.url(language) : opts.url;
    const ws = new WebSocket(url, opts.protocols);
    ws.binaryType = 'arraybuffer';
    return ws;
  }

  async function connect(
    start: EngineStartOptions,
    token: number,
  ): Promise<void> {
    if (token !== session) return;
    bus.emit('state', 'connecting');
    let ws: WebSocket;
    try {
      ws = await openSocket(start.language);
    } catch (cause) {
      bus.emit('error', {
        code: 'network',
        message: 'Failed to open STT socket.',
        cause,
      });
      if (token === session) {
        // Nothing retries from here, so end the session instead of leaving
        // the microphone open behind a state stuck at 'connecting'.
        session += 1;
        releaseCapture();
        currentSegmentId = null;
        bus.emit('state', 'error');
      }
      return;
    }
    if (token !== session) {
      // stop()/abort() ran while the URL was being resolved.
      try {
        ws.close(1000, 'client-stop');
      } catch {
        // ignore
      }
      return;
    }
    socket = ws;

    // Each handler first checks that `ws` is still the current socket: after
    // stop()/abort() or a reconnect, late callbacks from an old socket must
    // not emit events or clobber the newer socket.
    ws.onopen = () => {
      if (socket !== ws) return;
      attempts = 0;
      bus.emit('state', 'listening');
    };
    ws.onmessage = (e) => {
      if (socket !== ws) return;
      try {
        const parsed = opts.parseMessage(e.data as string | ArrayBuffer);
        emitParsed(parsed);
      } catch (cause) {
        sttLogger.warn('[ws] parseMessage threw', cause);
      }
    };
    ws.onerror = () => {
      if (socket !== ws) return;
      bus.emit('error', { code: 'network', message: 'STT socket error.' });
    };
    ws.onclose = () => {
      if (socket !== ws) return;
      socket = null;
      attempts += 1;
      const max = opts.maxReconnect ?? 5;
      if (attempts > max) {
        bus.emit('error', {
          code: 'network',
          message: `STT socket closed; gave up after ${max} attempts.`,
        });
        // Terminal: release the microphone so it does not keep recording
        // into the void and so a later start() is not ignored.
        session += 1;
        releaseCapture();
        currentSegmentId = null;
        bus.emit('state', 'closed');
        return;
      }
      const delay = Math.min(MIN_BACKOFF * 2 ** (attempts - 1), MAX_BACKOFF);
      reconnectTimer = setTimeout(() => {
        reconnectTimer = null;
        void connect(start, token);
      }, delay);
    };
  }

  async function stopEngine(): Promise<void> {
    session += 1;
    cancelReconnect();
    bus.emit('state', 'closing');
    detachSocket(1000, 'client-stop');
    try {
      await capture?.stop();
    } finally {
      // Reset even when the capture fails to stop, so the engine is reusable.
      capture = null;
      currentSegmentId = null;
      bus.emit('state', 'closed');
    }
  }

  function abortEngine(): void {
    session += 1;
    cancelReconnect();
    detachSocket(4000, 'client-abort');
    releaseCapture();
    currentSegmentId = null;
    bus.emit('state', 'closed');
  }

  return {
    id: opts.id ?? 'websocket',
    isSupported:
      typeof WebSocket !== 'undefined' &&
      typeof navigator !== 'undefined' &&
      !!navigator.mediaDevices?.getUserMedia &&
      typeof MediaRecorder !== 'undefined',
    on(event, cb): Unsub {
      return bus.on(event, cb);
    },
    async start(start: EngineStartOptions): Promise<void> {
      if (capture) return;
      const signal = start.signal;
      if (signal?.aborted) {
        // An aborted signal never fires 'abort' again, so a listener would be
        // useless; refuse to start instead of opening the microphone.
        bus.emit('state', 'closed');
        return;
      }
      attempts = 0;
      session += 1;
      const token = session;
      // Registered before any await so an abort during the permission prompt
      // or URL resolution is observed; the token check makes it inert once
      // this session has ended.
      signal?.addEventListener(
        'abort',
        () => {
          if (token === session) void stopEngine();
        },
        { once: true },
      );
      let handle: MicCaptureHandle;
      try {
        handle = await startMicCapture({
          deviceId: start.deviceId,
          mime: opts.mime,
          chunkMs: opts.chunkMs ?? 250,
          onChunk: (chunk) => {
            const target = socket;
            if (!target || target.readyState !== WebSocket.OPEN) return;
            chunk
              .arrayBuffer()
              .then((buf) => {
                // The socket may have closed while the chunk was being read.
                if (target.readyState === WebSocket.OPEN) target.send(buf);
              })
              .catch((cause) => sttLogger.warn('[ws] send failed', cause));
          },
          onError: (err) => bus.emit('error', err),
        });
      } catch (cause) {
        const err = cause as RecognitionError;
        bus.emit('error', err);
        bus.emit('state', 'error');
        throw err;
      }
      if (token !== session) {
        // Stopped, aborted or restarted while the microphone was opening.
        void handle.stop().catch(() => undefined);
        return;
      }
      capture = handle;
      await connect(start, token);
    },
    stop(): Promise<void> {
      return stopEngine();
    },
    abort(): void {
      abortEngine();
    },
    getStream(): MediaStream | null {
      return capture?.stream ?? null;
    },
  };
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default engine — wraps the browser's `SpeechRecognition` API.
|
|
3
|
+
*
|
|
4
|
+
* Lives behind the same `RecognitionEngine` contract every other engine
|
|
5
|
+
* implements. When the browser doesn't expose `SpeechRecognition`
|
|
6
|
+
* (Firefox, some mobile WebViews) `isSupported` is `false` and `start()`
|
|
7
|
+
* throws an `unsupported` error.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { newSegmentId } from '../ids';
|
|
11
|
+
import { sttLogger } from '../logger';
|
|
12
|
+
import { createEngineBus } from './index';
|
|
13
|
+
import type {
|
|
14
|
+
EngineStartOptions,
|
|
15
|
+
RecognitionEngine,
|
|
16
|
+
RecognitionError,
|
|
17
|
+
RecognitionErrorCode,
|
|
18
|
+
Unsub,
|
|
19
|
+
} from '../../types';
|
|
20
|
+
|
|
21
|
+
// Minimal subset of the Web Speech API we actually rely on. Browsers
|
|
22
|
+
// expose either `SpeechRecognition` (Edge / Safari new) or the older
|
|
23
|
+
// `webkitSpeechRecognition` (Chrome). Both share the same shape.
|
|
24
|
+
/** Structural type for the recogniser instances this engine drives. */
interface BrowserSpeechRecognition extends EventTarget {
  // --- configuration, assigned once before `start()` ---
  /** Language tag for the session (set from `EngineStartOptions.language`). */
  lang: string;
  /** Whether non-final results are delivered (set from `EngineStartOptions.interim`). */
  interimResults: boolean;
  continuous: boolean;
  maxAlternatives: number;

  // --- control ---
  start(): void;
  stop(): void;
  abort(): void;

  // --- callbacks; reset to `null` on teardown ---
  onstart: (() => void) | null;
  onresult: ((e: BrowserSpeechRecognitionEvent) => void) | null;
  onerror: ((e: BrowserSpeechRecognitionError) => void) | null;
  onend: (() => void) | null;
}
|
|
37
|
+
|
|
38
|
+
/** One entry of a result event; only the first alternative is read here. */
interface BrowserSpeechRecognitionResult {
  /** First (index 0) alternative for this result. */
  0: { transcript: string; confidence: number };
  /** `true` once the result is committed; `false` while it is still interim. */
  isFinal: boolean;
}
|
|
42
|
+
|
|
43
|
+
/** Shape of the event handed to `onresult`. */
interface BrowserSpeechRecognitionEvent extends Event {
  /** Results known so far; the handler walks them from `resultIndex` to the end. */
  results: ArrayLike<BrowserSpeechRecognitionResult>;
  /** Index of the first entry in `results` the handler should process. */
  resultIndex: number;
}
|
|
47
|
+
|
|
48
|
+
/** Shape of the event handed to `onerror`. */
interface BrowserSpeechRecognitionError extends Event {
  /** Browser error code; translated through `ERROR_MAP`. */
  error: string;
  /** Optional human-readable detail; a generic message is built when it is empty. */
  message?: string;
}
|
|
52
|
+
|
|
53
|
+
/** Zero-argument constructor producing a recogniser; matches both `SpeechRecognition` and `webkitSpeechRecognition`. */
type Ctor = new () => BrowserSpeechRecognition;
|
|
54
|
+
|
|
55
|
+
/**
 * Look up the recogniser constructor on `window`.
 *
 * @returns The standard `SpeechRecognition` when present, otherwise the
 *   prefixed `webkitSpeechRecognition`, or `null` when neither exists or there
 *   is no `window` at all (e.g. server-side rendering).
 */
function resolveCtor(): Ctor | null {
  if (typeof window === 'undefined') {
    return null;
  }
  const { SpeechRecognition, webkitSpeechRecognition } =
    window as unknown as {
      SpeechRecognition?: Ctor;
      webkitSpeechRecognition?: Ctor;
    };
  return SpeechRecognition ?? webkitSpeechRecognition ?? null;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Translates browser error codes into this package's `RecognitionErrorCode`.
 * Codes missing from the table are reported as `'engine'` by the caller.
 */
const ERROR_MAP: Record<string, RecognitionErrorCode> = {
  'no-speech': 'no-speech',
  'aborted': 'aborted',
  'audio-capture': 'no-microphone',
  'network': 'network',
  // Both permission-style failures collapse into one code.
  'not-allowed': 'permission-denied',
  'service-not-allowed': 'permission-denied',
  'bad-grammar': 'engine',
  'language-not-supported': 'language',
};
|
|
74
|
+
|
|
75
|
+
/** Optional tuning for `createWebSpeechEngine`; every field has a default. */
export interface WebSpeechEngineOptions {
  /** Max alternatives the engine should request. Default 1. */
  maxAlternatives?: number;

  /** Whether the underlying recognition should be continuous. Default true. */
  continuous?: boolean;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Create a `RecognitionEngine` backed by the browser's speech recogniser.
 *
 * The recogniser constructor is resolved once, when the engine is created;
 * `isSupported` reflects whether one was found. Every `start()` builds a fresh
 * recogniser, wires its callbacks to the event bus and clears them again when
 * the recogniser reports `onend` or fails to start.
 *
 * @param opts Optional tuning (`continuous`, `maxAlternatives`).
 * @returns The engine. `start()` rejects with a `RecognitionError` whose code
 *   is `'unsupported'` when no recogniser exists, or `'engine'` when the
 *   recogniser refuses to start.
 */
export function createWebSpeechEngine(
  opts: WebSpeechEngineOptions = {},
): RecognitionEngine {
  const Ctor = resolveCtor();
  const bus = createEngineBus();
  let instance: BrowserSpeechRecognition | null = null;
  let currentSegmentId: string | null = null;

  /** Detach every callback from the live recogniser and forget it. */
  function teardown(): void {
    if (!instance) return;
    instance.onresult = null;
    instance.onerror = null;
    instance.onstart = null;
    instance.onend = null;
    instance = null;
    currentSegmentId = null;
  }

  return {
    id: 'webspeech',
    isSupported: Ctor !== null,
    on(event, cb): Unsub {
      return bus.on(event, cb);
    },
    async start(start: EngineStartOptions): Promise<void> {
      if (!Ctor) {
        const err: RecognitionError = {
          code: 'unsupported',
          message: 'Web Speech API is not available in this browser.',
        };
        bus.emit('error', err);
        throw err;
      }
      if (instance) {
        sttLogger.debug('[webspeech] start() called while running — ignoring');
        return;
      }

      const signal = start.signal;
      if (signal?.aborted) {
        // An aborted signal never fires 'abort' again, so a listener added
        // now would never run; refuse to start instead.
        bus.emit('state', 'closed');
        return;
      }

      bus.emit('state', 'connecting');

      const rec = new Ctor();
      rec.lang = start.language;
      rec.interimResults = start.interim;
      rec.continuous = opts.continuous ?? true;
      rec.maxAlternatives = opts.maxAlternatives ?? 1;

      rec.onstart = () => {
        bus.emit('state', 'listening');
      };
      rec.onend = () => {
        bus.emit('state', 'closed');
        teardown();
      };
      rec.onerror = (e) => {
        const code = ERROR_MAP[e.error] ?? 'engine';
        const err: RecognitionError = {
          code,
          message: e.message || `Web Speech error: ${e.error}`,
        };
        bus.emit('error', err);
      };
      rec.onresult = (e) => {
        for (let i = e.resultIndex; i < e.results.length; i += 1) {
          const res = e.results[i];
          const alt = res[0];
          const text = alt.transcript;
          // One id covers all partials of a segment plus its final.
          if (!currentSegmentId) currentSegmentId = newSegmentId();
          if (res.isFinal) {
            bus.emit('final', text, currentSegmentId, alt.confidence);
            currentSegmentId = null;
          } else {
            bus.emit('partial', text, currentSegmentId);
          }
        }
      };

      signal?.addEventListener(
        'abort',
        () => {
          // Only abort while this recogniser is still the live one.
          if (instance === rec) rec.abort();
        },
        { once: true },
      );

      instance = rec;
      try {
        rec.start();
      } catch (cause) {
        const err: RecognitionError = {
          code: 'engine',
          message: 'Failed to start Web Speech recognition.',
          cause,
        };
        bus.emit('error', err);
        teardown();
        // `onend` will not fire for a recogniser that never started, so leave
        // the 'connecting' state explicitly.
        bus.emit('state', 'error');
        throw err;
      }
    },
    async stop(): Promise<void> {
      if (!instance) return;
      bus.emit('state', 'closing');
      instance.stop();
    },
    abort(): void {
      if (!instance) return;
      instance.abort();
    },
  };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Module-wide sequence number backing `newSegmentId`. */
let sequence = 0;

/**
 * Mint a lightweight segment id of the form `seg_<time>_<sequence>`, both
 * parts in base 36.
 *
 * Ids only have to be unique within one component lifecycle; clashes across
 * sessions are acceptable. Keeping this hand-rolled avoids adding nanoid to a
 * lazy chunk that is deliberately kept small.
 */
export function newSegmentId(): string {
  sequence = (sequence + 1) % Number.MAX_SAFE_INTEGER;
  const timePart = Date.now().toString(36);
  const sequencePart = sequence.toString(36);
  return ['seg', timePart, sequencePart].join('_');
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export { newSegmentId } from './ids';
|
|
2
|
+
export { sttLogger } from './logger';
|
|
3
|
+
export {
|
|
4
|
+
EMPTY_TRANSCRIPT,
|
|
5
|
+
buildTranscript,
|
|
6
|
+
joinFinal,
|
|
7
|
+
normaliseFinal,
|
|
8
|
+
} from './transcript';
|
|
9
|
+
export {
|
|
10
|
+
INITIAL_STATE,
|
|
11
|
+
reducer,
|
|
12
|
+
type RecognitionAction,
|
|
13
|
+
type RecognitionState,
|
|
14
|
+
} from './reducer';
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Maps 2-letter ISO 639-1 codes (`en`, `ru`, `ko` — what
|
|
3
|
+
* `@djangocfg/i18n` exposes via `useLocale()`) to BCP-47 tags
|
|
4
|
+
* (`en-US`, `ru-RU`, `ko-KR`) that the Web Speech API and most cloud
|
|
5
|
+
* STT services expect.
|
|
6
|
+
*
|
|
7
|
+
* We keep a small built-in table for the locales we ship translations
|
|
8
|
+
* for; everything else falls through to `<code>-<UPPER(code)>`, which
|
|
9
|
+
* works for the majority of regions. The mapping is also re-exported
|
|
10
|
+
* so consumers can extend it.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Built-in ISO 639-1 → BCP-47 table. Each language is pinned to one default
 * region; codes not listed here fall back to `<code>-<UPPER(code)>`.
 */
const ISO_TO_BCP47: Record<string, string> = {
  'en': 'en-US',
  'ru': 'ru-RU',
  'ko': 'ko-KR',
  'ja': 'ja-JP',
  'zh': 'zh-CN',
  'de': 'de-DE',
  'fr': 'fr-FR',
  'it': 'it-IT',
  'es': 'es-ES',
  'nl': 'nl-NL',
  'ar': 'ar-SA',
  'tr': 'tr-TR',
  'pl': 'pl-PL',
  'sv': 'sv-SE',
  // The generic Norwegian code maps to the `nb` language subtag.
  'no': 'nb-NO',
  'da': 'da-DK',
  'pt': 'pt-BR',
};

/** The same table object, exported so consumers can read or extend it. */
export const DEFAULT_ISO_TO_BCP47 = ISO_TO_BCP47;
|
|
34
|
+
|
|
35
|
+
/**
 * Normalise a locale identifier to a BCP-47 tag.
 *
 *   - Tags that already contain a region/script part ("en-US", "ru-RU") are
 *     passed through.
 *   - Underscore-separated identifiers ("en_US") are accepted too: the
 *     underscores become hyphens and the result is passed through.
 *   - Bare language codes ("en", "ru") are lower-cased and looked up in
 *     `table`; codes missing from it fall back to `<code>-<UPPER(code)>`.
 *   - `null` / `undefined` / blank input yields `undefined`.
 *
 * @param code  Locale identifier to normalise.
 * @param table Language-code → tag overrides. Only the table's own keys are
 *   consulted, so inherited names such as "constructor" never match.
 * @returns The BCP-47 tag, or `undefined` when there is no usable input.
 */
export function toBCP47(
  code: string | null | undefined,
  table: Record<string, string> = ISO_TO_BCP47,
): string | undefined {
  const trimmed = code?.trim();
  if (!trimmed) return undefined;

  // Without this, "en_US" would be treated as a bare language code and turn
  // into "en_us-EN_US".
  const tag = trimmed.replace(/_/g, '-');
  if (tag.includes('-')) return tag; // already BCP-47

  const lower = tag.toLowerCase();
  const mapped = Object.prototype.hasOwnProperty.call(table, lower)
    ? table[lower]
    : undefined;
  return mapped ?? `${lower}-${lower.toUpperCase()}`;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Pick the language tag for a speech session. Candidates are tried in this
 * order and the first one that normalises to a tag wins:
 *
 *   1. `explicit` — host-supplied override.
 *   2. `prefs`    — the user's stored choice (e.g. made through
 *                   `<LanguagePicker>` or set programmatically).
 *   3. `i18n`     — the current i18n locale (2-letter ISO).
 *   4. `navigator.language` — browser default, when a `navigator` exists.
 *   5. `'en-US'`  — last-resort safety net.
 *
 * Each candidate may be ISO-2 or BCP-47; all of them go through `toBCP47`.
 */
export function resolveSpeechLanguage(opts: {
  explicit?: string;
  prefs?: string | null;
  i18n?: string | null;
}): string {
  const browserLanguage =
    typeof navigator !== 'undefined' ? navigator.language : null;
  const candidates = [opts.explicit, opts.prefs, opts.i18n, browserLanguage];

  for (const candidate of candidates) {
    const tag = toBCP47(candidate);
    if (tag != null) return tag;
  }
  return 'en-US';
}
|